001 (ns beowulf.read
002 "This provides the reader required for boostrapping. It's not a bad
003 reader - it provides feedback on errors found in the input - but it isn't
004 the real Lisp reader.
005
006 Intended deviations from the behaviour of the real Lisp reader are as follows:
007
008 1. It reads the meta-expression language `MEXPR` in addition to the
009 symbolic expression language `SEXPR`, which I do not believe the Lisp 1.5
010 reader ever did;
011 2. It treats everything between a semi-colon and an end of line as a comment,
012 as most modern Lisps do; but I do not believe Lisp 1.5 had this feature."
013 (:require [beowulf.bootstrap :refer [*options*]]
014 [clojure.math.numeric-tower :refer [expt]]
015 [clojure.string :refer [starts-with? upper-case]]
016 [instaparse.core :as i]
017 [beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]]))
018
019 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
020 ;;;
021 ;;; This file provides the reader required for boostrapping. It's not a bad
022 ;;; reader - it provides feedback on errors found in the input - but it isn't
023 ;;; the real Lisp reader.
024 ;;;
025 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
026
027 (declare generate)
028
029 (def parse
030 "Parse a string presented as argument into a parse tree which can then
031 be operated upon further."
032 (i/parser
033 (str
034 ;; top level: we accept mexprs as well as sexprs.
035 "expr := mexpr | sexpr;"
036
037 ;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
038 ;; but it's a convenience.
039 "mexpr := λexpr | fncall | defn | cond | mvar | mexpr comment;
040 λexpr := λ lsqb bindings semi-colon body rsqb;
041 λ := 'λ';
042 bindings := lsqb args rsqb;
043 body := (expr semi-colon opt-space)* expr;
044 fncall := fn-name lsqb args rsqb;
045 lsqb := '[';
046 rsqb := ']';
047 defn := mexpr opt-space '=' opt-space mexpr;
048 cond := lsqb (cond-clause semi-colon opt-space)* cond-clause rsqb;
049 cond-clause := expr opt-space arrow opt-space expr;
050 arrow := '->';
051 args := (expr semi-colon opt-space)* expr;
052 fn-name := mvar;
053 mvar := #'[a-z]+';
054 semi-colon := ';';"
055
056 ;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
057 "comment := opt-space <';;'> #'[^\\n\\r]*';"
058
059 ;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
060 ;; but I've included it on the basis that it can do little harm.
061 "sexpr := quoted-expr | atom | number | dotted-pair | list | sexpr comment;
062 list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
063 dotted-pair := lpar dot-terminal ;
064 dot := '.';
065 lpar := '(';
066 rpar := ')';
067 quoted-expr := quote sexpr;
068 quote := '\\'';
069 dot-terminal := sexpr space dot space sexpr rpar;
070 space := #'\\p{javaWhitespace}+';
071 opt-space := #'\\p{javaWhitespace}*';
072 sep := ',' | opt-space;
073 atom := #'[A-Z][A-Z0-9]*';"
074
075 ;; Lisp 1.5 supported octal as well as decimal and scientific notation
076 "number := integer | decimal | scientific | octal;
077 integer := #'-?[1-9][0-9]*';
078 decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0.[0-9]*';
079 scientific := coefficient e exponent;
080 coefficient := decimal;
081 exponent := integer;
082 e := 'E';
083 octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
084 q := 'Q';
085 scale-factor := #'[0-9]*'")))
086
087 (defn simplify
088 "Simplify this parse tree `p`. If `p` is an instaparse failure object, throw
089 an `ex-info`, with `p` as the value of its `:failure` key."
090 ([p]
091 (if
092 (instance? instaparse.gll.Failure p)
093 (throw (ex-info "Ic ne behæfd" {:cause :parse-failure :failure p}))
094 (simplify p :sexpr)))
095 ([p context]
096 (if
097 (coll? p)
098 (apply
099 vector
100 (remove
101 #(if (coll? %) (empty? %))
102 (case (first p)
103 (:arg :expr :coefficient :fn-name :number :sexpr) (simplify (second p) context)
104 (:λexpr
105 :args :bindings :body :cond :cond-clause :dot-terminal
106 :fncall :octal :quoted-expr :scientific) (map #(simplify % context) p)
107 (:arrow :dot :e :lpar :lsqb :opt-space :q :quote :rpar :rsqb
108 :semi-colon :sep :space) nil
109 :atom (if
110 (= context :mexpr)
111 [:quoted-expr p]
112 p)
113 :comment (if
114 (:strict *options*)
115 (throw
116 (ex-info "Cannot parse comments in strict mode"
117 {:cause :strict})))
118 :dotted-pair (if
119 (= context :mexpr)
120 [:fncall
121 [:mvar "cons"]
122 [:args
123 (simplify (nth p 1) context)
124 (simplify (nth p 2) context)]]
125 (map simplify p))
126 :mexpr (if
127 (:strict *options*)
128 (throw
129 (ex-info "Cannot parse meta expressions in strict mode"
130 {:cause :strict}))
131 (simplify (second p) :mexpr))
132 :list (if
133 (= context :mexpr)
134 [:fncall
135 [:mvar "list"]
136 [:args (apply vector (map simplify (rest p)))]]
137 (map #(simplify % context) p))
138 ;;default
139 p)))
140 p)))
141
142
143 ;; # From Lisp 1.5 Programmers Manual, page 10
144 ;; Note that I've retyped much of this, since copy/pasting out of PDF is less
145 ;; than reliable. Any typos are mine. Quote starts [[
146
147 ;; We are now in a position to define the universal LISP function
148 ;; evalquote[fn;args], When evalquote is given a function and a list of arguments
149 ;; for that function, it computes the value of the function applied to the arguments.
150 ;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
151 ;; of the function evalquote must be an S-expression. Since we have been
152 ;; writing functions as M-expressions, it is necessary to translate them into
153 ;; S-expressions.
154
155 ;; The following rules define a method of translating functions written in the
156 ;; meta-language into S-expressions.
157 ;; 1. If the function is represented by its name, it is translated by changing
158 ;; all of the letters to upper case, making it an atomic symbol. Thus is
159 ;; translated to CAR.
160 ;; 2. If the function uses the lambda notation, then the expression
161 ;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
162 ;; of ε.
163 ;; 3. If the function begins with label, then the translation of
164 ;; label[α;ε] is (LABEL α* ε*).
165
166 ;; Forms are translated as follows:
167 ;; 1. A variable, like a function name, is translated by using uppercase letters.
168 ;; Thus the translation of varl is VAR1.
169 ;; 2. The obvious translation of letting a constant translate into itself will not
170 ;; work. Since the translation of x is X, the translation of X must be something
171 ;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
172 ;; into (QUOTE X).
173 ;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
174 ;; 4. The conditional expression [pl-el;...;pn-en] is translated into
175 ;; (COND (p1* e1*)...(pn* en*))
176
177 ;; ## Examples
178
179 ;; M-expressions S-expressions
180 ;; x X
181 ;; car CAR
182 ;; car[x] (CAR X)
183 ;; T (QUOTE T)
184 ;; ff[car [x]] (FF (CAR X))
185 ;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
186 ;; ((QUOTE T)(FF (CAR X))))
187 ;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
188 ;; ((ATOM X) X)
189 ;; ((QUOTE T)(FF (CAR X))))))
190
191 ;; ]] quote ends
192
193 (defn gen-cond-clause
194 "Generate a cond clause from this simplified parse tree fragment `p`;
195 returns `nil` if `p` does not represent a cond clause."
196 [p]
197 (if
198 (and (coll? p)(= :cond-clause (first p)))
199 (make-beowulf-list
200 (list (generate (nth p 1))
201 (generate (nth p 2))))))
202
203 (defn gen-cond
204 "Generate a cond statement from this simplified parse tree fragment `p`;
205 returns `nil` if `p` does not represent a (MEXPR) cond statement."
206 [p]
207 (if
208 (and (coll? p)(= :cond (first p)))
209 (make-beowulf-list
210 (cons
211 'COND
212 (map
213 gen-cond-clause
214 (rest p))))))
215
216 (defn gen-fn-call
217 "Generate a function call from this simplified parse tree fragment `p`;
218 returns `nil` if `p` does not represent a (MEXPR) function call.
219 TODO: I'm not yet certain but it appears that args in mexprs are
220 implicitly quoted; this function does not (yet) do that."
221 [p]
222 (if
223 (and (coll? p)(= :fncall (first p))(= :mvar (first (second p))))
224 (make-cons-cell
225 (generate (second p))
226 (generate (nth p 2)))))
227
228
229 (defn gen-dot-terminated-list
230 "Generate a list, which may be dot-terminated, from this partial parse tree
231 'p'. Note that the function acts recursively and progressively decapitates
232 its argument, so that the argument will not always be a valid parse tree."
233 [p]
234 (cond
235 (empty? p)
236 NIL
237 (and (coll? (first p)) (= :dot-terminal (first (first p))))
238 (let [dt (first p)]
239 (make-cons-cell
240 (generate (nth dt 1))
241 (generate (nth dt 2))))
242 :else
243 (make-cons-cell
244 (generate (first p))
245 (gen-dot-terminated-list (rest p)))))
246
247
248 (defn strip-leading-zeros
249 "`read-string` interprets strings with leading zeros as octal; strip
250 any from this string `s`. If what's left is empty (i.e. there were
251 only zeros, return `\"0\"`."
252 ([s]
253 (strip-leading-zeros s ""))
254 ([s prefix]
255 (if
256 (empty? s) "0"
257 (case (first s)
258 (\+ \-)(strip-leading-zeros (subs s 1) (str (first s) prefix))
259 "0" (strip-leading-zeros (subs s 1) prefix)
260 (str prefix s)))))
261
262 (defn generate
263 "Generate lisp structure from this parse tree `p`. It is assumed that
264 `p` has been simplified."
265 [p]
266 (if
267 (coll? p)
268 (case (first p)
269 :λ "LAMBDA"
270 :λexpr (make-cons-cell
271 (generate (nth p 1))
272 (make-cons-cell (generate (nth p 2))
273 (generate (nth p 3))))
274 (:args :list) (gen-dot-terminated-list (rest p))
275 :atom (symbol (second p))
276 :bindings (generate (second p))
277 :body (make-beowulf-list (map generate (rest p)))
278 :cond (gen-cond p)
279 (:decimal :integer) (read-string (strip-leading-zeros (second p)))
280 :dotted-pair (make-cons-cell
281 (generate (nth p 1))
282 (generate (nth p 2)))
283 :exponent (generate (second p))
284 :fncall (gen-fn-call p)
285 :mvar (symbol (upper-case (second p)))
286 :octal (let [n (read-string (strip-leading-zeros (second p) "0"))
287 scale (generate (nth p 2))]
288 (* n (expt 8 scale)))
289
290 ;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
291 :quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
292 :scale-factor (if
293 (empty? (second p)) 0
294 (read-string (strip-leading-zeros (second p))))
295 :scientific (let [n (generate (second p))
296 exponent (generate (nth p 2))]
297 (* n (expt 10 exponent)))
298
299 ;; default
300 (throw (Exception. (str "Cannot yet generate " (first p)))))
301 p))
302
303 (defmacro gsp
304 "Shortcut macro - the internals of read; or, if you like, read-string.
305 Argument `s` should be a string representation of a valid Lisp
306 expression."
307 [s]
308 `(generate (simplify (parse ~s))))
309
310 (defn READ
311 [input]
312 (gsp (or input (read-line))))