001 (ns beowulf.read
002 "This provides the reader required for boostrapping. It's not a bad
003 reader - it provides feedback on errors found in the input - but it isn't
004 the real Lisp reader.
005
006 Intended deviations from the behaviour of the real Lisp reader are as follows:
007
008 1. It reads the meta-expression language `MEXPR` in addition to the
009 symbolic expression language `SEXPR`, which I do not believe the Lisp 1.5
010 reader ever did;
011 2. It treats everything between a semi-colon and an end of line as a comment,
012 as most modern Lisps do; but I do not believe Lisp 1.5 had this feature.
013
014 Both these extensions can be disabled by using the `--strict` command line
015 switch."
016 (:require [beowulf.bootstrap :refer [*options*]]
017 [clojure.math.numeric-tower :refer [expt]]
018 [clojure.string :refer [starts-with? upper-case]]
019 [instaparse.core :as i]
020 [beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]]))
021
022 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
023 ;;;
024 ;;; This file provides the reader required for boostrapping. It's not a bad
025 ;;; reader - it provides feedback on errors found in the input - but it isn't
026 ;;; the real Lisp reader.
027 ;;;
028 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
029
030 (declare generate)
031
032 (def parse
033 "Parse a string presented as argument into a parse tree which can then
034 be operated upon further."
035 (i/parser
036 (str
037 ;; top level: we accept mexprs as well as sexprs.
038 "expr := mexpr | sexpr;"
039
040 ;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
041 ;; but it's a convenience.
042 "mexpr := λexpr | fncall | defn | cond | mvar | mexpr comment;
043 λexpr := λ lsqb bindings semi-colon body rsqb;
044 λ := 'λ';
045 bindings := lsqb args rsqb;
046 body := (expr semi-colon opt-space)* expr;
047 fncall := fn-name lsqb args rsqb;
048 lsqb := '[';
049 rsqb := ']';
050 defn := mexpr opt-space '=' opt-space mexpr;
051 cond := lsqb (cond-clause semi-colon opt-space)* cond-clause rsqb;
052 cond-clause := expr opt-space arrow opt-space expr;
053 arrow := '->';
054 args := (expr semi-colon opt-space)* expr;
055 fn-name := mvar;
056 mvar := #'[a-z]+';
057 semi-colon := ';';"
058
059 ;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
060 "comment := opt-space <';;'> #'[^\\n\\r]*';"
061
062 ;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
063 ;; but I've included it on the basis that it can do little harm.
064 "sexpr := quoted-expr | atom | number | dotted-pair | list | sexpr comment;
065 list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
066 dotted-pair := lpar dot-terminal ;
067 dot := '.';
068 lpar := '(';
069 rpar := ')';
070 quoted-expr := quote sexpr;
071 quote := '\\'';
072 dot-terminal := sexpr space dot space sexpr rpar;
073 space := #'\\p{javaWhitespace}+';
074 opt-space := #'\\p{javaWhitespace}*';
075 sep := ',' | opt-space;
076 atom := #'[A-Z][A-Z0-9]*';"
077
078 ;; Lisp 1.5 supported octal as well as decimal and scientific notation
079 "number := integer | decimal | scientific | octal;
080 integer := #'-?[1-9][0-9]*';
081 decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0.[0-9]*';
082 scientific := coefficient e exponent;
083 coefficient := decimal;
084 exponent := integer;
085 e := 'E';
086 octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
087 q := 'Q';
088 scale-factor := #'[0-9]*'")))
089
090 (defn simplify
091 "Simplify this parse tree `p`. If `p` is an instaparse failure object, throw
092 an `ex-info`, with `p` as the value of its `:failure` key."
093 ([p]
094 (if
095 (instance? instaparse.gll.Failure p)
096 (throw (ex-info "Ic ne behæfd" {:cause :parse-failure :failure p}))
097 (simplify p :sexpr)))
098 ([p context]
099 (if
100 (coll? p)
101 (apply
102 vector
103 (remove
104 #(if (coll? %) (empty? %))
105 (case (first p)
106 (:arg :expr :coefficient :fn-name :number :sexpr) (simplify (second p) context)
107 (:λexpr
108 :args :bindings :body :cond :cond-clause :dot-terminal
109 :fncall :octal :quoted-expr :scientific) (map #(simplify % context) p)
110 (:arrow :dot :e :lpar :lsqb :opt-space :q :quote :rpar :rsqb
111 :semi-colon :sep :space) nil
112 :atom (if
113 (= context :mexpr)
114 [:quoted-expr p]
115 p)
116 :comment (if
117 (:strict *options*)
118 (throw
119 (ex-info "Cannot parse comments in strict mode"
120 {:cause :strict})))
121 :dotted-pair (if
122 (= context :mexpr)
123 [:fncall
124 [:mvar "cons"]
125 [:args
126 (simplify (nth p 1) context)
127 (simplify (nth p 2) context)]]
128 (map simplify p))
129 :mexpr (if
130 (:strict *options*)
131 (throw
132 (ex-info "Cannot parse meta expressions in strict mode"
133 {:cause :strict}))
134 (simplify (second p) :mexpr))
135 :list (if
136 (= context :mexpr)
137 [:fncall
138 [:mvar "list"]
139 [:args (apply vector (map simplify (rest p)))]]
140 (map #(simplify % context) p))
141 ;;default
142 p)))
143 p)))
144
145
146 ;; # From Lisp 1.5 Programmers Manual, page 10
147 ;; Note that I've retyped much of this, since copy/pasting out of PDF is less
148 ;; than reliable. Any typos are mine. Quote starts [[
149
150 ;; We are now in a position to define the universal LISP function
151 ;; evalquote[fn;args], When evalquote is given a function and a list of arguments
152 ;; for that function, it computes the value of the function applied to the arguments.
153 ;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
154 ;; of the function evalquote must be an S-expression. Since we have been
155 ;; writing functions as M-expressions, it is necessary to translate them into
156 ;; S-expressions.
157
158 ;; The following rules define a method of translating functions written in the
159 ;; meta-language into S-expressions.
160 ;; 1. If the function is represented by its name, it is translated by changing
161 ;; all of the letters to upper case, making it an atomic symbol. Thus is
162 ;; translated to CAR.
163 ;; 2. If the function uses the lambda notation, then the expression
164 ;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
165 ;; of ε.
166 ;; 3. If the function begins with label, then the translation of
167 ;; label[α;ε] is (LABEL α* ε*).
168
169 ;; Forms are translated as follows:
170 ;; 1. A variable, like a function name, is translated by using uppercase letters.
171 ;; Thus the translation of varl is VAR1.
172 ;; 2. The obvious translation of letting a constant translate into itself will not
173 ;; work. Since the translation of x is X, the translation of X must be something
174 ;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
175 ;; into (QUOTE X).
176 ;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
177 ;; 4. The conditional expression [pl-el;...;pn-en] is translated into
178 ;; (COND (p1* e1*)...(pn* en*))
179
180 ;; ## Examples
181
182 ;; M-expressions S-expressions
183 ;; x X
184 ;; car CAR
185 ;; car[x] (CAR X)
186 ;; T (QUOTE T)
187 ;; ff[car [x]] (FF (CAR X))
188 ;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
189 ;; ((QUOTE T)(FF (CAR X))))
190 ;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
191 ;; ((ATOM X) X)
192 ;; ((QUOTE T)(FF (CAR X))))))
193
194 ;; ]] quote ends
195
196 (defn gen-cond-clause
197 "Generate a cond clause from this simplified parse tree fragment `p`;
198 returns `nil` if `p` does not represent a cond clause."
199 [p]
200 (if
201 (and (coll? p)(= :cond-clause (first p)))
202 (make-beowulf-list
203 (list (generate (nth p 1))
204 (generate (nth p 2))))))
205
206 (defn gen-cond
207 "Generate a cond statement from this simplified parse tree fragment `p`;
208 returns `nil` if `p` does not represent a (MEXPR) cond statement."
209 [p]
210 (if
211 (and (coll? p)(= :cond (first p)))
212 (make-beowulf-list
213 (cons
214 'COND
215 (map
216 gen-cond-clause
217 (rest p))))))
218
219 (defn gen-fn-call
220 "Generate a function call from this simplified parse tree fragment `p`;
221 returns `nil` if `p` does not represent a (MEXPR) function call."
222 [p]
223 (if
224 (and (coll? p)(= :fncall (first p))(= :mvar (first (second p))))
225 (make-cons-cell
226 (generate (second p))
227 (generate (nth p 2)))))
228
229
230 (defn gen-dot-terminated-list
231 "Generate a list, which may be dot-terminated, from this partial parse tree
232 'p'. Note that the function acts recursively and progressively decapitates
233 its argument, so that the argument will not always be a valid parse tree."
234 [p]
235 (cond
236 (empty? p)
237 NIL
238 (and (coll? (first p)) (= :dot-terminal (first (first p))))
239 (let [dt (first p)]
240 (make-cons-cell
241 (generate (nth dt 1))
242 (generate (nth dt 2))))
243 :else
244 (make-cons-cell
245 (generate (first p))
246 (gen-dot-terminated-list (rest p)))))
247
248
249 (defn strip-leading-zeros
250 "`read-string` interprets strings with leading zeros as octal; strip
251 any from this string `s`. If what's left is empty (i.e. there were
252 only zeros, return `\"0\"`."
253 ([s]
254 (strip-leading-zeros s ""))
255 ([s prefix]
256 (if
257 (empty? s) "0"
258 (case (first s)
259 (\+ \-)(strip-leading-zeros (subs s 1) (str (first s) prefix))
260 "0" (strip-leading-zeros (subs s 1) prefix)
261 (str prefix s)))))
262
263 (defn generate
264 "Generate lisp structure from this parse tree `p`. It is assumed that
265 `p` has been simplified."
266 [p]
267 (if
268 (coll? p)
269 (case (first p)
270 :λ "LAMBDA"
271 :λexpr (make-cons-cell
272 (generate (nth p 1))
273 (make-cons-cell (generate (nth p 2))
274 (generate (nth p 3))))
275 (:args :list) (gen-dot-terminated-list (rest p))
276 :atom (symbol (second p))
277 :bindings (generate (second p))
278 :body (make-beowulf-list (map generate (rest p)))
279 :cond (gen-cond p)
280 (:decimal :integer) (read-string (strip-leading-zeros (second p)))
281 :dotted-pair (make-cons-cell
282 (generate (nth p 1))
283 (generate (nth p 2)))
284 :exponent (generate (second p))
285 :fncall (gen-fn-call p)
286 :mvar (symbol (upper-case (second p)))
287 :octal (let [n (read-string (strip-leading-zeros (second p) "0"))
288 scale (generate (nth p 2))]
289 (* n (expt 8 scale)))
290
291 ;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
292 :quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
293 :scale-factor (if
294 (empty? (second p)) 0
295 (read-string (strip-leading-zeros (second p))))
296 :scientific (let [n (generate (second p))
297 exponent (generate (nth p 2))]
298 (* n (expt 10 exponent)))
299
300 ;; default
301 (throw (Exception. (str "Cannot yet generate " (first p)))))
302 p))
303
304 (defmacro gsp
305 "Shortcut macro - the internals of read; or, if you like, read-string.
306 Argument `s` should be a string representation of a valid Lisp
307 expression."
308 [s]
309 `(generate (simplify (parse ~s))))
310
311 (defn READ
312 "An implementation of a Lisp reader sufficient for bootstrapping; not necessarily
313 the final Lisp reader."
314 [input]
315 (gsp (or input (read-line))))