001 (ns beowulf.read
002 (:require [beowulf.bootstrap :refer [*options*]]
003 [clojure.math.numeric-tower :refer [expt]]
004 [clojure.string :refer [starts-with? upper-case]]
005 [instaparse.core :as i]
006 [beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]]))
007
008 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
009 ;;;
010 ;;; This file provides the reader required for boostrapping. It's not a bad
011 ;;; reader - it provides feedback on errors found in the input - but it isn't
012 ;;; the real Lisp reader.
013 ;;;
014 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
015
016 (declare generate)
017
018 (def parse
019 "Parse a string presented as argument into a parse tree which can then
020 be operated upon further."
021 (i/parser
022 (str
023 ;; top level: we accept mexprs as well as sexprs.
024 "expr := mexpr | sexpr;"
025
026 ;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
027 ;; but it's a convenience.
028 "mexpr := λexpr | fncall | defn | cond | mvar | mexpr comment;
029 λexpr := λ lsqb bindings semi-colon body rsqb;
030 λ := 'λ';
031 bindings := lsqb args rsqb;
032 body := (expr semi-colon opt-space)* expr;
033 fncall := fn-name lsqb args rsqb;
034 lsqb := '[';
035 rsqb := ']';
036 defn := mexpr opt-space '=' opt-space mexpr;
037 cond := lsqb (cond-clause semi-colon opt-space)* cond-clause rsqb;
038 cond-clause := expr opt-space arrow opt-space expr;
039 arrow := '->';
040 args := (expr semi-colon opt-space)* expr;
041 fn-name := mvar;
042 mvar := #'[a-z]+';
043 semi-colon := ';';"
044
045 ;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
046 "comment := opt-space <';;'> #'[^\\n\\r]*';"
047
048 ;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
049 ;; but I've included it on the basis that it can do little harm.
050 "sexpr := quoted-expr | atom | number | dotted-pair | list | sexpr comment;
051 list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
052 dotted-pair := lpar dot-terminal ;
053 dot := '.';
054 lpar := '(';
055 rpar := ')';
056 quoted-expr := quote sexpr;
057 quote := '\\'';
058 dot-terminal := sexpr space dot space sexpr rpar;
059 space := #'\\p{javaWhitespace}+';
060 opt-space := #'\\p{javaWhitespace}*';
061 sep := ',' | opt-space;
062 atom := #'[A-Z][A-Z0-9]*';"
063
064 ;; Lisp 1.5 supported octal as well as decimal and scientific notation
065 "number := integer | decimal | scientific | octal;
066 integer := #'-?[1-9][0-9]*';
067 decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0.[0-9]*';
068 scientific := coefficient e exponent;
069 coefficient := decimal;
070 exponent := integer;
071 e := 'E';
072 octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
073 q := 'Q';
074 scale-factor := #'[0-9]*'")))
075
076 (defn simplify
077 "Simplify this parse tree `p`. If `p` is an instaparse failure object, throw
078 an `ex-info`, with `p` as the value of its `:failure` key."
079 ([p]
080 (if
081 (instance? instaparse.gll.Failure p)
082 (throw (ex-info "Ic ne behæfd" {:cause :parse-failure :failure p}))
083 (simplify p :sexpr)))
084 ([p context]
085 (if
086 (coll? p)
087 (apply
088 vector
089 (remove
090 #(if (coll? %) (empty? %))
091 (case (first p)
092 (:arg :expr :coefficient :fn-name :number :sexpr) (simplify (second p) context)
093 (:λexpr
094 :args :bindings :body :cond :cond-clause :dot-terminal
095 :fncall :octal :quoted-expr :scientific) (map #(simplify % context) p)
096 (:arrow :dot :e :lpar :lsqb :opt-space :q :quote :rpar :rsqb
097 :semi-colon :sep :space) nil
098 :atom (if
099 (= context :mexpr)
100 [:quoted-expr p]
101 p)
102 :comment (if
103 (:strict *options*)
104 (throw
105 (ex-info "Cannot parse comments in strict mode"
106 {:cause :strict})))
107 :dotted-pair (if
108 (= context :mexpr)
109 [:fncall
110 [:mvar "cons"]
111 [:args
112 (simplify (nth p 1) context)
113 (simplify (nth p 2) context)]]
114 (map simplify p))
115 :mexpr (if
116 (:strict *options*)
117 (throw
118 (ex-info "Cannot parse meta expressions in strict mode"
119 {:cause :strict}))
120 (simplify (second p) :mexpr))
121 :list (if
122 (= context :mexpr)
123 [:fncall
124 [:mvar "list"]
125 [:args (apply vector (map simplify (rest p)))]]
126 (map #(simplify % context) p))
127 ;;default
128 p)))
129 p)))
130
131
132 ;; # From Lisp 1.5 Programmers Manual, page 10
133 ;; Note that I've retyped much of this, since copy/pasting out of PDF is less
134 ;; than reliable. Any typos are mine. Quote starts [[
135
136 ;; We are now in a position to define the universal LISP function
137 ;; evalquote[fn;args], When evalquote is given a function and a list of arguments
138 ;; for that function, it computes the value of the function applied to the arguments.
139 ;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
140 ;; of the function evalquote must be an S-expression. Since we have been
141 ;; writing functions as M-expressions, it is necessary to translate them into
142 ;; S-expressions.
143
144 ;; The following rules define a method of translating functions written in the
145 ;; meta-language into S-expressions.
146 ;; 1. If the function is represented by its name, it is translated by changing
147 ;; all of the letters to upper case, making it an atomic symbol. Thus is
148 ;; translated to CAR.
149 ;; 2. If the function uses the lambda notation, then the expression
150 ;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
151 ;; of ε.
152 ;; 3. If the function begins with label, then the translation of
153 ;; label[α;ε] is (LABEL α* ε*).
154
155 ;; Forms are translated as follows:
156 ;; 1. A variable, like a function name, is translated by using uppercase letters.
157 ;; Thus the translation of varl is VAR1.
158 ;; 2. The obvious translation of letting a constant translate into itself will not
159 ;; work. Since the translation of x is X, the translation of X must be something
160 ;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
161 ;; into (QUOTE X).
162 ;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
163 ;; 4. The conditional expression [pl-el;...;pn-en] is translated into
164 ;; (COND (p1* e1*)...(pn* en*))
165
166 ;; ## Examples
167
168 ;; M-expressions S-expressions
169 ;; x X
170 ;; car CAR
171 ;; car[x] (CAR X)
172 ;; T (QUOTE T)
173 ;; ff[car [x]] (FF (CAR X))
174 ;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
175 ;; ((QUOTE T)(FF (CAR X))))
176 ;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
177 ;; ((ATOM X) X)
178 ;; ((QUOTE T)(FF (CAR X))))))
179
180 ;; ]] quote ends
181
182 (defn gen-cond-clause
183 "Generate a cond clause from this simplified parse tree fragment `p`;
184 returns `nil` if `p` does not represent a cond clause."
185 [p]
186 (if
187 (and (coll? p)(= :cond-clause (first p)))
188 (make-beowulf-list
189 (list (generate (nth p 1))
190 (generate (nth p 2))))))
191
192 (defn gen-cond
193 "Generate a cond statement from this simplified parse tree fragment `p`;
194 returns `nil` if `p` does not represent a (MEXPR) cond statement."
195 [p]
196 (if
197 (and (coll? p)(= :cond (first p)))
198 (make-beowulf-list
199 (cons
200 'COND
201 (map
202 gen-cond-clause
203 (rest p))))))
204
205 (defn gen-fn-call
206 "Generate a function call from this simplified parse tree fragment `p`;
207 returns `nil` if `p` does not represent a (MEXPR) function call.
208 TODO: I'm not yet certain but it appears that args in mexprs are
209 implicitly quoted; this function does not (yet) do that."
210 [p]
211 (if
212 (and (coll? p)(= :fncall (first p))(= :mvar (first (second p))))
213 (make-cons-cell
214 (generate (second p))
215 (generate (nth p 2)))))
216
217
218 (defn gen-dot-terminated-list
219 "Generate a list, which may be dot-terminated, from this partial parse tree
220 'p'. Note that the function acts recursively and progressively decapitates
221 its argument, so that the argument will not always be a valid parse tree."
222 [p]
223 (cond
224 (empty? p)
225 NIL
226 (and (coll? (first p)) (= :dot-terminal (first (first p))))
227 (let [dt (first p)]
228 (make-cons-cell
229 (generate (nth dt 1))
230 (generate (nth dt 2))))
231 :else
232 (make-cons-cell
233 (generate (first p))
234 (gen-dot-terminated-list (rest p)))))
235
236
237 (defn strip-leading-zeros
238 "`read-string` interprets strings with leading zeros as octal; strip
239 any from this string `s`. If what's left is empty (i.e. there were
240 only zeros, return `\"0\"`."
241 ([s]
242 (strip-leading-zeros s ""))
243 ([s prefix]
244 (if
245 (empty? s) "0"
246 (case (first s)
247 (\+ \-)(strip-leading-zeros (subs s 1) (str (first s) prefix))
248 "0" (strip-leading-zeros (subs s 1) prefix)
249 (str prefix s)))))
250
251 (defn generate
252 "Generate lisp structure from this parse tree `p`. It is assumed that
253 `p` has been simplified."
254 [p]
255 (if
256 (coll? p)
257 (case (first p)
258 :λ "LAMBDA"
259 :λexpr (make-cons-cell
260 (generate (nth p 1))
261 (make-cons-cell (generate (nth p 2))
262 (generate (nth p 3))))
263 (:args :list) (gen-dot-terminated-list (rest p))
264 :atom (symbol (second p))
265 :bindings (generate (second p))
266 :body (make-beowulf-list (map generate (rest p)))
267 :cond (gen-cond p)
268 (:decimal :integer) (read-string (strip-leading-zeros (second p)))
269 :dotted-pair (make-cons-cell
270 (generate (nth p 1))
271 (generate (nth p 2)))
272 :exponent (generate (second p))
273 :fncall (gen-fn-call p)
274 :mvar (symbol (upper-case (second p)))
275 :octal (let [n (read-string (strip-leading-zeros (second p) "0"))
276 scale (generate (nth p 2))]
277 (* n (expt 8 scale)))
278
279 ;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
280 :quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
281 :scale-factor (if
282 (empty? (second p)) 0
283 (read-string (strip-leading-zeros (second p))))
284 :scientific (let [n (generate (second p))
285 exponent (generate (nth p 2))]
286 (* n (expt 10 exponent)))
287
288 ;; default
289 (throw (Exception. (str "Cannot yet generate " (first p)))))
290 p))
291
292 (defmacro gsp
293 "Shortcut macro - the internals of read; or, if you like, read-string.
294 Argument `s` should be a string representation of a valid Lisp
295 expression."
296 [s]
297 `(generate (simplify (parse ~s))))
298
299 (defn READ
300 [input]
301 (gsp (or input (read-line))))