Modularised the reader; some general improvement

This commit is contained in:
Simon Brooke 2023-03-26 11:50:56 +01:00
parent 9b532d39a8
commit b5e418118b
14 changed files with 594 additions and 556 deletions

View file

@ -11,7 +11,11 @@
objects."
(:require [clojure.string :as s]
[clojure.tools.trace :refer [deftrace]]
[beowulf.cons-cell :refer [cons-cell? make-beowulf-list make-cons-cell NIL pretty-print T F]]))
[beowulf.cons-cell :refer [cons-cell? make-beowulf-list make-cons-cell
NIL pretty-print T F]]
[beowulf.host :refer [ADD1 DIFFERENCE FIXP NUMBERP PLUS2 QUOTIENT
REMAINDER RPLACA RPLACD SUB1 TIMES2]])
(:import [beowulf.cons_cell ConsCell]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
@ -57,12 +61,6 @@
[x]
`(if (or (symbol? ~x) (number? ~x)) T NIL))
(defmacro NUMBERP
"Returns `T` if and only if the argument `x` is bound to an number; else `F`.
TODO: check whether floating point numbers, rationals, etc were numbers in Lisp 1.5"
[x]
`(if (number? ~x) T F))
(defmacro CONS
"Construct a new instance of cons cell with this `car` and `cdr`."
[car cdr]
@ -75,7 +73,7 @@
(if
(= x NIL) NIL
(try
(.getCar x)
(or (.getCar x) NIL)
(catch Exception any
(throw (Exception.
(str "Cannot take CAR of `" x "` (" (.getName (.getClass x)) ")") any))))))
@ -149,9 +147,12 @@
(defn EQ
"Returns `T` if and only if both `x` and `y` are bound to the same atom,
else `F`."
else `NIL`."
[x y]
(if (and (= (ATOM x) T) (= x y)) T F))
(cond (and (instance? ConsCell x)
(.equals x y)) T
(and (= (ATOM x) T) (= x y)) T
:else NIL))
(defn EQUAL
"This is a predicate that is true if its two arguments are identical
@ -162,7 +163,7 @@
NOTE: returns `F` on failure, not `NIL`"
[x y]
(cond
(= (ATOM x) T) (EQ x y)
(= (ATOM x) T) (if (= x y) T F)
(= (EQUAL (CAR x) (CAR y)) T) (EQUAL (CDR x) (CDR y))
:else F))
@ -378,10 +379,10 @@
"Not certain whether or not this is part of LISP 1.5; adapted from PSL.
return the current value of the object list. Note that in PSL this function
returns a list of the symbols bound, not the whole association list."
[args]
(@oblist))
[]
(make-beowulf-list (map CAR @oblist)))
(deftrace DEFINE
(defn DEFINE
"Bootstrap-only version of `DEFINE` which, post boostrap, can be overwritten
in LISP.
@ -401,29 +402,49 @@
(recur (CDR cursor) a))))
(CAR args)))
(defn SET
"Implementation of SET in Clojure. Add to the `oblist` a binding of the
value of `var` to the value of `val`. NOTE WELL: this is not SETQ!"
[symbol val]
(doall
(swap!
oblist
(fn [ob s v] (make-cons-cell (make-cons-cell s v) ob))
symbol val)
NIL))
(defn APPLY
"For bootstrapping, at least, a version of APPLY written in Clojure.
All args are assumed to be symbols or `beowulf.cons-cell/ConsCell` objects.
See page 13 of the Lisp 1.5 Programmers Manual."
[function args environment]
(cond
(= NIL function) (throw (ex-info "NIL is not a function" {:context "APPLY"
:function "NIL"
:args args}))
(=
(ATOM? function)
T) (cond
;; TODO: doesn't check whether `function` is bound in the environment;
;; we'll need that before we can bootstrap.
;; (fn? (eval function)) (apply (eval function) args)
(not=
(ASSOC function environment)
NIL) (APPLY (CDR (ASSOC function environment)) args environment)
(= function 'ATOM) (if (ATOM? (CAR args)) T NIL)
(= function 'CAR) (CAAR args)
(= function 'CDR) (CDAR args)
(= function 'CONS) (make-cons-cell (CAR args) (CADR args))
(= function 'DEFINE) (DEFINE args)
(= function 'ATOM) (if (ATOM? (CAR args)) T NIL)
(= function 'EQ) (if (= (CAR args) (CADR args)) T NIL)
(= function 'EQ) (apply EQ args)
(= function 'INTEROP) (INTEROP (CAR args) (CDR args))
:else
(APPLY
(= function 'SET) (SET (CAR args) (CADR args))
(EVAL function environment)(APPLY
(EVAL function environment)
args
environment))
environment)
:else
(throw (ex-info "No function found" {:context "APPLY"
:function function
:args args})))
(fn? function) ;; i.e., it's a Clojure function
(apply function (to-clojure args))
(= (first function) 'LAMBDA) (EVAL
@ -508,8 +529,11 @@
"For bootstrapping, at least, a version of EVAL written in Clojure.
All args are assumed to be symbols or `beowulf.cons-cell/ConsCell` objects.
See page 13 of the Lisp 1.5 Programmers Manual."
[expr env]
([expr]
(EVAL expr @oblist))
([expr env]
(if
(:trace *options*)
(traced-eval expr env)
(eval-internal expr env)))
(eval-internal expr env))))

View file

@ -4,11 +4,7 @@
must have both CAR and CDR mutable, so cannot be implemented on top
of Clojure lists.")
(declare cons-cell?)
(def NIL
"The canonical empty list symbol."
(symbol "NIL"))
(declare cons-cell? NIL)
(def T
"The canonical true value."
@ -32,9 +28,13 @@
"Return the first element of this sequence.")
(getCdr
[this]
"like `more`, q.v., but returns List `NIL` not Clojure `nil` when empty." ))
"like `more`, q.v., but returns List `NIL` not Clojure `nil` when empty.")
(getUid
[this]
"Returns a unique identifier for this object")
)
(deftype ConsCell [^:unsynchronized-mutable CAR ^:unsynchronized-mutable CDR]
(deftype ConsCell [^:unsynchronized-mutable CAR ^:unsynchronized-mutable CDR uid]
;; Note that, because the CAR and CDR fields are unsynchronised mutable - i.e.
;; plain old Java instance variables which can be written as well as read -
;; ConsCells are NOT thread safe. This does not matter, since Lisp 1.5 is
@ -73,13 +73,16 @@
(str "Invalid value in RPLACD: `" value "` (" (type value) ")")
{:cause :bad-value
:detail :rplaca}))))
(getCar [this]
(. this CAR))
(getCdr [this]
(. this CDR))
(getUid [this]
(. this uid))
clojure.lang.ISeq
(cons [this x] (ConsCell. x this))
(cons [this x] (ConsCell. x this (gensym "c")))
(first [this] (.CAR this))
;; next and more must return ISeq:
;; https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/ISeq.java
@ -101,7 +104,7 @@
clojure.lang.Sequential
clojure.lang.IPersistentCollection
(empty [this] false) ;; a cons cell is by definition not empty.
(empty [this] (= this NIL)) ;; a cons cell is by definition not empty.
(equiv [this other] (if
(seq? other)
(and
@ -123,13 +126,10 @@
(count [this] (loop [cell this
result 1]
(if
(coll? (.getCdr this))
(recur (.getCdr this) (inc result))
(and (coll? (.getCdr cell)) (not= NIL (.getCdr cell)))
(recur (.getCdr cell) (inc result))
result)))
;; (if
;; (coll? (.getCdr this))
;; (inc (.count (.getCdr this)))
;; 1))
java.lang.Object
(toString [this]
(str "("
@ -137,9 +137,7 @@
(cond
(instance? ConsCell (. this CDR)) (str " " (subs (.toString (. this CDR)) 1))
(= NIL (. this CDR)) ")"
:else (str " . " (. this CDR)))))
)
:else (str " . " (. this CDR))))))
(defn- to-string
"Printing ConsCells gave me a *lot* of trouble. This is an internal function
@ -154,15 +152,18 @@
(instance? beowulf.cons_cell.ConsCell c)
(let [car (.first c)
cdr (.getCdr c)
cons? (instance? beowulf.cons_cell.ConsCell cdr)
cons? (and
(instance? beowulf.cons_cell.ConsCell cdr)
(not (nil? cdr))
(not= cdr NIL))
ss (str
s
(to-string car)
(cond
cons?
" "
(or (nil? cdr) (= cdr NIL))
")"
cons?
" "
:else
(str " . " (to-string cdr) ")")))]
(if
@ -190,13 +191,13 @@
s
(pretty-print car width n)
(cond
(or (nil? cdr) (= cdr NIL))
")"
cons?
(if
(< (+ (count indent) print-width) width)
" "
(str "\n" indent))
(or (nil? cdr) (= cdr NIL))
")"
:else
(str " . " (pretty-print cdr width n) ")")))]
(if
@ -216,11 +217,15 @@
"Construct a new instance of cons cell with this `car` and `cdr`."
[car cdr]
(try
(ConsCell. car cdr)
(ConsCell. car cdr (gensym "c"))
(catch Exception any
(throw (ex-info "Cound not construct cons cell" {:car car
:cdr cdr} any)))))
(def NIL
"The canonical empty list symbol."
'NIL)
(defn cons-cell?
"Is this object `o` a beowulf cons-cell?"
[o]
@ -238,7 +243,8 @@
(coll? (first x))
(make-beowulf-list (first x))
(first x))
(make-beowulf-list (rest x)))
(make-beowulf-list (rest x))
(gensym "c"))
:else
NIL)
(catch Exception any

View file

@ -1,7 +1,7 @@
(ns beowulf.core
"Essentially, the `-main` function and the bootstrap read-eval-print loop."
(:require [beowulf.bootstrap :refer [EVAL oblist *options*]]
[beowulf.read :refer [READ]]
[beowulf.read :refer [READ read-from-console]]
[clojure.java.io :as io]
[clojure.pprint :refer [pprint]]
[clojure.string :refer [trim]]
@ -31,7 +31,7 @@
(try
;; TODO: does not currently allow the reading of forms covering multiple
;; lines.
(let [input (trim (read-line))]
(let [input (trim (read-from-console))]
(cond
(= input stop-word) (throw (ex-info "\nFærwell!" {:cause :quit}))
input (println (str "> " (print-str (EVAL (READ input) @oblist))))
@ -41,7 +41,7 @@
e
(let [data (ex-data e)]
(println (.getMessage e))
(if
(when
data
(case (:cause data)
:parse-failure (println (:failure data))

View file

@ -13,12 +13,10 @@
Both these extensions can be disabled by using the `--strict` command line
switch."
(:require [beowulf.bootstrap :refer [*options*]]
[clojure.math.numeric-tower :refer [expt]]
[clojure.string :refer [join split starts-with? trim upper-case]]
[instaparse.core :as i]
[instaparse.failure :as f]
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]])
(:require [beowulf.reader.generate :refer [generate]]
[beowulf.reader.parser :refer [parse]]
[beowulf.reader.simplify :refer [simplify]]
[clojure.string :refer [join split starts-with? trim]])
(:import [java.io InputStream]
[instaparse.gll Failure]))
@ -30,8 +28,6 @@
;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(declare generate)
(defn strip-line-comments
"Strip blank lines and comment lines from this string `s`, expected to
be Lisp source."
@ -55,386 +51,6 @@
(range)
(split s #"\n"))))))
(def parse
"Parse a string presented as argument into a parse tree which can then
be operated upon further."
(i/parser
(str
;; we tolerate whitespace and comments around legitimate input
"raw := expr | opt-comment expr opt-comment;"
;; top level: we accept mexprs as well as sexprs.
"expr := mexpr | sexpr ;"
;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
"comment := opt-space <';;'> opt-space #'[^\\n\\r]*';"
;; there's a notation comprising a left brace followed by mexprs
;; followed by a right brace which doesn't seem to be documented
;; but I think must represent a prog(?)
;; "prog := lbrace exprs rbrace;"
;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
;; but it's a convenience.
"exprs := expr | exprs;"
"mexpr := λexpr | fncall | defn | cond | mvar | iexpr | mexpr comment;
λexpr := λ lsqb bindings semi-colon body rsqb;
λ := ';
bindings := lsqb args rsqb;
body := (expr semi-colon opt-space)* expr;
fncall := fn-name lsqb args rsqb;
lsqb := '[';
rsqb := ']';
lbrace := '{';
rbrace := '}';
defn := mexpr opt-space '=' opt-space mexpr;
cond := lsqb (opt-space cond-clause semi-colon opt-space)* cond-clause rsqb;
cond-clause := expr opt-space arrow opt-space expr opt-space;
arrow := '->';
args := (opt-space expr semi-colon opt-space)* expr;
fn-name := mvar;
mvar := #'[a-z]+';
semi-colon := ';';"
;; Infix operators appear in mexprs, e.g. on page 7. Ooops!
;; I do not know what infix operators are considered legal.
"iexpr := iexp iop iexp;
iexp := mexpr | mvar | number | mexpr | opt-space iexp opt-space;
iop := '>' | '<' | '+' | '-' | '/' | '=' ;"
;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
"opt-comment := opt-space | comment;"
"comment := opt-space <';;'> #'[^\\n\\r]*' opt-space;"
;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
;; but I've included it on the basis that it can do little harm.
"sexpr := quoted-expr | atom | number | dotted-pair | list | sexpr comment;
list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal | lbrace exprs rbrace;
list := lpar opt-space sexpr rpar | lpar opt-space (sexpr sep)* rpar | lpar opt-space (sexpr sep)* dot-terminal;
dotted-pair := lpar dot-terminal ;
dot := '.';
lpar := '(';
rpar := ')';
quoted-expr := quote sexpr;
quote := '\\'';
dot-terminal := sexpr space dot space sexpr rpar;
space := #'\\p{javaWhitespace}+';
opt-space := #'\\p{javaWhitespace}*';
sep := ',' | opt-space;
atom := #'[A-Z][A-Z0-9]*';"
;; Lisp 1.5 supported octal as well as decimal and scientific notation
"number := integer | decimal | scientific | octal;
integer := #'-?[1-9][0-9]*';
decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0.[0-9]*';
scientific := coefficient e exponent;
coefficient := decimal;
exponent := integer;
e := 'E';
octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
q := 'Q';
scale-factor := #'[0-9]*'")))
(declare simplify)
(defn simplify-second-of-two
"There are a number of possible simplifications such that if the `tree` has
only two elements, the second is semantically sufficient."
[tree context]
(if (= (count tree) 2)
(simplify (second tree) context)
tree))
(defn remove-optional-space
[tree]
(if (vector? tree)
(if (= :opt-space (first tree))
nil
(remove nil?
(map remove-optional-space tree)))
tree))
(defn remove-nesting
[tree]
(let [tree' (remove-optional-space tree)]
(if-let [key (when (and (vector? tree') (keyword? (first tree'))) (first tree'))]
(loop [r tree']
(if (and r (vector? r) (keyword? (first r)))
(if (= (first r) key)
(recur (simplify (second r) :foo))
r)
r))
tree')))
(defn simplify
"Simplify this parse tree `p`. If `p` is an instaparse failure object, throw
an `ex-info`, with `p` as the value of its `:failure` key."
([p]
(if
(instance? Failure p)
(throw (ex-info (str "Ic ne behæfd: " (f/pprint-failure p)) {:cause :parse-failure
:phase :simplify
:failure p}))
(simplify p :expr)))
([p context]
(if
(coll? p)
(apply
vector
(remove
#(when (coll? %) (empty? %))
(case (first p)
(:λexpr
:args :bindings :body :cond :cond-clause :defn :dot-terminal
:fncall :lhs :octal :quoted-expr :rhs :scientific) (map #(simplify % context) p)
(:arg :expr :coefficient :fn-name :number) (simplify (second p) context)
(:arrow :dot :e :lpar :lsqb :opt-comment :opt-space :q :quote :rpar :rsqb
:semi-colon :sep :space) nil
:atom (if
(= context :mexpr)
[:quoted-expr p]
p)
:comment (when
(:strict *options*)
(throw
(ex-info "Cannot parse comments in strict mode"
{:cause :strict})))
:dotted-pair (if
(= context :mexpr)
[:fncall
[:mvar "cons"]
[:args
(simplify (nth p 1) context)
(simplify (nth p 2) context)]]
(map simplify p))
:iexp (second (remove-nesting p))
:iexpr [:iexpr
[:lhs (simplify (second p) context)]
(simplify (nth p 2) context) ;; really should be the operator
[:rhs (simplify (nth p 3) context)]]
:mexpr (if
(:strict *options*)
(throw
(ex-info "Cannot parse meta expressions in strict mode"
{:cause :strict}))
(simplify (second p) :mexpr))
:list (if
(= context :mexpr)
[:fncall
[:mvar "list"]
[:args (apply vector (map simplify (rest p)))]]
(map #(simplify % context) p))
:raw (first (remove empty? (map simplify (rest p))))
:sexpr (simplify (second p) :sexpr)
;;default
p)))
p)))
;; # From Lisp 1.5 Programmers Manual, page 10
;; Note that I've retyped much of this, since copy/pasting out of PDF is less
;; than reliable. Any typos are mine. Quote starts [[
;; We are now in a position to define the universal LISP function
;; evalquote[fn;args], When evalquote is given a function and a list of arguments
;; for that function, it computes the value of the function applied to the arguments.
;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
;; of the function evalquote must be an S-expression. Since we have been
;; writing functions as M-expressions, it is necessary to translate them into
;; S-expressions.
;; The following rules define a method of translating functions written in the
;; meta-language into S-expressions.
;; 1. If the function is represented by its name, it is translated by changing
;; all of the letters to upper case, making it an atomic symbol. Thus is
;; translated to CAR.
;; 2. If the function uses the lambda notation, then the expression
;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
;; of ε.
;; 3. If the function begins with label, then the translation of
;; label[α;ε] is (LABEL α* ε*).
;; Forms are translated as follows:
;; 1. A variable, like a function name, is translated by using uppercase letters.
;; Thus the translation of varl is VAR1.
;; 2. The obvious translation of letting a constant translate into itself will not
;; work. Since the translation of x is X, the translation of X must be something
;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
;; into (QUOTE X).
;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
;; 4. The conditional expression [pl-el;...;pn-en] is translated into
;; (COND (p1* e1*)...(pn* en*))
;; ## Examples
;; M-expressions S-expressions
;; x X
;; car CAR
;; car[x] (CAR X)
;; T (QUOTE T)
;; ff[car [x]] (FF (CAR X))
;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))
;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
;; ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))))
;; ]] quote ends
(defn gen-cond-clause
"Generate a cond clause from this simplified parse tree fragment `p`;
returns `nil` if `p` does not represent a cond clause."
[p]
(when
(and (coll? p) (= :cond-clause (first p)))
(make-beowulf-list
(list (if (= (nth p 1) [:quoted-expr [:atom "T"]])
'T
(generate (nth p 1)))
(generate (nth p 2))))))
(defn gen-cond
"Generate a cond statement from this simplified parse tree fragment `p`;
returns `nil` if `p` does not represent a (MEXPR) cond statement."
[p]
(when
(and (coll? p) (= :cond (first p)))
(make-beowulf-list
(cons
'COND
(map
generate
(rest p))))))
(defn gen-fn-call
"Generate a function call from this simplified parse tree fragment `p`;
returns `nil` if `p` does not represent a (MEXPR) function call."
[p]
(when
(and (coll? p) (= :fncall (first p)) (= :mvar (first (second p))))
(make-cons-cell
(generate (second p))
(generate (nth p 2)))))
(defn gen-dot-terminated-list
"Generate a list, which may be dot-terminated, from this partial parse tree
'p'. Note that the function acts recursively and progressively decapitates
its argument, so that the argument will not always be a valid parse tree."
[p]
(cond
(empty? p)
NIL
(and (coll? (first p)) (= :dot-terminal (first (first p))))
(let [dt (first p)]
(make-cons-cell
(generate (nth dt 1))
(generate (nth dt 2))))
:else
(make-cons-cell
(generate (first p))
(gen-dot-terminated-list (rest p)))))
(defn generate-defn
[tree]
(make-beowulf-list
(list 'SET
(list 'QUOTE (generate (-> tree second second)))
(list 'QUOTE
(cons 'LAMBDA
(cons (generate (nth (second tree) 2))
(map generate (-> tree rest rest rest))))))))
(defn generate-set
"Actually not sure what the mexpr representation of set looks like"
[tree]
(throw (ex-info "Not Yet Implemented" {:feature "generate-set"})))
(defn generate-assign
"Generate an assignment statement based on this `tree`. If the thing
being assigned to is a function signature, then we have to do something
different to if it's an atom."
[tree]
(case (first (second tree))
:fncall (generate-defn tree)
(:mvar :atom) (generate-set tree)))
(defn strip-leading-zeros
"`read-string` interprets strings with leading zeros as octal; strip
any from this string `s`. If what's left is empty (i.e. there were
only zeros, return `\"0\"`."
([s]
(strip-leading-zeros s ""))
([s prefix]
(if
(empty? s) "0"
(case (first s)
(\+ \-) (strip-leading-zeros (subs s 1) (str (first s) prefix))
"0" (strip-leading-zeros (subs s 1) prefix)
(str prefix s)))))
(defn generate
"Generate lisp structure from this parse tree `p`. It is assumed that
`p` has been simplified."
[p]
(try
(if
(coll? p)
(case (first p)
"LAMBDA"
:λexpr (make-cons-cell
(generate (nth p 1))
(make-cons-cell (generate (nth p 2))
(generate (nth p 3))))
:args (make-beowulf-list (map generate (rest p)))
:atom (symbol (second p))
:bindings (generate (second p))
:body (make-beowulf-list (map generate (rest p)))
:cond (gen-cond p)
:cond-clause (gen-cond-clause p)
(:decimal :integer) (read-string (strip-leading-zeros (second p)))
:defn (generate-assign p)
:dotted-pair (make-cons-cell
(generate (nth p 1))
(generate (nth p 2)))
:exponent (generate (second p))
:fncall (gen-fn-call p)
:list (gen-dot-terminated-list (rest p))
:mvar (symbol (upper-case (second p)))
:octal (let [n (read-string (strip-leading-zeros (second p) "0"))
scale (generate (nth p 2))]
(* n (expt 8 scale)))
;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
:quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
:scale-factor (if
(empty? (second p)) 0
(read-string (strip-leading-zeros (second p))))
:scientific (let [n (generate (second p))
exponent (generate (nth p 2))]
(* n (expt 10 exponent)))
;; default
(throw (ex-info (str "Unrecognised head: " (first p))
{:generating p})))
p)
(catch Throwable any
(throw (ex-info "Could not generate"
{:generating p}
any)))))
;; (defn parse
;; "Parse string `s` into a parse tree which can then be operated upon further."
;; [s]
;; (let [r (parse-internal s)]
;; (when (instance? Failure r)
;; (throw
;; (ex-info "Parse failed"
;; (merge {:fail r :source s} r))))
;; r))
(defn gsp
"Shortcut macro - the internals of read; or, if you like, read-string.
Argument `s` should be a string representation of a valid Lisp
@ -447,15 +63,24 @@
(throw (ex-info "Parse failed" (assoc parse-tree :source source))))
(generate (simplify parse-tree)))))
(defn read-from-console
"Attempt to read a complete lisp expression from the console."
[]
(loop [r (read-line)]
(if (= (count (re-seq #"\(" r))
(count (re-seq #"\)" r)))
r
(recur (str r "\n" (read-line))))))
(defn READ
"An implementation of a Lisp reader sufficient for bootstrapping; not necessarily
the final Lisp reader. `input` should be either a string representation of a LISP
expression, or else an input stream. A single form will be read."
([]
(gsp (read-line)))
(gsp (read-from-console)))
([input]
(cond
(empty? input) (gsp (read-line))
(empty? input) (gsp (read-from-console))
(string? input) (gsp input)
(instance? InputStream input) (READ (slurp input))
:else (throw (ex-info "READ: `input` should be a string or an input stream" {})))))

View file

@ -0,0 +1,198 @@
(ns beowulf.reader.generate
(:require [beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]]
[clojure.math.numeric-tower :refer [expt]]
[clojure.string :refer [upper-case]]))
;; # From Lisp 1.5 Programmers Manual, page 10
;; Note that I've retyped much of this, since copy/pasting out of PDF is less
;; than reliable. Any typos are mine. Quote starts [[
;; We are now in a position to define the universal LISP function
;; evalquote[fn;args], When evalquote is given a function and a list of arguments
;; for that function, it computes the value of the function applied to the arguments.
;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
;; of the function evalquote must be an S-expression. Since we have been
;; writing functions as M-expressions, it is necessary to translate them into
;; S-expressions.
;; The following rules define a method of translating functions written in the
;; meta-language into S-expressions.
;; 1. If the function is represented by its name, it is translated by changing
;; all of the letters to upper case, making it an atomic symbol. Thus is
;; translated to CAR.
;; 2. If the function uses the lambda notation, then the expression
;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
;; of ε.
;; 3. If the function begins with label, then the translation of
;; label[α;ε] is (LABEL α* ε*).
;; Forms are translated as follows:
;; 1. A variable, like a function name, is translated by using uppercase letters.
;; Thus the translation of varl is VAR1.
;; 2. The obvious translation of letting a constant translate into itself will not
;; work. Since the translation of x is X, the translation of X must be something
;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
;; into (QUOTE X).
;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
;; 4. The conditional expression [pl-el;...;pn-en] is translated into
;; (COND (p1* e1*)...(pn* en*))
;; ## Examples
;; M-expressions S-expressions
;; x X
;; car CAR
;; car[x] (CAR X)
;; T (QUOTE T)
;; ff[car [x]] (FF (CAR X))
;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))
;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
;; ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))))
;; ]] quote ends
(declare generate)
(defn gen-cond-clause
"Generate a cond clause from this simplified parse tree fragment `p`;
returns `nil` if `p` does not represent a cond clause."
[p]
(when
(and (coll? p) (= :cond-clause (first p)))
(make-beowulf-list
(list (if (= (nth p 1) [:quoted-expr [:atom "T"]])
'T
(generate (nth p 1)))
(generate (nth p 2))))))
(defn gen-cond
"Generate a cond statement from this simplified parse tree fragment `p`;
returns `nil` if `p` does not represent a (MEXPR) cond statement."
[p]
(when
(and (coll? p) (= :cond (first p)))
(make-beowulf-list
(cons
'COND
(map
generate
(rest p))))))
(defn gen-fn-call
"Generate a function call from this simplified parse tree fragment `p`;
returns `nil` if `p` does not represent a (MEXPR) function call."
[p]
(when
(and (coll? p) (= :fncall (first p)) (= :mvar (first (second p))))
(make-cons-cell
(generate (second p))
(generate (nth p 2)))))
(defn gen-dot-terminated-list
"Generate a list, which may be dot-terminated, from this partial parse tree
'p'. Note that the function acts recursively and progressively decapitates
its argument, so that the argument will not always be a valid parse tree."
[p]
(cond
(empty? p)
NIL
(and (coll? (first p)) (= :dot-terminal (first (first p))))
(let [dt (first p)]
(make-cons-cell
(generate (nth dt 1))
(generate (nth dt 2))))
:else
(make-cons-cell
(generate (first p))
(gen-dot-terminated-list (rest p)))))
(defn generate-defn
[tree]
(make-beowulf-list
(list 'SET
(list 'QUOTE (generate (-> tree second second)))
(list 'QUOTE
(cons 'LAMBDA
(cons (generate (nth (second tree) 2))
(map generate (-> tree rest rest rest))))))))
(defn generate-set
"Actually not sure what the mexpr representation of set looks like"
[tree]
(throw (ex-info "Not Yet Implemented" {:feature "generate-set"})))
(defn generate-assign
"Generate an assignment statement based on this `tree`. If the thing
being assigned to is a function signature, then we have to do something
different to if it's an atom."
[tree]
(case (first (second tree))
:fncall (generate-defn tree)
(:mvar :atom) (generate-set tree)))
(defn strip-leading-zeros
"`read-string` interprets strings with leading zeros as octal; strip
any from this string `s`. If what's left is empty (i.e. there were
only zeros, return `\"0\"`."
([s]
(strip-leading-zeros s ""))
([s prefix]
(if
(empty? s) "0"
(case (first s)
(\+ \-) (strip-leading-zeros (subs s 1) (str (first s) prefix))
"0" (strip-leading-zeros (subs s 1) prefix)
(str prefix s)))))
(defn generate
"Generate lisp structure from this parse tree `p`. It is assumed that
`p` has been simplified."
[p]
(try
(if
(coll? p)
(case (first p)
"LAMBDA"
:λexpr (make-cons-cell
(generate (nth p 1))
(make-cons-cell (generate (nth p 2))
(generate (nth p 3))))
:args (make-beowulf-list (map generate (rest p)))
:atom (symbol (second p))
:bindings (generate (second p))
:body (make-beowulf-list (map generate (rest p)))
:cond (gen-cond p)
:cond-clause (gen-cond-clause p)
(:decimal :integer) (read-string (strip-leading-zeros (second p)))
:defn (generate-assign p)
:dotted-pair (make-cons-cell
(generate (nth p 1))
(generate (nth p 2)))
:exponent (generate (second p))
:fncall (gen-fn-call p)
:list (gen-dot-terminated-list (rest p))
:mvar (symbol (upper-case (second p)))
:octal (let [n (read-string (strip-leading-zeros (second p) "0"))
scale (generate (nth p 2))]
(* n (expt 8 scale)))
;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
:quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
:scale-factor (if
(empty? (second p)) 0
(read-string (strip-leading-zeros (second p))))
:scientific (let [n (generate (second p))
exponent (generate (nth p 2))]
(* n (expt 10 exponent)))
;; default
(throw (ex-info (str "Unrecognised head: " (first p))
{:generating p})))
p)
(catch Throwable any
(throw (ex-info "Could not generate"
{:generating p}
any)))))

View file

@ -0,0 +1,84 @@
(ns beowulf.reader.parser
"The actual parser, supporting both S-expression and M-expression syntax."
(:require [instaparse.core :as i]))
(def parse
"Parse a string presented as argument into a parse tree which can then
be operated upon further."
(i/parser
(str
;; we tolerate whitespace and comments around legitimate input
"raw := expr | opt-comment expr opt-comment;"
;; top level: we accept mexprs as well as sexprs.
"expr := mexpr | sexpr ;"
;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
"comment := opt-space <';;'> opt-space #'[^\\n\\r]*';"
;; there's a notation comprising a left brace followed by mexprs
;; followed by a right brace which doesn't seem to be documented
;; but I think must represent a prog(?)
;; "prog := lbrace exprs rbrace;"
;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
;; but it's a convenience.
"exprs := expr | exprs;"
"mexpr := λexpr | fncall | defn | cond | mvar | iexpr | mexpr comment;
λexpr := λ lsqb bindings semi-colon body rsqb;
λ := ';
bindings := lsqb args rsqb;
body := (expr semi-colon opt-space)* expr;
fncall := fn-name lsqb args rsqb;
lsqb := '[';
rsqb := ']';
lbrace := '{';
rbrace := '}';
defn := mexpr opt-space '=' opt-space mexpr;
cond := lsqb (opt-space cond-clause semi-colon opt-space)* cond-clause rsqb;
cond-clause := expr opt-space arrow opt-space expr opt-space;
arrow := '->';
args := (opt-space expr semi-colon opt-space)* expr;
fn-name := mvar;
mvar := #'[a-z]+';
semi-colon := ';';"
;; Infix operators appear in mexprs, e.g. on page 7. Ooops!
;; I do not know what infix operators are considered legal.
"iexpr := iexp iop iexp;
iexp := mexpr | mvar | number | mexpr | opt-space iexp opt-space;
iop := '>' | '<' | '+' | '-' | '/' | '=' ;"
;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
"opt-comment := opt-space | comment;"
"comment := opt-space <';;'> #'[^\\n\\r]*' opt-space;"
;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
;; but I've included it on the basis that it can do little harm.
"sexpr := quoted-expr | atom | number | dotted-pair | list | sexpr comment;
list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal | lbrace exprs rbrace;
list := lpar opt-space sexpr rpar | lpar opt-space (sexpr sep)* rpar | lpar opt-space (sexpr sep)* dot-terminal;
dotted-pair := lpar dot-terminal ;
dot := '.';
lpar := '(';
rpar := ')';
quoted-expr := quote sexpr;
quote := '\\'';
dot-terminal := sexpr space dot space sexpr rpar;
space := #'\\p{javaWhitespace}+';
opt-space := #'\\p{javaWhitespace}*';
sep := ',' | opt-space;
atom := #'[A-Z][A-Z0-9]*';"
;; Lisp 1.5 supported octal as well as decimal and scientific notation
"number := integer | decimal | scientific | octal;
integer := #'-?[1-9][0-9]*';
decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0.[0-9]*';
scientific := coefficient e exponent;
coefficient := decimal;
exponent := integer;
e := 'E';
octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
q := 'Q';
scale-factor := #'[0-9]*'")))

View file

@ -0,0 +1,94 @@
(ns beowulf.reader.simplify
"Simplify parse trees. Be aware that this is very tightly coupled
with the parser."
(:require [beowulf.bootstrap :refer [*options*]]
[instaparse.failure :as f])
(:import [instaparse.gll Failure]))
(declare simplify)
(defn remove-optional-space
[tree]
(if (vector? tree)
(if (= :opt-space (first tree))
nil
(remove nil?
(map remove-optional-space tree)))
tree))
(defn remove-nesting
[tree]
(let [tree' (remove-optional-space tree)]
(if-let [key (when (and (vector? tree') (keyword? (first tree'))) (first tree'))]
(loop [r tree']
(if (and r (vector? r) (keyword? (first r)))
(if (= (first r) key)
(recur (simplify (second r) :foo))
r)
r))
tree')))
(defn simplify
"Simplify this parse tree `p`. If `p` is an instaparse failure object, throw
an `ex-info`, with `p` as the value of its `:failure` key."
([p]
(if
(instance? Failure p)
(throw (ex-info (str "Ic ne behæfd: " (f/pprint-failure p)) {:cause :parse-failure
:phase :simplify
:failure p}))
(simplify p :expr)))
([p context]
(if
(coll? p)
(apply
vector
(remove
#(when (coll? %) (empty? %))
(case (first p)
(:λexpr
:args :bindings :body :cond :cond-clause :defn :dot-terminal
:fncall :lhs :octal :quoted-expr :rhs :scientific) (map #(simplify % context) p)
(:arg :expr :coefficient :fn-name :number) (simplify (second p) context)
(:arrow :dot :e :lpar :lsqb :opt-comment :opt-space :q :quote :rpar :rsqb
:semi-colon :sep :space) nil
:atom (if
(= context :mexpr)
[:quoted-expr p]
p)
:comment (when
(:strict *options*)
(throw
(ex-info "Cannot parse comments in strict mode"
{:cause :strict})))
:dotted-pair (if
(= context :mexpr)
[:fncall
[:mvar "cons"]
[:args
(simplify (nth p 1) context)
(simplify (nth p 2) context)]]
(map simplify p))
:iexp (second (remove-nesting p))
:iexpr [:iexpr
[:lhs (simplify (second p) context)]
(simplify (nth p 2) context) ;; really should be the operator
[:rhs (simplify (nth p 3) context)]]
:mexpr (if
(:strict *options*)
(throw
(ex-info "Cannot parse meta expressions in strict mode"
{:cause :strict}))
(simplify (second p) :mexpr))
:list (if
(= context :mexpr)
[:fncall
[:mvar "list"]
[:args (apply vector (map simplify (rest p)))]]
(map #(simplify % context) p))
:raw (first (remove empty? (map simplify (rest p))))
:sexpr (simplify (second p) :sexpr)
;;default
p)))
p)))

View file

@ -1,8 +1,9 @@
(ns beowulf.bootstrap-test
(:require [clojure.math.numeric-tower :refer [abs]]
[clojure.test :refer :all]
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL T F]]
[beowulf.bootstrap :refer :all]
(:require [clojure.test :refer [deftest testing is]]
[beowulf.cons-cell :refer [make-cons-cell NIL T F]]
[beowulf.bootstrap :refer [APPEND ASSOC ATOM ATOM? CAR CAAAAR CADR
CADDR CADDDR CDR EQ EQUAL MEMBER
PAIRLIS SUBLIS SUBST]]
[beowulf.read :refer [gsp]]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -51,21 +52,6 @@
actual (ATOM? (gsp "(A B C D)"))]
(is (= actual expected) "A list is explicitly not an atom"))))
(deftest numberp-tests
(testing "NUMBERP"
(let [expected T
actual (NUMBERP 7)]
(is (= actual expected) "7 is a number"))
(let [expected T
actual (NUMBERP 3.14)]
(is (= actual expected) "3.14 is a number"))
(let [expected F
actual (NUMBERP NIL)]
(is (= actual expected) "NIL is not a number"))
(let [expected F
actual (NUMBERP (gsp "HELLO"))]
(is (= actual expected) "HELLO is not a number"))))
(deftest access-function-tests
(testing "CAR"
(let [expected 'A
@ -132,13 +118,18 @@
(let [expected 'T
actual (EQ 'FRED 'FRED)]
(is (= actual expected) "identical symbols"))
(let [expected 'F
(let [expected 'NIL
actual (EQ 'FRED 'ELFREDA)]
(is (= actual expected) "different symbols"))
(let [expected 'F
(let [expected 'T
l (gsp "(NOT AN ATOM)")
actual (EQ l l)]
(is (= actual expected) "identical lists (EQ is not defined for lists)")))
(is (= actual expected) "identically the same list"))
(let [expected 'NIL
l1 (gsp "(NOT AN ATOM)")
l2 (gsp "(NOT AN ATOM)")
actual (EQ l1 l2)]
(is (= actual expected) "different lists with the same content")))
(testing "equal"
(let [expected 'T
actual (EQUAL 'FRED 'FRED)]

View file

@ -1,11 +1,12 @@
(ns beowulf.cons-cell-test
(:require [clojure.test :refer :all]
[beowulf.cons-cell :refer :all]))
(:require [clojure.test :refer [deftest is testing]]
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell pretty-print]])
(:import [beowulf.cons_cell ConsCell]))
(deftest cons-cell-tests
(testing "make-cons-cell"
(let [expected "(A . B)"
actual (print-str (beowulf.cons_cell.ConsCell. 'A 'B))]
actual (print-str (ConsCell. 'A 'B (gensym "c")))]
(is (= actual expected) "Cons cells should print as cons cells, natch."))
(let [expected "(A . B)"
actual (print-str (make-cons-cell 'A 'B))]

View file

@ -1,8 +1,8 @@
(ns beowulf.core-test
(:require [clojure.java.io :refer [reader]]
[clojure.string :refer [split]]
[clojure.test :refer :all]
[beowulf.core :refer :all]))
[clojure.test :refer [deftest is testing]]
[beowulf.core :refer [-main repl stop-word]]))
;; (deftest a-test
;; (testing "FIXME, I fail."
@ -36,7 +36,7 @@
(testing "No flags"
(let [expected-greeting "Hider wilcuman. Béowulf is mín nama."
expected-quit-message (str "Sprecan '" stop-word "' tó laéfan")
expected-result #".*\(A \. B\)"
expected-result #".*\(3 \. 4\)"
expected-prompt "Sprecan:: "
expected-signoff "Færwell!"
;; anticipated output (note blank lines):
@ -45,11 +45,11 @@
; Sprecan 'STOP' tó laéfan
; Sprecan:: > (A . B)
; Sprecan:: > (3 . 4)
; Sprecan::
; Færwell!
[_ greeting _ _ quit-message _ result prompt signoff]
(with-open [r (reader (string->stream (str "cons[A; B]\n" stop-word)))]
(with-open [r (reader (string->stream (str "cons[3; 4]\n" stop-word)))]
(binding [*in* r]
(split (with-out-str (-main)) #"\n")))]
(is (= greeting expected-greeting))
@ -63,11 +63,11 @@
(let [expected-greeting "Hider wilcuman. Béowulf is mín nama."
expected-quit-message (str "Sprecan '" stop-word "' tó laéfan")
expected-error #"Unknown option:.*"
expected-result #".*\(A \. B\)"
expected-result #".*\(5 \. 6\)"
expected-prompt "Sprecan:: "
expected-signoff "Færwell!"
[_ greeting _ error quit-message _ result prompt signoff]
(with-open [r (reader (string->stream (str "cons[A; B]\n" stop-word)))]
(with-open [r (reader (string->stream (str "cons[5; 6]\n" stop-word)))]
(binding [*in* r]
(split (with-out-str (-main "--unknown")) #"\n")))]
(is (= greeting expected-greeting))

View file

@ -1,9 +1,8 @@
(ns beowulf.host-test
(:require [clojure.math.numeric-tower :refer [abs]]
[clojure.test :refer :all]
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL T F]]
(:require [clojure.test :refer [deftest is testing]]
[beowulf.bootstrap :refer [CDR]]
[beowulf.host :refer :all]
[beowulf.cons-cell :refer [F make-beowulf-list NIL T]]
[beowulf.host :refer [DIFFERENCE NUMBERP PLUS2 RPLACA RPLACD TIMES2]]
[beowulf.read :refer [gsp]]))
(deftest destructive-change-test
@ -35,6 +34,21 @@
)
)
(deftest numberp-tests
(testing "NUMBERP"
(let [expected T
actual (NUMBERP 7)]
(is (= actual expected) "7 is a number"))
(let [expected T
actual (NUMBERP 3.14)]
(is (= actual expected) "3.14 is a number"))
(let [expected F
actual (NUMBERP NIL)]
(is (= actual expected) "NIL is not a number"))
(let [expected F
actual (NUMBERP (gsp "HELLO"))]
(is (= actual expected) "HELLO is not a number"))))
(deftest arithmetic-test
;; These are just sanity-test tests; they're by no means exhaustive.
(testing "PLUS2"

View file

@ -1,8 +1,6 @@
(ns beowulf.interop-test
(:require [clojure.test :refer :all]
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL T F]]
(:require [clojure.test :refer [deftest is testing]]
[beowulf.bootstrap :refer [EVAL INTEROP QUOTE]]
[beowulf.host :refer :all]
[beowulf.read :refer [gsp]]))

View file

@ -1,9 +1,12 @@
(ns beowulf.mexpr-test
"These tests are taken generally from the examples on page 10 of
Lisp 1.5 Programmers Manual"
(:require [clojure.test :refer :all]
(:require [clojure.test :refer [deftest is testing]]
[beowulf.bootstrap :refer [*options*]]
[beowulf.read :refer [parse simplify generate gsp]]))
[beowulf.read :refer [gsp]]
[beowulf.reader.generate :refer [generate]]
[beowulf.reader.parser :refer [parse]]
[beowulf.reader.simplify :refer [simplify]]))
;; These tests are taken generally from the examples on page 10 of
;; Lisp 1.5 Programmers Manual:
@ -39,13 +42,14 @@
;; Wrapping in a function call puts us into mexpr contest;
;; "T" would be interpreted as a sexpr, which would not be
;; quoted.
(let [expected "(ATOM (QUOTE A))"
(let [expected "(ATOM A)"
actual (print-str (gsp "atom[A]"))]
(is (= actual expected)
"Atoms should normally be quoted"))
(is (= actual expected)))
;; I'm not clear how `car[(A B C)]` should be translated, but
;; I suspect as (CAR (LIST 'A 'B 'C)).
;; I suspect as (CAR (LIST A B C)).
(let [expected "(CAR (LIST A B C))"
actual (print-str (gsp "car[(A B C)]"))]
(is (= actual expected)))
))
(deftest fncall-tests
@ -79,6 +83,6 @@
(deftest assignment-tests
(testing "Function assignment"
(let [expected "(SET (QUOTE FF) (LAMBDA (X) (COND ((ATOM X) X) (T (FF (CAR X))))))"
actual (gsp "ff[x]=[atom[x] -> x; T -> ff[car[x]]]")]
(let [expected "(SET (QUOTE FF) (QUOTE (LAMBDA (X) (COND ((ATOM X) X) (T (FF (CAR X)))))))"
actual (print-str (gsp "ff[x]=[atom[x] -> x; T -> ff[car[x]]]"))]
(is (= actual expected)))))

View file

@ -1,9 +1,8 @@
(ns beowulf.sexpr-test
(:require [clojure.math.numeric-tower :refer [abs]]
[clojure.test :refer :all]
[beowulf.cons-cell :refer :all]
(:require [clojure.test :refer [deftest is testing]]
[beowulf.bootstrap :refer [*options*]]
[beowulf.read :refer [parse simplify generate gsp]]))
[beowulf.cons-cell :refer []]
[beowulf.read :refer [gsp]]))
;; broadly, sexprs should be homoiconic