Full mexpr parsing working, number parsing working

This commit is contained in:
Simon Brooke 2019-08-15 16:35:46 +01:00
parent b4091f28dc
commit 0321401c2a
7 changed files with 316 additions and 46 deletions

View file

@ -4,6 +4,7 @@
:license {:name "GPL-2.0-or-later"
:url "https://www.eclipse.org/legal/epl-2.0/"}
:dependencies [[org.clojure/clojure "1.10.0"]
[org.clojure/math.numeric-tower "0.0.4"]
[instaparse "1.4.10"]]
:main ^:skip-aot beowulf.core
:target-path "target/%s"

View file

@ -1,5 +1,4 @@
(ns beowulf.print
;; (:require [beowulf.cons-cell])
)
(defprotocol Printable

View file

@ -1,5 +1,7 @@
(ns beowulf.read
(:require [instaparse.core :as i]
(:require [clojure.math.numeric-tower :refer [expt]]
[clojure.string :refer [starts-with? upper-case]]
[instaparse.core :as i]
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]])
;; (:import [beowulf.cons-cell ConsCell])
)
@ -16,7 +18,11 @@
;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
;; but it's a convenience.
"mexpr := fncall | defn | cond;
"mexpr := λexpr | fncall | defn | cond | mvar;
λexpr := λ lsqb bindings semi-colon body rsqb;
λ := ';
bindings := lsqb args rsqb;
body := (expr semi-colon opt-space)* expr;
fncall := fn-name lsqb args rsqb;
lsqb := '[';
rsqb := ']';
@ -24,31 +30,44 @@
cond := lsqb (cond-clause semi-colon opt-space)* cond-clause rsqb;
cond-clause := expr opt-space arrow opt-space expr;
arrow := '->';
args := (arg semi-colon opt-space)* arg;
arg := mexpr | sexpr;
fn-name := #'[a-z]*';
args := (expr semi-colon opt-space)* expr;
fn-name := mvar;
mvar := #'[a-z]+';
semi-colon := ';';"
;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
;; but I've included it on the basis that it can do little harm.
"list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
"sexpr := quoted-expr | atom | number | dotted-pair | list;
list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
dotted-pair := lpar dot-terminal ;
dot := '.';
lpar := '(';
rpar := ')';
sexpr := atom | number | dotted-pair | list | quoted-expr;
quoted-expr := quote sexpr;
quote := '\\'';
dot-terminal := sexpr space dot space sexpr rpar;
space := #'\\p{javaWhitespace}+';
opt-space := #'\\p{javaWhitespace}*';
sep := ',' | opt-space;
atom := #'[A-Z][A-Z0-9]*';
number := #'-?[0-9][0-9.]*';")))
atom := #'[A-Z][A-Z0-9]*';"
;; Lisp 1.5 supported octal as well as decimal and scientific notation
"number := integer | decimal | scientific | octal;
integer := #'-?[1-9][0-9]*';
decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0\\.[0-9]*';
scientific := coefficient e exponent;
coefficient := decimal;
exponent := integer;
e := 'E';
octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
q := 'Q';
scale-factor := #'[0-9]*'")))
(defn simplify
"Simplify this parse tree `p`."
[p]
([p]
(simplify p :sexpr))
([p context]
(if
(coll? p)
(apply
@ -56,32 +75,86 @@
(remove
#(if (coll? %) (empty? %))
(case (first p)
(:arrow :dot :lpar :lsqb :opt-space :quote :rpar :rsqb :semi-colon :sep :space) nil
(:arg :expr :mexpr :sexpr) (simplify (second p))
(:args :cond :cond-clause :dot-terminal :dotted-pair :fncall :list) (map simplify p)
;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
:quoted-expr [:fncall [:fn-name "quote"] [:args (simplify (nth p 2))]]
(:arg :expr :coefficient :fn-name :number :sexpr) (simplify (second p) context)
(:λexpr
:args :bindings :body :cond :cond-clause :dot-terminal
:fncall :octal :quoted-expr :scientific) (map #(simplify % context) p)
(:arrow :dot :e :lpar :lsqb :opt-space :q :quote :rpar :rsqb
:semi-colon :sep :space) nil
:atom (if
(= context :mexpr)
[:quoted-expr p]
p)
:dotted-pair (if
(= context :mexpr)
[:fncall
[:mvar "cons"]
[:args
(simplify (nth p 1) context)
(simplify (nth p 2) context)]]
(map simplify p))
:mexpr (simplify (second p) :mexpr)
:list (if
(= context :mexpr)
[:fncall
[:mvar "list"]
[:args (apply vector (map simplify (rest p)))]]
(map #(simplify % context) p))
;;default
p)))
p))
p)))
(defn gen-dot-terminated-list
"Generate a list, which may be dot-terminated, from this partial parse tree
'p'. Note that the function acts recursively and progressively decapitates
its argument, so that the argument will not always be a valid parse tree."
[p]
(cond
(empty? p)
NIL
(and (coll? (first p)) (= :dot-terminal (first (first p))))
(let [dt (first p)]
(make-cons-cell
(generate (nth dt 1))
(generate (nth dt 2))))
:else
(make-cons-cell
(generate (first p))
(gen-dot-terminated-list (rest p)))))
;; # From Lisp 1.5 Programmers Manual, page 10
;; Note that I've retyped much of this, since copy/pasting out of PDF is less
;; than reliable. Any typos are mine. Quote starts [[
;; We are now in a position to define the universal LISP function
;; evalquote[fn;args], When evalquote is given a function and a list of arguments
;; for that function, it computes the value of the function applied to the arguments.
;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
;; of the function evalquote must be an S-expression. Since we have been
;; writing functions as M-expressions, it is necessary to translate them into
;; S-expressions.
;; The following rules define a method of translating functions written in the
;; meta-language into S-expressions.
;; 1. If the function is represented by its name, it is translated by changing
;; all of the letters to upper case, making it an atomic symbol. Thus is
;; translated to CAR.
;; 2. If the function uses the lambda notation, then the expression
;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
;; of ε.
;; 3. If the function begins with label, then the translation of
;; label[α;ε] is (LABEL α* ε*).
;; Forms are translated as follows:
;; 1. A variable, like a function name, is translated by using uppercase letters.
;; Thus the translation of varl is VAR1.
;; 2. The obvious translation of letting a constant translate into itself will not
;; work. Since the translation of x is X, the translation of X must be something
;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
;; into (QUOTE X).
;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
;; 4. The conditional expression [pl-el;...;pn-en] is translated into
;; (COND (p1* e1*)...(pn* en*))
;; ## Examples
;; M-expressions S-expressions
;; x X
;; car CAR
;; car[x] (CAR X)
;; T (QUOTE T)
;; ff[car [x]] (FF (CAR X))
;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))
;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
;; ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))))
;; ]] quote ends
(defn gen-cond-clause
"Generate a cond clause from this simplified parse tree fragment `p`;
@ -113,11 +186,45 @@
implicitly quoted; this function does not (yet) do that."
[p]
(if
(and (coll? p)(= :fncall (first p))(= :fn-name (first (second p))))
(and (coll? p)(= :fncall (first p))(= :mvar (first (second p))))
(make-cons-cell
(second (second p))
(generate (second p))
(generate (nth p 2)))))
(defn gen-dot-terminated-list
"Generate a list, which may be dot-terminated, from this partial parse tree
'p'. Note that the function acts recursively and progressively decapitates
its argument, so that the argument will not always be a valid parse tree."
[p]
(cond
(empty? p)
NIL
(and (coll? (first p)) (= :dot-terminal (first (first p))))
(let [dt (first p)]
(make-cons-cell
(generate (nth dt 1))
(generate (nth dt 2))))
:else
(make-cons-cell
(generate (first p))
(gen-dot-terminated-list (rest p)))))
(defn strip-leading-zeros
"`read-string` interprets strings with leading zeros as octal; strip
any from this string `s`. If what's left is empty (i.e. there were
only zeros, return `\"0\"`."
([s]
(strip-leading-zeros s ""))
([s prefix]
(if
(empty? s) "0"
(case (first s)
(\+ \-)(strip-leading-zeros (subs s 1) (str (first s) prefix))
"0" (strip-leading-zeros (subs s 1) prefix)
(str prefix s)))))
(defn generate
"Generate lisp structure from this parse tree `p`. It is assumed that
`p` has been simplified."
@ -125,14 +232,36 @@
(if
(coll? p)
(case (first p)
"LAMBDA"
:λexpr (make-cons-cell
(generate (nth p 1))
(make-cons-cell (generate (nth p 2))
(generate (nth p 3))))
(:args :list) (gen-dot-terminated-list (rest p))
:atom (symbol (second p))
:bindings (generate (second p))
:body (make-beowulf-list (map generate (rest p)))
:cond (gen-cond p)
(:decimal :integer) (read-string (strip-leading-zeros (second p)))
:dotted-pair (make-cons-cell
(generate (nth p 1))
(generate (nth p 2)))
:exponent (generate (second p))
:fncall (gen-fn-call p)
(:args :list) (gen-dot-terminated-list (rest p))
:number (clojure.core/read-string (second p))
:mvar (upper-case (second p))
:octal (let [n (read-string (strip-leading-zeros (second p) "0"))
scale (generate (nth p 2))]
(* n (expt 8 scale)))
;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
:quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
:scale-factor (if
(empty? (second p)) 0
(read-string (strip-leading-zeros (second p))))
:scientific (let [n (generate (second p))
exponent (generate (nth p 2))]
(* n (expt 10 exponent)))
;; default
(throw (Exception. (str "Cannot yet generate " (first p)))))
p))

View file

@ -2,6 +2,6 @@
(:require [clojure.test :refer :all]
[beowulf.core :refer :all]))
(deftest a-test
(testing "FIXME, I fail."
(is (= 0 1))))
;; (deftest a-test
;; (testing "FIXME, I fail."
;; (is (= 0 1))))

View file

@ -0,0 +1,67 @@
(ns beowulf.mexpr-test
(:require [clojure.test :refer :all]
[beowulf.read :refer [parse simplify generate]]
[beowulf.print :refer :all]))
;; These tests are taken generally from the examples on page 10 of
;; Lisp 1.5 Programmers Manual:
;; ## Examples
;; M-expressions S-expressions
;; x X
;; car CAR
;; car[x] (CAR X)
;; T (QUOTE T)
;; ff[car [x]] (FF (CAR X))
;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))
;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
;; ((ATOM X) X)
;; ((QUOTE T)(FF (CAR X))))))
(deftest variable-tests
(testing "Variable translation"
(let [expected "X"
actual (prin (generate (simplify (parse "x"))))]
(is (= actual expected)))
(let [expected "CAR"
actual (prin (generate (simplify (parse "car"))))]
(is (= actual expected)))
))
(deftest literal-tests
(testing "Literal translation"
;; in the context of an M-expression, an upper case letter
;; or string represents a Lisp literal, and should be quoted.
;; Wrapping in a function call puts us into mexpr contest;
;; "T" would be interpreted as a sexpr, which would not be
;; quoted.
(let [expected "(ATOM (QUOTE T))"
actual (prin (generate (simplify (parse "atom[T]"))))]
(is (= actual expected)))
;; I'm not clear how `car[(A B C)]` should be translated, but
;; I suspect as (CAR (LIST 'A 'B 'C)).
))
(deftest fncall-tests
(testing "Function calls"
(let [expected "(CAR X)"
actual (prin (generate (simplify (parse "car[x]"))))]
(is (= actual expected)))
(let [expected "(FF (CAR X))"
actual (prin (generate (simplify (parse "ff[car[x]]"))))]
(is (= actual expected)))))
(deftest conditional-tests
(testing "Conditional expressions"
(let [expected "(COND ((ATOM X) X) ((QUOTE T) (FF (CAR X))))"
actual (prin (generate (simplify (parse "[atom[x]->x; T->ff[car[x]]]"))))]
(is (= actual expected)))
(let [expected "(LABEL FF (LAMBDA (X) (COND ((ATOM X) X) ((QUOTE T) (FF (CAR X))))))"
actual (prin
(generate
(simplify
(parse "label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]]"))))]
(is (= actual expected)))))

View file

@ -2,9 +2,7 @@
(:require [clojure.test :refer :all]
[beowulf.core :refer :all]))
(deftest a-test
(testing "FIXME, I fail."
(is (= 0 1))))
;; (deftest a-test
;; (testing "FIXME, I fail."
;; (is (= 0 1))))
(deftest parser-tests
(testing "symbols"

View file

@ -0,0 +1,76 @@
(ns beowulf.sexpr-test
(:require [clojure.math.numeric-tower :refer [abs]]
[clojure.test :refer :all]
[beowulf.read :refer [parse simplify generate]]
[beowulf.print :refer :all]))
;; broadly, sexprs should be homoiconic
(deftest atom-tests
(testing "Reading atoms"
(let [expected 'A
actual (generate (simplify (parse (str expected))))]
(is (= actual expected)))
(let [expected 'APPLE
actual (generate (simplify (parse (str expected))))]
(is (= actual expected)))
(let [expected 'PART2
actual (generate (simplify (parse (str expected))))]
(is (= actual expected)))
(let [expected 'EXTRALONGSTRINGOFLETTERS
actual (generate (simplify (parse (str expected))))]
(is (= actual expected)))
(let [expected 'A4B66XYZ2
actual (generate (simplify (parse (str expected))))]
(is (= actual expected)))))
(deftest number-tests
(testing "Reading octal numbers"
(let [expected 1
actual (generate (simplify (parse "1Q")))]
(is (= actual expected)))
(let [expected -1
actual (generate (simplify (parse "-1Q")))]
(is (= actual expected)))
(let [expected 8
actual (generate (simplify (parse "1Q1")))]
(is (= actual expected)))
(let [expected -8
actual (generate (simplify (parse "-1Q1")))]
(is (= actual expected)))
(let [expected 128
actual (generate (simplify (parse "2Q2")))]
(is (= actual expected)))
(let [expected 2093056
actual (generate (simplify (parse "777Q4")))]
(is (= actual expected))))
(testing "Reading decimal numbers - broadly should be homiconic"
(let [expected 7
actual (generate (simplify (parse "7")))]
(is (= actual expected)))
(let [expected -7
actual (generate (simplify (parse "-7")))]
(is (= actual expected)))
(let [expected 3.141592
actual (generate (simplify (parse "3.141592")))]
(is (= actual expected)))
(let [expected 1234567890
actual (generate (simplify (parse "1234567890")))]
(is (= actual expected)))
(let [expected -45.23
actual (generate (simplify (parse "-45.23")))]
(is (= actual expected))))
(testing "Reading scientific notation")
(let [expected 2/5
actual (generate (simplify (parse "4E-1")))]
(is (< (abs (- actual expected)) 0.0001)))
(let [expected 60
actual (generate (simplify (parse "6E1")))]
(is (< (abs (- actual expected)) 0.0001)))
(let [expected 60
actual (generate (simplify (parse "600.00E-1")))]
(is (< (abs (- actual expected)) 0.0001)))
(let [expected 60
actual (generate (simplify (parse "0.6E2")))]
(is (< (abs (- actual expected)) 0.0001))))