Full mexpr parsing working, number parsing working
This commit is contained in:
parent
b4091f28dc
commit
0321401c2a
|
@ -4,6 +4,7 @@
|
|||
:license {:name "GPL-2.0-or-later"
|
||||
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
||||
:dependencies [[org.clojure/clojure "1.10.0"]
|
||||
[org.clojure/math.numeric-tower "0.0.4"]
|
||||
[instaparse "1.4.10"]]
|
||||
:main ^:skip-aot beowulf.core
|
||||
:target-path "target/%s"
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
(ns beowulf.print
|
||||
;; (:require [beowulf.cons-cell])
|
||||
)
|
||||
|
||||
(defprotocol Printable
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
(ns beowulf.read
|
||||
(:require [instaparse.core :as i]
|
||||
(:require [clojure.math.numeric-tower :refer [expt]]
|
||||
[clojure.string :refer [starts-with? upper-case]]
|
||||
[instaparse.core :as i]
|
||||
[beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]])
|
||||
;; (:import [beowulf.cons-cell ConsCell])
|
||||
)
|
||||
|
@ -16,7 +18,11 @@
|
|||
|
||||
;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
|
||||
;; but it's a convenience.
|
||||
"mexpr := fncall | defn | cond;
|
||||
"mexpr := λexpr | fncall | defn | cond | mvar;
|
||||
λexpr := λ lsqb bindings semi-colon body rsqb;
|
||||
λ := 'λ';
|
||||
bindings := lsqb args rsqb;
|
||||
body := (expr semi-colon opt-space)* expr;
|
||||
fncall := fn-name lsqb args rsqb;
|
||||
lsqb := '[';
|
||||
rsqb := ']';
|
||||
|
@ -24,31 +30,44 @@
|
|||
cond := lsqb (cond-clause semi-colon opt-space)* cond-clause rsqb;
|
||||
cond-clause := expr opt-space arrow opt-space expr;
|
||||
arrow := '->';
|
||||
args := (arg semi-colon opt-space)* arg;
|
||||
arg := mexpr | sexpr;
|
||||
fn-name := #'[a-z]*';
|
||||
args := (expr semi-colon opt-space)* expr;
|
||||
fn-name := mvar;
|
||||
mvar := #'[a-z]+';
|
||||
semi-colon := ';';"
|
||||
|
||||
;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
|
||||
;; but I've included it on the basis that it can do little harm.
|
||||
"list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
|
||||
"sexpr := quoted-expr | atom | number | dotted-pair | list;
|
||||
list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal;
|
||||
dotted-pair := lpar dot-terminal ;
|
||||
dot := '.';
|
||||
lpar := '(';
|
||||
rpar := ')';
|
||||
sexpr := atom | number | dotted-pair | list | quoted-expr;
|
||||
quoted-expr := quote sexpr;
|
||||
quote := '\\'';
|
||||
dot-terminal := sexpr space dot space sexpr rpar;
|
||||
space := #'\\p{javaWhitespace}+';
|
||||
opt-space := #'\\p{javaWhitespace}*';
|
||||
sep := ',' | opt-space;
|
||||
atom := #'[A-Z][A-Z0-9]*';
|
||||
number := #'-?[0-9][0-9.]*';")))
|
||||
atom := #'[A-Z][A-Z0-9]*';"
|
||||
|
||||
;; Lisp 1.5 supported octal as well as decimal and scientific notation
|
||||
"number := integer | decimal | scientific | octal;
|
||||
integer := #'-?[1-9][0-9]*';
|
||||
decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0\\.[0-9]*';
|
||||
scientific := coefficient e exponent;
|
||||
coefficient := decimal;
|
||||
exponent := integer;
|
||||
e := 'E';
|
||||
octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
|
||||
q := 'Q';
|
||||
scale-factor := #'[0-9]*'")))
|
||||
|
||||
(defn simplify
|
||||
"Simplify this parse tree `p`."
|
||||
[p]
|
||||
([p]
|
||||
(simplify p :sexpr))
|
||||
([p context]
|
||||
(if
|
||||
(coll? p)
|
||||
(apply
|
||||
|
@ -56,32 +75,86 @@
|
|||
(remove
|
||||
#(if (coll? %) (empty? %))
|
||||
(case (first p)
|
||||
(:arrow :dot :lpar :lsqb :opt-space :quote :rpar :rsqb :semi-colon :sep :space) nil
|
||||
(:arg :expr :mexpr :sexpr) (simplify (second p))
|
||||
(:args :cond :cond-clause :dot-terminal :dotted-pair :fncall :list) (map simplify p)
|
||||
;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
|
||||
:quoted-expr [:fncall [:fn-name "quote"] [:args (simplify (nth p 2))]]
|
||||
(:arg :expr :coefficient :fn-name :number :sexpr) (simplify (second p) context)
|
||||
(:λexpr
|
||||
:args :bindings :body :cond :cond-clause :dot-terminal
|
||||
:fncall :octal :quoted-expr :scientific) (map #(simplify % context) p)
|
||||
(:arrow :dot :e :lpar :lsqb :opt-space :q :quote :rpar :rsqb
|
||||
:semi-colon :sep :space) nil
|
||||
:atom (if
|
||||
(= context :mexpr)
|
||||
[:quoted-expr p]
|
||||
p)
|
||||
:dotted-pair (if
|
||||
(= context :mexpr)
|
||||
[:fncall
|
||||
[:mvar "cons"]
|
||||
[:args
|
||||
(simplify (nth p 1) context)
|
||||
(simplify (nth p 2) context)]]
|
||||
(map simplify p))
|
||||
:mexpr (simplify (second p) :mexpr)
|
||||
:list (if
|
||||
(= context :mexpr)
|
||||
[:fncall
|
||||
[:mvar "list"]
|
||||
[:args (apply vector (map simplify (rest p)))]]
|
||||
(map #(simplify % context) p))
|
||||
;;default
|
||||
p)))
|
||||
p))
|
||||
p)))
|
||||
|
||||
(defn gen-dot-terminated-list
|
||||
"Generate a list, which may be dot-terminated, from this partial parse tree
|
||||
'p'. Note that the function acts recursively and progressively decapitates
|
||||
its argument, so that the argument will not always be a valid parse tree."
|
||||
[p]
|
||||
(cond
|
||||
(empty? p)
|
||||
NIL
|
||||
(and (coll? (first p)) (= :dot-terminal (first (first p))))
|
||||
(let [dt (first p)]
|
||||
(make-cons-cell
|
||||
(generate (nth dt 1))
|
||||
(generate (nth dt 2))))
|
||||
:else
|
||||
(make-cons-cell
|
||||
(generate (first p))
|
||||
(gen-dot-terminated-list (rest p)))))
|
||||
|
||||
|
||||
;; # From Lisp 1.5 Programmers Manual, page 10
|
||||
;; Note that I've retyped much of this, since copy/pasting out of PDF is less
|
||||
;; than reliable. Any typos are mine. Quote starts [[
|
||||
|
||||
;; We are now in a position to define the universal LISP function
|
||||
;; evalquote[fn;args], When evalquote is given a function and a list of arguments
|
||||
;; for that function, it computes the value of the function applied to the arguments.
|
||||
;; LISP functions have S-expressions as arguments. In particular, the argument "fn"
|
||||
;; of the function evalquote must be an S-expression. Since we have been
|
||||
;; writing functions as M-expressions, it is necessary to translate them into
|
||||
;; S-expressions.
|
||||
|
||||
;; The following rules define a method of translating functions written in the
|
||||
;; meta-language into S-expressions.
|
||||
;; 1. If the function is represented by its name, it is translated by changing
|
||||
;; all of the letters to upper case, making it an atomic symbol. Thus is
|
||||
;; translated to CAR.
|
||||
;; 2. If the function uses the lambda notation, then the expression
|
||||
;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation
|
||||
;; of ε.
|
||||
;; 3. If the function begins with label, then the translation of
|
||||
;; label[α;ε] is (LABEL α* ε*).
|
||||
|
||||
;; Forms are translated as follows:
|
||||
;; 1. A variable, like a function name, is translated by using uppercase letters.
|
||||
;; Thus the translation of varl is VAR1.
|
||||
;; 2. The obvious translation of letting a constant translate into itself will not
|
||||
;; work. Since the translation of x is X, the translation of X must be something
|
||||
;; else to avoid ambiguity. The solution is to quote it. Thus X is translated
|
||||
;; into (QUOTE X).
|
||||
;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*)
|
||||
;; 4. The conditional expression [pl-el;...;pn-en] is translated into
|
||||
;; (COND (p1* e1*)...(pn* en*))
|
||||
|
||||
;; ## Examples
|
||||
|
||||
;; M-expressions S-expressions
|
||||
;; x X
|
||||
;; car CAR
|
||||
;; car[x] (CAR X)
|
||||
;; T (QUOTE T)
|
||||
;; ff[car [x]] (FF (CAR X))
|
||||
;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
|
||||
;; ((QUOTE T)(FF (CAR X))))
|
||||
;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
|
||||
;; ((ATOM X) X)
|
||||
;; ((QUOTE T)(FF (CAR X))))))
|
||||
|
||||
;; ]] quote ends
|
||||
|
||||
(defn gen-cond-clause
|
||||
"Generate a cond clause from this simplified parse tree fragment `p`;
|
||||
|
@ -113,11 +186,45 @@
|
|||
implicitly quoted; this function does not (yet) do that."
|
||||
[p]
|
||||
(if
|
||||
(and (coll? p)(= :fncall (first p))(= :fn-name (first (second p))))
|
||||
(and (coll? p)(= :fncall (first p))(= :mvar (first (second p))))
|
||||
(make-cons-cell
|
||||
(second (second p))
|
||||
(generate (second p))
|
||||
(generate (nth p 2)))))
|
||||
|
||||
|
||||
(defn gen-dot-terminated-list
|
||||
"Generate a list, which may be dot-terminated, from this partial parse tree
|
||||
'p'. Note that the function acts recursively and progressively decapitates
|
||||
its argument, so that the argument will not always be a valid parse tree."
|
||||
[p]
|
||||
(cond
|
||||
(empty? p)
|
||||
NIL
|
||||
(and (coll? (first p)) (= :dot-terminal (first (first p))))
|
||||
(let [dt (first p)]
|
||||
(make-cons-cell
|
||||
(generate (nth dt 1))
|
||||
(generate (nth dt 2))))
|
||||
:else
|
||||
(make-cons-cell
|
||||
(generate (first p))
|
||||
(gen-dot-terminated-list (rest p)))))
|
||||
|
||||
|
||||
(defn strip-leading-zeros
|
||||
"`read-string` interprets strings with leading zeros as octal; strip
|
||||
any from this string `s`. If what's left is empty (i.e. there were
|
||||
only zeros, return `\"0\"`."
|
||||
([s]
|
||||
(strip-leading-zeros s ""))
|
||||
([s prefix]
|
||||
(if
|
||||
(empty? s) "0"
|
||||
(case (first s)
|
||||
(\+ \-)(strip-leading-zeros (subs s 1) (str (first s) prefix))
|
||||
"0" (strip-leading-zeros (subs s 1) prefix)
|
||||
(str prefix s)))))
|
||||
|
||||
(defn generate
|
||||
"Generate lisp structure from this parse tree `p`. It is assumed that
|
||||
`p` has been simplified."
|
||||
|
@ -125,14 +232,36 @@
|
|||
(if
|
||||
(coll? p)
|
||||
(case (first p)
|
||||
:λ "LAMBDA"
|
||||
:λexpr (make-cons-cell
|
||||
(generate (nth p 1))
|
||||
(make-cons-cell (generate (nth p 2))
|
||||
(generate (nth p 3))))
|
||||
(:args :list) (gen-dot-terminated-list (rest p))
|
||||
:atom (symbol (second p))
|
||||
:bindings (generate (second p))
|
||||
:body (make-beowulf-list (map generate (rest p)))
|
||||
:cond (gen-cond p)
|
||||
(:decimal :integer) (read-string (strip-leading-zeros (second p)))
|
||||
:dotted-pair (make-cons-cell
|
||||
(generate (nth p 1))
|
||||
(generate (nth p 2)))
|
||||
:exponent (generate (second p))
|
||||
:fncall (gen-fn-call p)
|
||||
(:args :list) (gen-dot-terminated-list (rest p))
|
||||
:number (clojure.core/read-string (second p))
|
||||
:mvar (upper-case (second p))
|
||||
:octal (let [n (read-string (strip-leading-zeros (second p) "0"))
|
||||
scale (generate (nth p 2))]
|
||||
(* n (expt 8 scale)))
|
||||
|
||||
;; the quote read macro (which probably didn't exist in Lisp 1.5, but...)
|
||||
:quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p))))
|
||||
:scale-factor (if
|
||||
(empty? (second p)) 0
|
||||
(read-string (strip-leading-zeros (second p))))
|
||||
:scientific (let [n (generate (second p))
|
||||
exponent (generate (nth p 2))]
|
||||
(* n (expt 10 exponent)))
|
||||
|
||||
;; default
|
||||
(throw (Exception. (str "Cannot yet generate " (first p)))))
|
||||
p))
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
(:require [clojure.test :refer :all]
|
||||
[beowulf.core :refer :all]))
|
||||
|
||||
(deftest a-test
|
||||
(testing "FIXME, I fail."
|
||||
(is (= 0 1))))
|
||||
;; (deftest a-test
|
||||
;; (testing "FIXME, I fail."
|
||||
;; (is (= 0 1))))
|
||||
|
|
67
test/beowulf/mexpr_test.clj
Normal file
67
test/beowulf/mexpr_test.clj
Normal file
|
@ -0,0 +1,67 @@
|
|||
(ns beowulf.mexpr-test
|
||||
(:require [clojure.test :refer :all]
|
||||
[beowulf.read :refer [parse simplify generate]]
|
||||
[beowulf.print :refer :all]))
|
||||
|
||||
;; These tests are taken generally from the examples on page 10 of
|
||||
;; Lisp 1.5 Programmers Manual:
|
||||
|
||||
;; ## Examples
|
||||
|
||||
;; M-expressions S-expressions
|
||||
;; x X
|
||||
;; car CAR
|
||||
;; car[x] (CAR X)
|
||||
;; T (QUOTE T)
|
||||
;; ff[car [x]] (FF (CAR X))
|
||||
;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X)
|
||||
;; ((QUOTE T)(FF (CAR X))))
|
||||
;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND
|
||||
;; ((ATOM X) X)
|
||||
;; ((QUOTE T)(FF (CAR X))))))
|
||||
|
||||
(deftest variable-tests
|
||||
(testing "Variable translation"
|
||||
(let [expected "X"
|
||||
actual (prin (generate (simplify (parse "x"))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected "CAR"
|
||||
actual (prin (generate (simplify (parse "car"))))]
|
||||
(is (= actual expected)))
|
||||
))
|
||||
|
||||
(deftest literal-tests
|
||||
(testing "Literal translation"
|
||||
;; in the context of an M-expression, an upper case letter
|
||||
;; or string represents a Lisp literal, and should be quoted.
|
||||
;; Wrapping in a function call puts us into mexpr contest;
|
||||
;; "T" would be interpreted as a sexpr, which would not be
|
||||
;; quoted.
|
||||
(let [expected "(ATOM (QUOTE T))"
|
||||
actual (prin (generate (simplify (parse "atom[T]"))))]
|
||||
(is (= actual expected)))
|
||||
;; I'm not clear how `car[(A B C)]` should be translated, but
|
||||
;; I suspect as (CAR (LIST 'A 'B 'C)).
|
||||
))
|
||||
|
||||
(deftest fncall-tests
|
||||
(testing "Function calls"
|
||||
(let [expected "(CAR X)"
|
||||
actual (prin (generate (simplify (parse "car[x]"))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected "(FF (CAR X))"
|
||||
actual (prin (generate (simplify (parse "ff[car[x]]"))))]
|
||||
(is (= actual expected)))))
|
||||
|
||||
(deftest conditional-tests
|
||||
(testing "Conditional expressions"
|
||||
(let [expected "(COND ((ATOM X) X) ((QUOTE T) (FF (CAR X))))"
|
||||
actual (prin (generate (simplify (parse "[atom[x]->x; T->ff[car[x]]]"))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected "(LABEL FF (LAMBDA (X) (COND ((ATOM X) X) ((QUOTE T) (FF (CAR X))))))"
|
||||
actual (prin
|
||||
(generate
|
||||
(simplify
|
||||
(parse "label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]]"))))]
|
||||
(is (= actual expected)))))
|
||||
|
|
@ -2,9 +2,7 @@
|
|||
(:require [clojure.test :refer :all]
|
||||
[beowulf.core :refer :all]))
|
||||
|
||||
(deftest a-test
|
||||
(testing "FIXME, I fail."
|
||||
(is (= 0 1))))
|
||||
;; (deftest a-test
|
||||
;; (testing "FIXME, I fail."
|
||||
;; (is (= 0 1))))
|
||||
|
||||
(deftest parser-tests
|
||||
(testing "symbols"
|
||||
|
|
76
test/beowulf/sexpr_test.clj
Normal file
76
test/beowulf/sexpr_test.clj
Normal file
|
@ -0,0 +1,76 @@
|
|||
(ns beowulf.sexpr-test
|
||||
(:require [clojure.math.numeric-tower :refer [abs]]
|
||||
[clojure.test :refer :all]
|
||||
[beowulf.read :refer [parse simplify generate]]
|
||||
[beowulf.print :refer :all]))
|
||||
|
||||
;; broadly, sexprs should be homoiconic
|
||||
|
||||
(deftest atom-tests
|
||||
(testing "Reading atoms"
|
||||
(let [expected 'A
|
||||
actual (generate (simplify (parse (str expected))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 'APPLE
|
||||
actual (generate (simplify (parse (str expected))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 'PART2
|
||||
actual (generate (simplify (parse (str expected))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 'EXTRALONGSTRINGOFLETTERS
|
||||
actual (generate (simplify (parse (str expected))))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 'A4B66XYZ2
|
||||
actual (generate (simplify (parse (str expected))))]
|
||||
(is (= actual expected)))))
|
||||
|
||||
(deftest number-tests
|
||||
(testing "Reading octal numbers"
|
||||
(let [expected 1
|
||||
actual (generate (simplify (parse "1Q")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected -1
|
||||
actual (generate (simplify (parse "-1Q")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 8
|
||||
actual (generate (simplify (parse "1Q1")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected -8
|
||||
actual (generate (simplify (parse "-1Q1")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 128
|
||||
actual (generate (simplify (parse "2Q2")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 2093056
|
||||
actual (generate (simplify (parse "777Q4")))]
|
||||
(is (= actual expected))))
|
||||
(testing "Reading decimal numbers - broadly should be homiconic"
|
||||
(let [expected 7
|
||||
actual (generate (simplify (parse "7")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected -7
|
||||
actual (generate (simplify (parse "-7")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 3.141592
|
||||
actual (generate (simplify (parse "3.141592")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected 1234567890
|
||||
actual (generate (simplify (parse "1234567890")))]
|
||||
(is (= actual expected)))
|
||||
(let [expected -45.23
|
||||
actual (generate (simplify (parse "-45.23")))]
|
||||
(is (= actual expected))))
|
||||
(testing "Reading scientific notation")
|
||||
(let [expected 2/5
|
||||
actual (generate (simplify (parse "4E-1")))]
|
||||
(is (< (abs (- actual expected)) 0.0001)))
|
||||
(let [expected 60
|
||||
actual (generate (simplify (parse "6E1")))]
|
||||
(is (< (abs (- actual expected)) 0.0001)))
|
||||
(let [expected 60
|
||||
actual (generate (simplify (parse "600.00E-1")))]
|
||||
(is (< (abs (- actual expected)) 0.0001)))
|
||||
(let [expected 60
|
||||
actual (generate (simplify (parse "0.6E2")))]
|
||||
(is (< (abs (- actual expected)) 0.0001))))
|
||||
|
Loading…
Reference in a new issue