diff --git a/project.clj b/project.clj index 6c9f3d6..661f0ce 100644 --- a/project.clj +++ b/project.clj @@ -4,6 +4,7 @@ :license {:name "GPL-2.0-or-later" :url "https://www.eclipse.org/legal/epl-2.0/"} :dependencies [[org.clojure/clojure "1.10.0"] + [org.clojure/math.numeric-tower "0.0.4"] [instaparse "1.4.10"]] :main ^:skip-aot beowulf.core :target-path "target/%s" diff --git a/src/beowulf/print.clj b/src/beowulf/print.clj index b9563bd..5c8c196 100644 --- a/src/beowulf/print.clj +++ b/src/beowulf/print.clj @@ -1,5 +1,4 @@ (ns beowulf.print -;; (:require [beowulf.cons-cell]) ) (defprotocol Printable diff --git a/src/beowulf/read.clj b/src/beowulf/read.clj index 216b18f..f0cee09 100644 --- a/src/beowulf/read.clj +++ b/src/beowulf/read.clj @@ -1,5 +1,7 @@ (ns beowulf.read - (:require [instaparse.core :as i] + (:require [clojure.math.numeric-tower :refer [expt]] + [clojure.string :refer [starts-with? upper-case]] + [instaparse.core :as i] [beowulf.cons-cell :refer [make-beowulf-list make-cons-cell NIL]]) ;; (:import [beowulf.cons-cell ConsCell]) ) @@ -16,7 +18,11 @@ ;; mexprs. I'm pretty clear that Lisp 1.5 could never read these, ;; but it's a convenience. - "mexpr := fncall | defn | cond; + "mexpr := λexpr | fncall | defn | cond | mvar; + λexpr := λ lsqb bindings semi-colon body rsqb; + λ := 'λ'; + bindings := lsqb args rsqb; + body := (expr semi-colon opt-space)* expr; fncall := fn-name lsqb args rsqb; lsqb := '['; rsqb := ']'; @@ -24,31 +30,44 @@ cond := lsqb (cond-clause semi-colon opt-space)* cond-clause rsqb; cond-clause := expr opt-space arrow opt-space expr; arrow := '->'; - args := (arg semi-colon opt-space)* arg; - arg := mexpr | sexpr; - fn-name := #'[a-z]*'; + args := (expr semi-colon opt-space)* expr; + fn-name := mvar; + mvar := #'[a-z]+'; semi-colon := ';';" ;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro, ;; but I've included it on the basis that it can do little harm. - "list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal; + "sexpr := quoted-expr | atom | number | dotted-pair | list; + list := lpar sexpr rpar | lpar (sexpr sep)* rpar | lpar (sexpr sep)* dot-terminal; dotted-pair := lpar dot-terminal ; dot := '.'; lpar := '('; rpar := ')'; - sexpr := atom | number | dotted-pair | list | quoted-expr; quoted-expr := quote sexpr; quote := '\\''; dot-terminal := sexpr space dot space sexpr rpar; space := #'\\p{javaWhitespace}+'; opt-space := #'\\p{javaWhitespace}*'; sep := ',' | opt-space; - atom := #'[A-Z][A-Z0-9]*'; - number := #'-?[0-9][0-9.]*';"))) + atom := #'[A-Z][A-Z0-9]*';" + + ;; Lisp 1.5 supported octal as well as decimal and scientific notation + "number := integer | decimal | scientific | octal; + integer := #'-?[1-9][0-9]*'; + decimal := #'-?[1-9][0-9]*\\.?[0-9]*' | #'0\\.[0-9]*'; + scientific := coefficient e exponent; + coefficient := decimal; + exponent := integer; + e := 'E'; + octal := #'[+-]?[0-7]+{1,12}' q scale-factor; + q := 'Q'; + scale-factor := #'[0-9]*'"))) (defn simplify "Simplify this parse tree `p`." - [p] + ([p] + (simplify p :sexpr)) + ([p context] (if (coll? p) (apply @@ -56,32 +75,86 @@ (remove #(if (coll? %) (empty? %)) (case (first p) - (:arrow :dot :lpar :lsqb :opt-space :quote :rpar :rsqb :semi-colon :sep :space) nil - (:arg :expr :mexpr :sexpr) (simplify (second p)) - (:args :cond :cond-clause :dot-terminal :dotted-pair :fncall :list) (map simplify p) - ;; the quote read macro (which probably didn't exist in Lisp 1.5, but...) - :quoted-expr [:fncall [:fn-name "quote"] [:args (simplify (nth p 2))]] + (:arg :expr :coefficient :fn-name :number :sexpr) (simplify (second p) context) + (:λexpr + :args :bindings :body :cond :cond-clause :dot-terminal + :fncall :octal :quoted-expr :scientific) (map #(simplify % context) p) + (:arrow :dot :e :lpar :lsqb :opt-space :q :quote :rpar :rsqb + :semi-colon :sep :space) nil + :atom (if + (= context :mexpr) + [:quoted-expr p] + p) + :dotted-pair (if + (= context :mexpr) + [:fncall + [:mvar "cons"] + [:args + (simplify (nth p 1) context) + (simplify (nth p 2) context)]] + (map simplify p)) + :mexpr (simplify (second p) :mexpr) + :list (if + (= context :mexpr) + [:fncall + [:mvar "list"] + [:args (apply vector (map simplify (rest p)))]] + (map #(simplify % context) p)) ;;default p))) - p)) + p))) -(defn gen-dot-terminated-list - "Generate a list, which may be dot-terminated, from this partial parse tree - 'p'. Note that the function acts recursively and progressively decapitates - its argument, so that the argument will not always be a valid parse tree." - [p] - (cond - (empty? p) - NIL - (and (coll? (first p)) (= :dot-terminal (first (first p)))) - (let [dt (first p)] - (make-cons-cell - (generate (nth dt 1)) - (generate (nth dt 2)))) - :else - (make-cons-cell - (generate (first p)) - (gen-dot-terminated-list (rest p))))) + + +;; # From Lisp 1.5 Programmers Manual, page 10 +;; Note that I've retyped much of this, since copy/pasting out of PDF is less +;; than reliable. Any typos are mine. Quote starts [[ + +;; We are now in a position to define the universal LISP function +;; evalquote[fn;args], When evalquote is given a function and a list of arguments +;; for that function, it computes the value of the function applied to the arguments. +;; LISP functions have S-expressions as arguments. In particular, the argument "fn" +;; of the function evalquote must be an S-expression. Since we have been +;; writing functions as M-expressions, it is necessary to translate them into +;; S-expressions. + +;; The following rules define a method of translating functions written in the +;; meta-language into S-expressions. +;; 1. If the function is represented by its name, it is translated by changing +;; all of the letters to upper case, making it an atomic symbol. Thus is +;; translated to CAR. +;; 2. If the function uses the lambda notation, then the expression +;; λ[[x ..;xn]; ε] is translated into (LAMBDA (X1 ...XN) ε*), where ε* is the translation +;; of ε. +;; 3. If the function begins with label, then the translation of +;; label[α;ε] is (LABEL α* ε*). + +;; Forms are translated as follows: +;; 1. A variable, like a function name, is translated by using uppercase letters. +;; Thus the translation of varl is VAR1. +;; 2. The obvious translation of letting a constant translate into itself will not +;; work. Since the translation of x is X, the translation of X must be something +;; else to avoid ambiguity. The solution is to quote it. Thus X is translated +;; into (QUOTE X). +;; 3. The form fn[argl;. ..;argn] is translated into (fn* argl* ...argn*) +;; 4. The conditional expression [pl-el;...;pn-en] is translated into +;; (COND (p1* e1*)...(pn* en*)) + +;; ## Examples + +;; M-expressions S-expressions +;; x X +;; car CAR +;; car[x] (CAR X) +;; T (QUOTE T) +;; ff[car [x]] (FF (CAR X)) +;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X) +;; ((QUOTE T)(FF (CAR X)))) +;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND +;; ((ATOM X) X) +;; ((QUOTE T)(FF (CAR X)))))) + +;; ]] quote ends (defn gen-cond-clause "Generate a cond clause from this simplified parse tree fragment `p`; @@ -113,11 +186,45 @@ implicitly quoted; this function does not (yet) do that." [p] (if - (and (coll? p)(= :fncall (first p))(= :fn-name (first (second p)))) + (and (coll? p)(= :fncall (first p))(= :mvar (first (second p)))) (make-cons-cell - (second (second p)) + (generate (second p)) (generate (nth p 2))))) + +(defn gen-dot-terminated-list + "Generate a list, which may be dot-terminated, from this partial parse tree + 'p'. Note that the function acts recursively and progressively decapitates + its argument, so that the argument will not always be a valid parse tree." + [p] + (cond + (empty? p) + NIL + (and (coll? (first p)) (= :dot-terminal (first (first p)))) + (let [dt (first p)] + (make-cons-cell + (generate (nth dt 1)) + (generate (nth dt 2)))) + :else + (make-cons-cell + (generate (first p)) + (gen-dot-terminated-list (rest p))))) + + +(defn strip-leading-zeros + "`read-string` interprets strings with leading zeros as octal; strip + any from this string `s`. If what's left is empty (i.e. there were + only zeros, return `\"0\"`." + ([s] + (strip-leading-zeros s "")) + ([s prefix] + (if + (empty? s) "0" + (case (first s) + (\+ \-)(strip-leading-zeros (subs s 1) (str (first s) prefix)) + "0" (strip-leading-zeros (subs s 1) prefix) + (str prefix s))))) + (defn generate "Generate lisp structure from this parse tree `p`. It is assumed that `p` has been simplified." @@ -125,14 +232,36 @@ (if (coll? p) (case (first p) + :λ "LAMBDA" + :λexpr (make-cons-cell + (generate (nth p 1)) + (make-cons-cell (generate (nth p 2)) + (generate (nth p 3)))) + (:args :list) (gen-dot-terminated-list (rest p)) :atom (symbol (second p)) + :bindings (generate (second p)) + :body (make-beowulf-list (map generate (rest p))) :cond (gen-cond p) + (:decimal :integer) (read-string (strip-leading-zeros (second p))) :dotted-pair (make-cons-cell (generate (nth p 1)) (generate (nth p 2))) + :exponent (generate (second p)) :fncall (gen-fn-call p) - (:args :list) (gen-dot-terminated-list (rest p)) - :number (clojure.core/read-string (second p)) + :mvar (upper-case (second p)) + :octal (let [n (read-string (strip-leading-zeros (second p) "0")) + scale (generate (nth p 2))] + (* n (expt 8 scale))) + + ;; the quote read macro (which probably didn't exist in Lisp 1.5, but...) + :quoted-expr (make-beowulf-list (list 'QUOTE (generate (second p)))) + :scale-factor (if + (empty? (second p)) 0 + (read-string (strip-leading-zeros (second p)))) + :scientific (let [n (generate (second p)) + exponent (generate (nth p 2))] + (* n (expt 10 exponent))) + ;; default (throw (Exception. (str "Cannot yet generate " (first p))))) p)) diff --git a/test/beowulf/core_test.clj b/test/beowulf/core_test.clj index bd733f4..2b10204 100644 --- a/test/beowulf/core_test.clj +++ b/test/beowulf/core_test.clj @@ -2,6 +2,6 @@ (:require [clojure.test :refer :all] [beowulf.core :refer :all])) -(deftest a-test - (testing "FIXME, I fail." - (is (= 0 1)))) +;; (deftest a-test +;; (testing "FIXME, I fail." +;; (is (= 0 1)))) diff --git a/test/beowulf/mexpr_test.clj b/test/beowulf/mexpr_test.clj new file mode 100644 index 0000000..f8022e5 --- /dev/null +++ b/test/beowulf/mexpr_test.clj @@ -0,0 +1,67 @@ +(ns beowulf.mexpr-test + (:require [clojure.test :refer :all] + [beowulf.read :refer [parse simplify generate]] + [beowulf.print :refer :all])) + +;; These tests are taken generally from the examples on page 10 of +;; Lisp 1.5 Programmers Manual: + +;; ## Examples + +;; M-expressions S-expressions +;; x X +;; car CAR +;; car[x] (CAR X) +;; T (QUOTE T) +;; ff[car [x]] (FF (CAR X)) +;; [atom[x]->x; T->ff[car[x]]] (COND ((ATOM X) X) +;; ((QUOTE T)(FF (CAR X)))) +;; label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]] (LABEL FF (LAMBDA (X) (COND +;; ((ATOM X) X) +;; ((QUOTE T)(FF (CAR X)))))) + +(deftest variable-tests + (testing "Variable translation" + (let [expected "X" + actual (prin (generate (simplify (parse "x"))))] + (is (= actual expected))) + (let [expected "CAR" + actual (prin (generate (simplify (parse "car"))))] + (is (= actual expected))) + )) + +(deftest literal-tests + (testing "Literal translation" + ;; in the context of an M-expression, an upper case letter + ;; or string represents a Lisp literal, and should be quoted. + ;; Wrapping in a function call puts us into mexpr contest; + ;; "T" would be interpreted as a sexpr, which would not be + ;; quoted. + (let [expected "(ATOM (QUOTE T))" + actual (prin (generate (simplify (parse "atom[T]"))))] + (is (= actual expected))) + ;; I'm not clear how `car[(A B C)]` should be translated, but + ;; I suspect as (CAR (LIST 'A 'B 'C)). + )) + +(deftest fncall-tests + (testing "Function calls" + (let [expected "(CAR X)" + actual (prin (generate (simplify (parse "car[x]"))))] + (is (= actual expected))) + (let [expected "(FF (CAR X))" + actual (prin (generate (simplify (parse "ff[car[x]]"))))] + (is (= actual expected))))) + +(deftest conditional-tests + (testing "Conditional expressions" + (let [expected "(COND ((ATOM X) X) ((QUOTE T) (FF (CAR X))))" + actual (prin (generate (simplify (parse "[atom[x]->x; T->ff[car[x]]]"))))] + (is (= actual expected))) + (let [expected "(LABEL FF (LAMBDA (X) (COND ((ATOM X) X) ((QUOTE T) (FF (CAR X))))))" + actual (prin + (generate + (simplify + (parse "label[ff;λ[[x];[atom[x]->x; T->ff[car[x]]]]]"))))] + (is (= actual expected))))) + diff --git a/test/beowulf/read_test.clj b/test/beowulf/read_test.clj index 73ca194..1fe206a 100644 --- a/test/beowulf/read_test.clj +++ b/test/beowulf/read_test.clj @@ -2,9 +2,7 @@ (:require [clojure.test :refer :all] [beowulf.core :refer :all])) -(deftest a-test - (testing "FIXME, I fail." - (is (= 0 1)))) +;; (deftest a-test +;; (testing "FIXME, I fail." +;; (is (= 0 1)))) -(deftest parser-tests - (testing "symbols" diff --git a/test/beowulf/sexpr_test.clj b/test/beowulf/sexpr_test.clj new file mode 100644 index 0000000..b0f48c1 --- /dev/null +++ b/test/beowulf/sexpr_test.clj @@ -0,0 +1,76 @@ +(ns beowulf.sexpr-test + (:require [clojure.math.numeric-tower :refer [abs]] + [clojure.test :refer :all] + [beowulf.read :refer [parse simplify generate]] + [beowulf.print :refer :all])) + +;; broadly, sexprs should be homoiconic + +(deftest atom-tests + (testing "Reading atoms" + (let [expected 'A + actual (generate (simplify (parse (str expected))))] + (is (= actual expected))) + (let [expected 'APPLE + actual (generate (simplify (parse (str expected))))] + (is (= actual expected))) + (let [expected 'PART2 + actual (generate (simplify (parse (str expected))))] + (is (= actual expected))) + (let [expected 'EXTRALONGSTRINGOFLETTERS + actual (generate (simplify (parse (str expected))))] + (is (= actual expected))) + (let [expected 'A4B66XYZ2 + actual (generate (simplify (parse (str expected))))] + (is (= actual expected))))) + +(deftest number-tests + (testing "Reading octal numbers" + (let [expected 1 + actual (generate (simplify (parse "1Q")))] + (is (= actual expected))) + (let [expected -1 + actual (generate (simplify (parse "-1Q")))] + (is (= actual expected))) + (let [expected 8 + actual (generate (simplify (parse "1Q1")))] + (is (= actual expected))) + (let [expected -8 + actual (generate (simplify (parse "-1Q1")))] + (is (= actual expected))) + (let [expected 128 + actual (generate (simplify (parse "2Q2")))] + (is (= actual expected))) + (let [expected 2093056 + actual (generate (simplify (parse "777Q4")))] + (is (= actual expected)))) + (testing "Reading decimal numbers - broadly should be homiconic" + (let [expected 7 + actual (generate (simplify (parse "7")))] + (is (= actual expected))) + (let [expected -7 + actual (generate (simplify (parse "-7")))] + (is (= actual expected))) + (let [expected 3.141592 + actual (generate (simplify (parse "3.141592")))] + (is (= actual expected))) + (let [expected 1234567890 + actual (generate (simplify (parse "1234567890")))] + (is (= actual expected))) + (let [expected -45.23 + actual (generate (simplify (parse "-45.23")))] + (is (= actual expected)))) + (testing "Reading scientific notation") + (let [expected 2/5 + actual (generate (simplify (parse "4E-1")))] + (is (< (abs (- actual expected)) 0.0001))) + (let [expected 60 + actual (generate (simplify (parse "6E1")))] + (is (< (abs (- actual expected)) 0.0001))) + (let [expected 60 + actual (generate (simplify (parse "600.00E-1")))] + (is (< (abs (- actual expected)) 0.0001))) + (let [expected 60 + actual (generate (simplify (parse "0.6E2")))] + (is (< (abs (- actual expected)) 0.0001)))) +