001 (ns beowulf.reader.parser
002 "The actual parser, supporting both S-expression and M-expression syntax."
003 (:require [instaparse.core :as i]))
004
005 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
006 ;;;
007 ;;; Copyright (C) 2022-2023 Simon Brooke
008 ;;;
009 ;;; This program is free software; you can redistribute it and/or
010 ;;; modify it under the terms of the GNU General Public License
011 ;;; as published by the Free Software Foundation; either version 2
012 ;;; of the License, or (at your option) any later version.
013 ;;;
014 ;;; This program is distributed in the hope that it will be useful,
015 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
016 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
017 ;;; GNU General Public License for more details.
018 ;;;
019 ;;; You should have received a copy of the GNU General Public License
020 ;;; along with this program; if not, write to the Free Software
021 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
022 ;;;
023 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
024
(def parse
  "Parse a string presented as argument into a parse tree which can then
  be operated upon further.

  The value is the parser built by `instaparse.core/parser` from the grammar
  assembled below. The first rule, `raw`, accepts a single expression --
  either an M-expression (`mexpr`) or an S-expression (`sexpr`) -- optionally
  surrounded by whitespace and `;;` comments.

  Each nonterminal is defined exactly once: when a rule name is defined
  twice only one of the definitions can be in force, so duplicates merely
  hide which text is live."
  (i/parser
   (str
    ;; we tolerate whitespace and comments around legitimate input
    "raw := expr | opt-comment expr opt-comment;"
    ;; top level: we accept mexprs as well as sexprs.
    "expr := mexpr | sexpr ;"

    ;; there's a notation comprising a left brace followed by mexprs
    ;; followed by a right brace which doesn't seem to be documented
    ;; but I think must represent assembly code(?)

    ;; "assembly := lbrace exprs rbrace;"

    ;; mexprs. I'm pretty clear that Lisp 1.5 could never read these,
    ;; but it's a convenience.

    ;; TODO: this works for now but in fact the Programmer's Manual
    ;; gives a much simpler formulation of M-expression grammar on
    ;; page 9, and of the S-expression grammar on page 8. It would
    ;; be worth going back and redoing this from the book.

    ;; NOTE(review): no rule in this grammar refers to `exprs` (nor to
    ;; `lbrace` / `rbrace` below), and the second alternative of `exprs`
    ;; refers only to itself. It will need a real recursive definition
    ;; before the brace notation above can be enabled.
    "exprs := expr | exprs;"
    "mexpr := λexpr | fncall | defn | cond | mvar | mconst | iexpr | number | mexpr comment;
     λexpr := λ lsqb bindings semi-colon opt-space body opt-space rsqb;
     λ := 'λ' | 'lambda';
     bindings := lsqb args rsqb | lsqb rsqb;
     body := (opt-space mexpr semi-colon)* opt-space mexpr;
     fncall := fn-name bindings;
     lsqb := '[';
     rsqb := ']';
     lbrace := '{';
     rbrace := '}';
     defn := mexpr opt-space '=' opt-space mexpr;
     cond := lsqb (opt-space cond-clause semi-colon opt-space)* cond-clause rsqb;
     cond-clause := mexpr opt-space arrow opt-space mexpr opt-space;
     arrow := '->';
     args := arg | (opt-space arg semi-colon opt-space)* opt-space arg opt-space;
     arg := mexpr;
     fn-name := mvar;
     mvar := #'[a-z][a-z0-9]*';
     mconst := #'[A-Z][A-Z0-9]*';
     semi-colon := ';';"

    ;; Infix operators appear in mexprs, e.g. on page 7. Oops!
    ;; I do not know what infix operators are considered legal.
    ;; In particular I do not know what symbol was used for
    ;; multiply.
    ;; Every operator is its own alternative: '*' and '/' must be
    ;; separated by '|', otherwise the pair is read as the single
    ;; two-character token sequence "*/" and neither works alone.
    "iexpr := iexp iop iexp;
     iexp := mexpr | number | opt-space iexp opt-space;
     iop := '>' | '<' | '+' | '-' | '*' | '/' | '=' ;"

    ;; comments. I'm pretty confident Lisp 1.5 did NOT have these.
    ;; A comment is ';;' followed by everything up to the end of the line.
    "opt-comment := opt-space | comment;"
    "comment := opt-space <';;'> #'[^\\n\\r]*' opt-space;"

    ;; sexprs. Note it's not clear to me whether Lisp 1.5 had the quote macro,
    ;; but I've included it on the basis that it can do little harm.
    "sexpr := quoted-expr | atom | number | subr | dotted-pair | list | sexpr comment;
     list := lpar opt-space sexpr rpar | lpar opt-space (sexpr sep)* rpar | lpar opt-space (sexpr sep)* dot-terminal;
     dotted-pair := lpar dot-terminal ;
     dot := '.';
     lpar := '(';
     rpar := ')';
     quoted-expr := quote sexpr;
     quote := '\\'';
     dot-terminal := sexpr space dot space sexpr rpar;
     space := #'\\p{javaWhitespace}+';
     opt-space := #'\\p{javaWhitespace}*';
     sep := ',' | opt-space;
     atom := #'[A-Z][A-Z0-9]*';"

    ;; we need a way of representing Clojure functions on the object list;
    ;; subr objects aren't expected to be normally entered on the REPL, but
    ;; must be on the object list or functions to which functions are passed
    ;; won't be able to access them.
    "subr := #'[a-z][a-z.]*/[A-Za-z][A-Za-z0-9]*';"

    ;; Lisp 1.5 supported octal as well as decimal and scientific notation
    ;; NOTE(review): the octal pattern applies `{1,12}` to `[0-7]+`, which
    ;; presumably was meant to cap the literal at twelve octal digits but,
    ;; as written, does not obviously do so -- confirm intent before
    ;; tightening it to `[0-7]{1,12}`.
    "number := integer | decimal | scientific | octal;
     integer := #'-?[0-9]+';
     decimal := integer dot integer;
     scientific := coefficient e exponent;
     coefficient := decimal | integer;
     exponent := integer;
     e := 'E';
     octal := #'[+-]?[0-7]+{1,12}' q scale-factor;
     q := 'Q';
     scale-factor := #'[0-9]*'")))
120