From 3829bd97a92cae2715a1471bd0da013b67b7d387 Mon Sep 17 00:00:00 2001
From: Simon Brooke <simon@journeyman.cc>
Date: Sat, 22 Jul 2023 21:11:06 +0100
Subject: [PATCH] Well, I didn't get rid of simplify altogether...

But it and the rest of the code are greatly simplified. All correctness tests pass, many others don't.
---
 src/mw_parser/declarative.clj       | 75 ++++++++++++++++++-----------
 src/mw_parser/generate.clj          | 25 +---------
 src/mw_parser/simplify.clj          | 44 ++---------------
 test/mw_parser/declarative_test.clj | 16 +++---
 4 files changed, 62 insertions(+), 98 deletions(-)

diff --git a/src/mw_parser/declarative.clj b/src/mw_parser/declarative.clj
index 69bda0d..fada891 100644
--- a/src/mw_parser/declarative.clj
+++ b/src/mw_parser/declarative.clj
@@ -2,10 +2,12 @@
       :author "Simon Brooke"}
  mw-parser.declarative
   (:require [clojure.string :refer [join split-lines]]
-            [instaparse.core :refer [parser]]
+            [instaparse.core :refer [failure? get-failure parser]]
+            [instaparse.failure :refer [pprint-failure]]
             [mw-parser.flow :refer [flow-grammar]]
             [mw-parser.generate :refer [generate]]
             [mw-parser.simplify :refer [simplify]]
+            [taoensso.timbre :as l]
             [trptr.java-wrapper.locale :refer [get-default]])
   (:import [java.util Locale]))
 
@@ -39,50 +41,51 @@
 
 (def ruleset-grammar
   "Experimental: parse a whole file in one go."
-  (join "\n" ["LINES := LINE | LINE CR LINES;"
-              "LINE := RULE | FLOW-RULE | CR | COMMENT | '' ;"
-              "CR := #'[\\r\\n]';"
-              "COMMENT := #'[;#]+[^\\r\\n]*' | #'/\\*.*\\*/'"]))
+  ;; TODO: bug here. We're double-counting (some) blank lines
+  (join "\n" ["LINES := (LINE)+;"
+              "LINE := RULE <CR> | FLOW-RULE <CR> | COMMENT <CR> | <CR> ;"
+              "CR := #'[ \\t]*[\\r\\n][- \\t]*';"
+              "COMMENT := #'[;\\#]+[^\\r\\n]*' | #'/\\*.*\\*/'"]))
 
 (def rule-grammar
   "Basic rule language grammar.
    
   in order to simplify translation into other natural languages, all
   TOKENS within the parser should be unambiguous."
-  (join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;"
-              "ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS"
+  (join "\n" ["RULE := IF <SPACE> CONDITIONS <SPACE> <THEN> <SPACE> ACTIONS;"
+              "ACTIONS := ACTION | (ACTION <SPACE> <AND> <SPACE> ACTION)+"
               "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;"
-              "PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;"
-              "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"]))
+              "PROBABLE-ACTION := VALUE <SPACE> <CHANCE-IN> <SPACE> VALUE <SPACE> SIMPLE-ACTION;"
+              "SIMPLE-ACTION := SYMBOL <SPACE> BECOMES <SPACE> EXPRESSION;"]))
 
 (def common-grammar
   "Grammar rules used both in the rule grammar and in the flow grammar"
   (join "\n" ["COMPARATIVE := MORE | LESS;"
-              "COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;"
+              "COMPARATIVE-QUALIFIER := IS <SPACE> COMPARATIVE <SPACE> THAN | COMPARATIVE <SPACE> THAN;"
               "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
               "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
-              "CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;"
-              "DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;"
-              "DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;"
-              "DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;"
-              "EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;"
+              "CONJUNCT-CONDITION := CONDITION <SPACE> <AND> <SPACE> CONDITIONS;"
+              "DISJUNCT-CONDITION := CONDITION <SPACE> <OR> <SPACE> CONDITIONS;"
+              "DISJUNCT-EXPRESSION := <IN> <SPACE> DISJUNCT-VALUE;"
+              "DISJUNCT-VALUE := (VALUE <SPACE> <OR> <SPACE>)* VALUE;"
+              "EQUIVALENCE := IS <SPACE> EQUAL | EQUAL | IS ;"
               "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
-              "NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;"
-              "NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;"
+              "NEGATED-QUALIFIER := QUALIFIER <SPACE> NOT | NOT <SPACE> QUALIFIER;"
+              "NEIGHBOURS-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> IS <SPACE> PROPERTY-CONDITION | QUALIFIER <SPACE> NEIGHBOURS-CONDITION;"
               "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
-              "NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;"
+              "NUMERIC-EXPRESSION := VALUE | VALUE <SPACE> OPERATOR <SPACE> NUMERIC-EXPRESSION;"
               "OPERATOR := '+' | '-' | '*' | '/';"
               "PROPERTY := SYMBOL;"
-              "PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;"
+              "PROPERTY-CONDITION := PROPERTY <SPACE> QUALIFIER <SPACE> EXPRESSION | VALUE;"
               "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
-              "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;"
-              "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;"
-              "RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;"
-              "SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;"
+              "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS <SPACE> QUALIFIER;"
+              "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE <SPACE> THAN <SPACE> NUMBER;"
+              "RANGE-EXPRESSION := BETWEEN <SPACE> NUMERIC-EXPRESSION <SPACE> AND <SPACE> NUMERIC-EXPRESSION;"
+              "SIMPLE-EXPRESSION := QUALIFIER <SPACE> EXPRESSION | VALUE;"
               "SPACE := #'[ \\t]+';"
               "VALUE := SYMBOL | NUMBER;"
               "VALUE := SYMBOL | NUMBER;"
-              "WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"]))
+              "WITHIN-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> WITHIN <SPACE> NUMBER <SPACE> IS <SPACE> PROPERTY-CONDITION-OR-EXPRESSION;"]))
 
 (def keywords-en
   "English language keyword literals used in rules - both in production
@@ -132,10 +135,20 @@
   ([^Locale _locale]
    keywords-en))
 
-(def parse
-  "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
+(def ^:private raw-parser
   (parser (join "\n" [ruleset-grammar rule-grammar flow-grammar common-grammar (keywords-for-locale)])))
 
+(defn parse
+  "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
+  [arg]
+  (let [parse-tree-or-error (raw-parser arg :total true)]
+    (if (failure? parse-tree-or-error)
+      (throw (ex-info (format "Some rules were not understood:\n%s" 
+                              (pprint-failure (get-failure parse-tree-or-error)))
+                      {:source arg
+                       :failure (get-failure parse-tree-or-error)}))
+      parse-tree-or-error)))
+
 (defn- compile-rule
   "Compile a rule function from this `parse-tree` derived from this `source`
    at the zero-based line number `n` in the source file; return a compiled
@@ -143,18 +156,24 @@
    
    * `:rule-type` : the type of rule the function represents;
    * `:parse` : this `parse-tree`;
+   * `:source` : the rule source from which the parse tree was derived;
    * `:lisp` : the lisp source generated from this `parse-tree`;
    * `:line : the one-based line number of the definition in the source file,
      i.e. `(inc n)`."
   [parse-tree source n]
-  (when-not (keyword? parse-tree)
+  (if (#{:COMMENT :LINE} (first parse-tree))
+    (do 
+      (l/info (format "Skipping line %d, `%s`, parse-tree %s." 
+                      (inc n) source parse-tree))
+      nil)
     (let [lisp (generate parse-tree)
           line-no (inc n)]
+      (l/info (format "Compiling rule at line %d, `%s`." line-no source))
       (try
         (if (#{'fn 'fn*} (first lisp))
           (vary-meta
            (eval lisp)
-           merge (meta lisp) {:src source :lisp lisp :line line-no})
+           merge (meta lisp) {:source source :lisp lisp :line line-no})
           (throw
            (Exception.
             (format "Parse of `%s` did not return a function: %s" source lisp))))
diff --git a/src/mw_parser/generate.clj b/src/mw_parser/generate.clj
index 700053c..2e2cb8d 100644
--- a/src/mw_parser/generate.clj
+++ b/src/mw_parser/generate.clj
@@ -212,15 +212,6 @@
   (assert-type tree :ACTIONS)
   (generate-action (first (rest tree)) (second (rest tree))))
 
-(defn generate-disjunct-value
-  "Generate a disjunct value. Essentially what we need here is to generate a
-  flat list of values, since the `member` has already been taken care of."
-  [tree]
-  (assert-type tree :DISJUNCT-VALUE)
-  (if (= (count tree) 4)
-    (cons (generate (second tree)) (generate (nth tree 3)))
-    (list (generate (second tree)))))
-
 (defn generate-numeric-expression
   "From this `tree`, assumed to be a syntactically correct numeric expression,
   generate and return the appropriate clojure fragment."
@@ -289,18 +280,6 @@
        :LESS (let [value (generate (nth quantifier 3))]
                (generate-neighbours-condition '< value pc distance))))))
 
-(defn- generate-disjunct-expression
-  [tree]
-  (assert-type tree :DISJUNCT-EXPRESSION)
-  (try
-    (set (map generate (rest tree)))
-    (catch Exception x
-      (throw
-       (ex-info
-        "Failed to compile :DISJUNCT-EXPRESSION"
-        {:tree tree}
-        x)))))
-
 ;;; Flow rules. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; A flow rule DOES NOT return a modified cell; instead, it 
 ;;; returns a PLAN to modify the world, in the form of a sequence of `flow`
@@ -410,8 +389,8 @@
       :CONDITIONS (generate-conditions tree)
       :CONJUNCT-CONDITION (generate-conjunct-condition tree)
       :DISJUNCT-CONDITION (generate-disjunct-condition tree)
-      :DISJUNCT-EXPRESSION (generate-disjunct-expression tree)
-      :DISJUNCT-VALUE (generate-disjunct-value tree)
+      :DISJUNCT-EXPRESSION (set (generate (second tree)))
+      :DISJUNCT-VALUE (map generate (rest tree))
       :EQUIVALENCE '=
       :EXPRESSION (generate (second tree))
       :FLOW-RULE (generate-flow tree)
diff --git a/src/mw_parser/simplify.clj b/src/mw_parser/simplify.clj
index d73e729..e9c3886 100644
--- a/src/mw_parser/simplify.clj
+++ b/src/mw_parser/simplify.clj
@@ -65,39 +65,18 @@
    (coll? tree)
     (case (first tree)
       :ACTION (simplify-second-of-two tree)
-      :ACTIONS (cons (first tree) (simplify (rest tree)))
-      :AND nil
-      :CHANCE-IN nil
-      :COMMENT nil
       :COMPARATIVE (simplify-second-of-two tree)
       :CONDITION (simplify-second-of-two tree)
       :CONDITIONS (simplify-second-of-two tree)
-      :CR nil
-      :DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE)
+      ;; :DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE)
       :EXPRESSION (simplify-second-of-two tree)
       :FLOW-CONDITIONS (simplify-second-of-two tree)
-      :IN nil
       ;; this is like simplify-second-of-two except if there isn't
       ;; a second element it returns nil
-      :LINE (when (= (count tree) 2) (simplify (nth tree 1)))
-      :LINES (loop [lines tree result '()]
-               (let [line (simplify (second lines))
-                     ;; the reason for putting :BLANK in the result in place
-                     ;; of lines that weren't rules is so that we can keep 
-                     ;; track of the source text of the line we're compiling.
-                     result' (concat result (list (or line :BLANK)))]
-                 (when-not (= :LINES (first lines))
-                   (throw (ex-info "Unexpeced parse tree: LINES"
-                                   {:lines lines})))
-                 (case (count lines)
-                   2 result'
-                   4 (recur (nth lines 3) result')
-                   (throw (ex-info "Unexpeced parse tree: LINES"
-                                   {:lines lines})))))
+      :LINE (if (= (count tree) 2) (simplify (nth tree 1)) tree)
+      :LINES (map simplify (rest tree))
       :PROPERTY (simplify-second-of-two tree)
-      :PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree)
-      :OR nil
-      :SPACE nil
+      :PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree) 
       :STATE (list :PROPERTY-CONDITION
                    (list :SYMBOL "state")
                    '(:QUALIFIER
@@ -105,20 +84,7 @@
                       (:IS "is")))
                    (list :EXPRESSION
                          (list :VALUE (second tree))))
-      :THEN nil
       :VALUE (simplify-second-of-two tree)
+      ;; default
       (remove nil? (map simplify tree)))
     tree))
-
-;; OK, there is a major unresolved problem. If there is a determiner condition,
-;; the tree as parsed from natural language is the wrong shape, and we're 
-;; going to have to restructure it somewhere to being the determiner upstream
-;; of the property conditions. It *may* be possible to do that in `generate`.
-
-(defn simplify-determiner-condition
-  [tree]
-  (apply vector
-         (cons :DETERMINER-CONDITION
-               (cons
-                (simplify-second-of-two (second tree))
-                (rest (rest tree))))))
diff --git a/test/mw_parser/declarative_test.clj b/test/mw_parser/declarative_test.clj
index 6a0bb67..810f3b3 100644
--- a/test/mw_parser/declarative_test.clj
+++ b/test/mw_parser/declarative_test.clj
@@ -42,22 +42,22 @@
 
 (deftest exception-tests
   (testing "Constructions which should cause exceptions to be thrown"
-    (is (thrown-with-msg? Exception #"^I did not understand.*"
+    (is (thrown-with-msg? Exception #"^Parse error at line.*"
                           (parse "the quick brown fox jumped over the lazy dog"))
         "Exception thrown if rule text does not match grammar")
-    (is (thrown-with-msg? Exception #"^I did not understand.*"
+    (is (thrown-with-msg? Exception #"^Parse error at line.*"
                           (parse "if i have a cat on my lap then everything is fine"))
         "Exception thrown if rule text does not match grammar")
     ;; TODO: these two should be moved to generate-test; the exception should be
     ;; being thrown (but isn't) in the generate phase.
     (is (thrown-with-msg?
          Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions"
-         (generate (simplify (parse "if state is new then x should be 0"))
+         (generate (simplify (parse "if state is new then x should be 0")))
         "Exception thrown on attempt to set 'x'")
     (is (thrown-with-msg?
          Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions"
-         (generate (simplify (parse "if state is new then y should be 0"))))
-        "Exception thrown on attempt to set 'y'")))
+         (generate (simplify (parse "if state is new then y should be 0")))
+        "Exception thrown on attempt to set 'y'")))))
 
 (deftest correctness-tests
   ;; these are, in so far as possible, the same as the correctness-tests in core-tests - i.e., the two compilers
@@ -301,7 +301,7 @@
           "Left hand side of world has no high neighbours, so rule should not fire.")))
 
   ;; more than number neighbours have property more than numeric-value
-  (testing "More than number neighbours have property more than symbolic-value"
+  (testing "More than number neighbours have property more than number"
     (let [afn (first (compile "if more than 2 neighbours have altitude more than 10 then state should be beach"))
           world (transform-world
                  (make-world 3 3)
@@ -492,8 +492,8 @@
                  (make-world 5 5)
                  (compile
                   (join "\n"
-                        (list "if x is less than 2 then altitude should be 11 and state should be grassland"
-                              "if x is more than 1 then altitude should be 0 and state should be water"))))]
+                        (list "if state is new and x is less than 2 then altitude should be 11 and state should be grassland"
+                              "if state is new and x is more than 1 then altitude should be 0 and state should be water"))))]
       (is (= (:state (apply afn (list {:x 2 :y 2} world))) :beach)
           "Rule fires when condition is met (strip of altitude 11 down right hand side)")
       (is (nil? (apply afn (list {:x 0 :y 1} world)))