From 3829bd97a92cae2715a1471bd0da013b67b7d387 Mon Sep 17 00:00:00 2001 From: Simon Brooke <simon@journeyman.cc> Date: Sat, 22 Jul 2023 21:11:06 +0100 Subject: [PATCH] Well, I didn't get rid of simplify altogether... But it and the rest of the code are greatly simplified. All correctness tests pass, many others don't. --- src/mw_parser/declarative.clj | 75 ++++++++++++++++++----------- src/mw_parser/generate.clj | 25 +--------- src/mw_parser/simplify.clj | 44 ++--------------- test/mw_parser/declarative_test.clj | 16 +++--- 4 files changed, 62 insertions(+), 98 deletions(-) diff --git a/src/mw_parser/declarative.clj b/src/mw_parser/declarative.clj index 69bda0d..fada891 100644 --- a/src/mw_parser/declarative.clj +++ b/src/mw_parser/declarative.clj @@ -2,10 +2,12 @@ :author "Simon Brooke"} mw-parser.declarative (:require [clojure.string :refer [join split-lines]] - [instaparse.core :refer [parser]] + [instaparse.core :refer [failure? get-failure parser]] + [instaparse.failure :refer [pprint-failure]] [mw-parser.flow :refer [flow-grammar]] [mw-parser.generate :refer [generate]] [mw-parser.simplify :refer [simplify]] + [taoensso.timbre :as l] [trptr.java-wrapper.locale :refer [get-default]]) (:import [java.util Locale])) @@ -39,50 +41,51 @@ (def ruleset-grammar "Experimental: parse a whole file in one go." - (join "\n" ["LINES := LINE | LINE CR LINES;" - "LINE := RULE | FLOW-RULE | CR | COMMENT | '' ;" - "CR := #'[\\r\\n]';" - "COMMENT := #'[;#]+[^\\r\\n]*' | #'/\\*.*\\*/'"])) + ;; TODO: bug here. We're double-counting (some) blank lines + (join "\n" ["LINES := (LINE)+;" + "LINE := RULE <CR> | FLOW-RULE <CR> | COMMENT <CR> | <CR> ;" + "CR := #'[ \\t]*[\\r\\n][- \\t]*';" + "COMMENT := #'[;\\#]+[^\\r\\n]*' | #'/\\*.*\\*/'"])) (def rule-grammar "Basic rule language grammar. in order to simplify translation into other natural languages, all TOKENS within the parser should be unambiguous." - (join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;" - "ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS" + (join "\n" ["RULE := IF <SPACE> CONDITIONS <SPACE> <THEN> <SPACE> ACTIONS;" + "ACTIONS := ACTION | (ACTION <SPACE> <AND> <SPACE> ACTION)+" "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;" - "PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;" - "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"])) + "PROBABLE-ACTION := VALUE <SPACE> <CHANCE-IN> <SPACE> VALUE <SPACE> SIMPLE-ACTION;" + "SIMPLE-ACTION := SYMBOL <SPACE> BECOMES <SPACE> EXPRESSION;"])) (def common-grammar "Grammar rules used both in the rule grammar and in the flow grammar" (join "\n" ["COMPARATIVE := MORE | LESS;" - "COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;" + "COMPARATIVE-QUALIFIER := IS <SPACE> COMPARATIVE <SPACE> THAN | COMPARATIVE <SPACE> THAN;" "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;" "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;" - "CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;" - "DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;" - "DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;" - "DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;" - "EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;" + "CONJUNCT-CONDITION := CONDITION <SPACE> <AND> <SPACE> CONDITIONS;" + "DISJUNCT-CONDITION := CONDITION <SPACE> <OR> <SPACE> CONDITIONS;" + "DISJUNCT-EXPRESSION := <IN> <SPACE> DISJUNCT-VALUE;" + "DISJUNCT-VALUE := (VALUE <SPACE> <OR> <SPACE>)* VALUE;" + "EQUIVALENCE := IS <SPACE> EQUAL | EQUAL | IS ;" "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;" - "NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;" - "NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;" + "NEGATED-QUALIFIER := QUALIFIER <SPACE> NOT | NOT <SPACE> QUALIFIER;" + "NEIGHBOURS-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> IS <SPACE> PROPERTY-CONDITION | QUALIFIER <SPACE> NEIGHBOURS-CONDITION;" "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';" - "NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;" + "NUMERIC-EXPRESSION := VALUE | VALUE <SPACE> OPERATOR <SPACE> NUMERIC-EXPRESSION;" "OPERATOR := '+' | '-' | '*' | '/';" "PROPERTY := SYMBOL;" - "PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;" + "PROPERTY-CONDITION := PROPERTY <SPACE> QUALIFIER <SPACE> EXPRESSION | VALUE;" "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;" - "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;" - "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;" - "RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;" - "SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;" + "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS <SPACE> QUALIFIER;" + "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE <SPACE> THAN <SPACE> NUMBER;" + "RANGE-EXPRESSION := BETWEEN <SPACE> NUMERIC-EXPRESSION <SPACE> AND <SPACE> NUMERIC-EXPRESSION;" + "SIMPLE-EXPRESSION := QUALIFIER <SPACE> EXPRESSION | VALUE;" "SPACE := #'[ \\t]+';" "VALUE := SYMBOL | NUMBER;" "VALUE := SYMBOL | NUMBER;" - "WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"])) + "WITHIN-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> WITHIN <SPACE> NUMBER <SPACE> IS <SPACE> PROPERTY-CONDITION-OR-EXPRESSION;"])) (def keywords-en "English language keyword literals used in rules - both in production @@ -132,10 +135,20 @@ ([^Locale _locale] keywords-en)) -(def parse - "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." +(def ^:private raw-parser (parser (join "\n" [ruleset-grammar rule-grammar flow-grammar common-grammar (keywords-for-locale)]))) +(defn parse + "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." + [arg] + (let [parse-tree-or-error (raw-parser arg :total true)] + (if (failure? parse-tree-or-error) + (throw (ex-info (format "Some rules were not understood:\n%s" + (pprint-failure (get-failure parse-tree-or-error))) + {:source arg + :failure (get-failure parse-tree-or-error)})) + parse-tree-or-error))) + (defn- compile-rule "Compile a rule function from this `parse-tree` derived from this `source` at the zero-based line number `n` in the source file; return a compiled @@ -143,18 +156,24 @@ * `:rule-type` : the type of rule the function represents; * `:parse` : this `parse-tree`; + * `:source` : the rule source from which the parse tree was derived; * `:lisp` : the lisp source generated from this `parse-tree`; * `:line : the one-based line number of the definition in the source file, i.e. `(inc n)`." [parse-tree source n] - (when-not (keyword? parse-tree) + (if (#{:COMMENT :LINE} (first parse-tree)) + (do + (l/info (format "Skipping line %d, `%s`, parse-tree %s." + (inc n) source parse-tree)) + nil) (let [lisp (generate parse-tree) line-no (inc n)] + (l/info (format "Compiling rule at line %d, `%s`." line-no source)) (try (if (#{'fn 'fn*} (first lisp)) (vary-meta (eval lisp) - merge (meta lisp) {:src source :lisp lisp :line line-no}) + merge (meta lisp) {:source source :lisp lisp :line line-no}) (throw (Exception. (format "Parse of `%s` did not return a function: %s" source lisp)))) diff --git a/src/mw_parser/generate.clj b/src/mw_parser/generate.clj index 700053c..2e2cb8d 100644 --- a/src/mw_parser/generate.clj +++ b/src/mw_parser/generate.clj @@ -212,15 +212,6 @@ (assert-type tree :ACTIONS) (generate-action (first (rest tree)) (second (rest tree)))) -(defn generate-disjunct-value - "Generate a disjunct value. Essentially what we need here is to generate a - flat list of values, since the `member` has already been taken care of." - [tree] - (assert-type tree :DISJUNCT-VALUE) - (if (= (count tree) 4) - (cons (generate (second tree)) (generate (nth tree 3))) - (list (generate (second tree))))) - (defn generate-numeric-expression "From this `tree`, assumed to be a syntactically correct numeric expression, generate and return the appropriate clojure fragment." @@ -289,18 +280,6 @@ :LESS (let [value (generate (nth quantifier 3))] (generate-neighbours-condition '< value pc distance)))))) -(defn- generate-disjunct-expression - [tree] - (assert-type tree :DISJUNCT-EXPRESSION) - (try - (set (map generate (rest tree))) - (catch Exception x - (throw - (ex-info - "Failed to compile :DISJUNCT-EXPRESSION" - {:tree tree} - x))))) - ;;; Flow rules. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; A flow rule DOES NOT return a modified cell; instead, it ;;; returns a PLAN to modify the world, in the form of a sequence of `flow` @@ -410,8 +389,8 @@ :CONDITIONS (generate-conditions tree) :CONJUNCT-CONDITION (generate-conjunct-condition tree) :DISJUNCT-CONDITION (generate-disjunct-condition tree) - :DISJUNCT-EXPRESSION (generate-disjunct-expression tree) - :DISJUNCT-VALUE (generate-disjunct-value tree) + :DISJUNCT-EXPRESSION (set (generate (second tree))) + :DISJUNCT-VALUE (map generate (rest tree)) :EQUIVALENCE '= :EXPRESSION (generate (second tree)) :FLOW-RULE (generate-flow tree) diff --git a/src/mw_parser/simplify.clj b/src/mw_parser/simplify.clj index d73e729..e9c3886 100644 --- a/src/mw_parser/simplify.clj +++ b/src/mw_parser/simplify.clj @@ -65,39 +65,18 @@ (coll? tree) (case (first tree) :ACTION (simplify-second-of-two tree) - :ACTIONS (cons (first tree) (simplify (rest tree))) - :AND nil - :CHANCE-IN nil - :COMMENT nil :COMPARATIVE (simplify-second-of-two tree) :CONDITION (simplify-second-of-two tree) :CONDITIONS (simplify-second-of-two tree) - :CR nil - :DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE) + ;; :DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE) :EXPRESSION (simplify-second-of-two tree) :FLOW-CONDITIONS (simplify-second-of-two tree) - :IN nil ;; this is like simplify-second-of-two except if there isn't ;; a second element it returns nil - :LINE (when (= (count tree) 2) (simplify (nth tree 1))) - :LINES (loop [lines tree result '()] - (let [line (simplify (second lines)) - ;; the reason for putting :BLANK in the result in place - ;; of lines that weren't rules is so that we can keep - ;; track of the source text of the line we're compiling. - result' (concat result (list (or line :BLANK)))] - (when-not (= :LINES (first lines)) - (throw (ex-info "Unexpeced parse tree: LINES" - {:lines lines}))) - (case (count lines) - 2 result' - 4 (recur (nth lines 3) result') - (throw (ex-info "Unexpeced parse tree: LINES" - {:lines lines}))))) + :LINE (if (= (count tree) 2) (simplify (nth tree 1)) tree) + :LINES (map simplify (rest tree)) :PROPERTY (simplify-second-of-two tree) - :PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree) - :OR nil - :SPACE nil + :PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree) :STATE (list :PROPERTY-CONDITION (list :SYMBOL "state") '(:QUALIFIER @@ -105,20 +84,7 @@ (:IS "is"))) (list :EXPRESSION (list :VALUE (second tree)))) - :THEN nil :VALUE (simplify-second-of-two tree) + ;; default (remove nil? (map simplify tree))) tree)) - -;; OK, there is a major unresolved problem. If there is a determiner condition, -;; the tree as parsed from natural language is the wrong shape, and we're -;; going to have to restructure it somewhere to being the determiner upstream -;; of the property conditions. It *may* be possible to do that in `generate`. - -(defn simplify-determiner-condition - [tree] - (apply vector - (cons :DETERMINER-CONDITION - (cons - (simplify-second-of-two (second tree)) - (rest (rest tree)))))) diff --git a/test/mw_parser/declarative_test.clj b/test/mw_parser/declarative_test.clj index 6a0bb67..810f3b3 100644 --- a/test/mw_parser/declarative_test.clj +++ b/test/mw_parser/declarative_test.clj @@ -42,22 +42,22 @@ (deftest exception-tests (testing "Constructions which should cause exceptions to be thrown" - (is (thrown-with-msg? Exception #"^I did not understand.*" + (is (thrown-with-msg? Exception #"^Parse error at line.*" (parse "the quick brown fox jumped over the lazy dog")) "Exception thrown if rule text does not match grammar") - (is (thrown-with-msg? Exception #"^I did not understand.*" + (is (thrown-with-msg? Exception #"^Parse error at line.*" (parse "if i have a cat on my lap then everything is fine")) "Exception thrown if rule text does not match grammar") ;; TODO: these two should be moved to generate-test; the exception should be ;; being thrown (but isn't) in the generate phase. (is (thrown-with-msg? Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions" - (generate (simplify (parse "if state is new then x should be 0")) + (generate (simplify (parse "if state is new then x should be 0"))) "Exception thrown on attempt to set 'x'") (is (thrown-with-msg? Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions" - (generate (simplify (parse "if state is new then y should be 0")))) - "Exception thrown on attempt to set 'y'"))) + (generate (simplify (parse "if state is new then y should be 0"))) + "Exception thrown on attempt to set 'y'"))))) (deftest correctness-tests ;; these are, in so far as possible, the same as the correctness-tests in core-tests - i.e., the two compilers @@ -301,7 +301,7 @@ "Left hand side of world has no high neighbours, so rule should not fire."))) ;; more than number neighbours have property more than numeric-value - (testing "More than number neighbours have property more than symbolic-value" + (testing "More than number neighbours have property more than number" (let [afn (first (compile "if more than 2 neighbours have altitude more than 10 then state should be beach")) world (transform-world (make-world 3 3) @@ -492,8 +492,8 @@ (make-world 5 5) (compile (join "\n" - (list "if x is less than 2 then altitude should be 11 and state should be grassland" - "if x is more than 1 then altitude should be 0 and state should be water"))))] + (list "if state is new and x is less than 2 then altitude should be 11 and state should be grassland" + "if state is new and x is more than 1 then altitude should be 0 and state should be water"))))] (is (= (:state (apply afn (list {:x 2 :y 2} world))) :beach) "Rule fires when condition is met (strip of altitude 11 down right hand side)") (is (nil? (apply afn (list {:x 0 :y 1} world)))