Well, I didn't get rid of simplify altogether...

But it and the rest of the code are greatly simplified. All correctness tests pass, many others don't.
This commit is contained in:
Simon Brooke 2023-07-22 21:11:06 +01:00
parent a436499d98
commit 3829bd97a9
4 changed files with 62 additions and 98 deletions

View file

@ -2,10 +2,12 @@
:author "Simon Brooke"} :author "Simon Brooke"}
mw-parser.declarative mw-parser.declarative
(:require [clojure.string :refer [join split-lines]] (:require [clojure.string :refer [join split-lines]]
[instaparse.core :refer [parser]] [instaparse.core :refer [failure? get-failure parser]]
[instaparse.failure :refer [pprint-failure]]
[mw-parser.flow :refer [flow-grammar]] [mw-parser.flow :refer [flow-grammar]]
[mw-parser.generate :refer [generate]] [mw-parser.generate :refer [generate]]
[mw-parser.simplify :refer [simplify]] [mw-parser.simplify :refer [simplify]]
[taoensso.timbre :as l]
[trptr.java-wrapper.locale :refer [get-default]]) [trptr.java-wrapper.locale :refer [get-default]])
(:import [java.util Locale])) (:import [java.util Locale]))
@ -39,50 +41,51 @@
(def ruleset-grammar (def ruleset-grammar
"Experimental: parse a whole file in one go." "Experimental: parse a whole file in one go."
(join "\n" ["LINES := LINE | LINE CR LINES;" ;; TODO: bug here. We're double-counting (some) blank lines
"LINE := RULE | FLOW-RULE | CR | COMMENT | '' ;" (join "\n" ["LINES := (LINE)+;"
"CR := #'[\\r\\n]';" "LINE := RULE <CR> | FLOW-RULE <CR> | COMMENT <CR> | <CR> ;"
"COMMENT := #'[;#]+[^\\r\\n]*' | #'/\\*.*\\*/'"])) "CR := #'[ \\t]*[\\r\\n][- \\t]*';"
"COMMENT := #'[;\\#]+[^\\r\\n]*' | #'/\\*.*\\*/'"]))
(def rule-grammar (def rule-grammar
"Basic rule language grammar. "Basic rule language grammar.
in order to simplify translation into other natural languages, all in order to simplify translation into other natural languages, all
TOKENS within the parser should be unambiguous." TOKENS within the parser should be unambiguous."
(join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;" (join "\n" ["RULE := IF <SPACE> CONDITIONS <SPACE> <THEN> <SPACE> ACTIONS;"
"ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS" "ACTIONS := ACTION | (ACTION <SPACE> <AND> <SPACE> ACTION)+"
"ACTION := SIMPLE-ACTION | PROBABLE-ACTION;" "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;"
"PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;" "PROBABLE-ACTION := VALUE <SPACE> <CHANCE-IN> <SPACE> VALUE <SPACE> SIMPLE-ACTION;"
"SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"])) "SIMPLE-ACTION := SYMBOL <SPACE> BECOMES <SPACE> EXPRESSION;"]))
(def common-grammar (def common-grammar
"Grammar rules used both in the rule grammar and in the flow grammar" "Grammar rules used both in the rule grammar and in the flow grammar"
(join "\n" ["COMPARATIVE := MORE | LESS;" (join "\n" ["COMPARATIVE := MORE | LESS;"
"COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;" "COMPARATIVE-QUALIFIER := IS <SPACE> COMPARATIVE <SPACE> THAN | COMPARATIVE <SPACE> THAN;"
"CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;" "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
"CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;" "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
"CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;" "CONJUNCT-CONDITION := CONDITION <SPACE> <AND> <SPACE> CONDITIONS;"
"DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;" "DISJUNCT-CONDITION := CONDITION <SPACE> <OR> <SPACE> CONDITIONS;"
"DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;" "DISJUNCT-EXPRESSION := <IN> <SPACE> DISJUNCT-VALUE;"
"DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;" "DISJUNCT-VALUE := (VALUE <SPACE> <OR> <SPACE>)* VALUE;"
"EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;" "EQUIVALENCE := IS <SPACE> EQUAL | EQUAL | IS ;"
"EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;" "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
"NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;" "NEGATED-QUALIFIER := QUALIFIER <SPACE> NOT | NOT <SPACE> QUALIFIER;"
"NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;" "NEIGHBOURS-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> IS <SPACE> PROPERTY-CONDITION | QUALIFIER <SPACE> NEIGHBOURS-CONDITION;"
"NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';" "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
"NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;" "NUMERIC-EXPRESSION := VALUE | VALUE <SPACE> OPERATOR <SPACE> NUMERIC-EXPRESSION;"
"OPERATOR := '+' | '-' | '*' | '/';" "OPERATOR := '+' | '-' | '*' | '/';"
"PROPERTY := SYMBOL;" "PROPERTY := SYMBOL;"
"PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;" "PROPERTY-CONDITION := PROPERTY <SPACE> QUALIFIER <SPACE> EXPRESSION | VALUE;"
"PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;" "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
"QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;" "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS <SPACE> QUALIFIER;"
"QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;" "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE <SPACE> THAN <SPACE> NUMBER;"
"RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;" "RANGE-EXPRESSION := BETWEEN <SPACE> NUMERIC-EXPRESSION <SPACE> AND <SPACE> NUMERIC-EXPRESSION;"
"SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;" "SIMPLE-EXPRESSION := QUALIFIER <SPACE> EXPRESSION | VALUE;"
"SPACE := #'[ \\t]+';" "SPACE := #'[ \\t]+';"
"VALUE := SYMBOL | NUMBER;" "VALUE := SYMBOL | NUMBER;"
"VALUE := SYMBOL | NUMBER;" "VALUE := SYMBOL | NUMBER;"
"WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"])) "WITHIN-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> WITHIN <SPACE> NUMBER <SPACE> IS <SPACE> PROPERTY-CONDITION-OR-EXPRESSION;"]))
(def keywords-en (def keywords-en
"English language keyword literals used in rules - both in production "English language keyword literals used in rules - both in production
@ -132,10 +135,20 @@
([^Locale _locale] ([^Locale _locale]
keywords-en)) keywords-en))
(def parse (def ^:private raw-parser
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(parser (join "\n" [ruleset-grammar rule-grammar flow-grammar common-grammar (keywords-for-locale)]))) (parser (join "\n" [ruleset-grammar rule-grammar flow-grammar common-grammar (keywords-for-locale)])))
(defn parse
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
[arg]
(let [parse-tree-or-error (raw-parser arg :total true)]
(if (failure? parse-tree-or-error)
(throw (ex-info (format "Some rules were not understood:\n%s"
(pprint-failure (get-failure parse-tree-or-error)))
{:source arg
:failure (get-failure parse-tree-or-error)}))
parse-tree-or-error)))
(defn- compile-rule (defn- compile-rule
"Compile a rule function from this `parse-tree` derived from this `source` "Compile a rule function from this `parse-tree` derived from this `source`
at the zero-based line number `n` in the source file; return a compiled at the zero-based line number `n` in the source file; return a compiled
@ -143,18 +156,24 @@
* `:rule-type` : the type of rule the function represents; * `:rule-type` : the type of rule the function represents;
* `:parse` : this `parse-tree`; * `:parse` : this `parse-tree`;
* `:source` : the rule source from which the parse tree was derived;
* `:lisp` : the lisp source generated from this `parse-tree`; * `:lisp` : the lisp source generated from this `parse-tree`;
* `:line : the one-based line number of the definition in the source file, * `:line : the one-based line number of the definition in the source file,
i.e. `(inc n)`." i.e. `(inc n)`."
[parse-tree source n] [parse-tree source n]
(when-not (keyword? parse-tree) (if (#{:COMMENT :LINE} (first parse-tree))
(do
(l/info (format "Skipping line %d, `%s`, parse-tree %s."
(inc n) source parse-tree))
nil)
(let [lisp (generate parse-tree) (let [lisp (generate parse-tree)
line-no (inc n)] line-no (inc n)]
(l/info (format "Compiling rule at line %d, `%s`." line-no source))
(try (try
(if (#{'fn 'fn*} (first lisp)) (if (#{'fn 'fn*} (first lisp))
(vary-meta (vary-meta
(eval lisp) (eval lisp)
merge (meta lisp) {:src source :lisp lisp :line line-no}) merge (meta lisp) {:source source :lisp lisp :line line-no})
(throw (throw
(Exception. (Exception.
(format "Parse of `%s` did not return a function: %s" source lisp)))) (format "Parse of `%s` did not return a function: %s" source lisp))))

View file

@ -212,15 +212,6 @@
(assert-type tree :ACTIONS) (assert-type tree :ACTIONS)
(generate-action (first (rest tree)) (second (rest tree)))) (generate-action (first (rest tree)) (second (rest tree))))
(defn generate-disjunct-value
"Generate a disjunct value. Essentially what we need here is to generate a
flat list of values, since the `member` has already been taken care of."
[tree]
(assert-type tree :DISJUNCT-VALUE)
(if (= (count tree) 4)
(cons (generate (second tree)) (generate (nth tree 3)))
(list (generate (second tree)))))
(defn generate-numeric-expression (defn generate-numeric-expression
"From this `tree`, assumed to be a syntactically correct numeric expression, "From this `tree`, assumed to be a syntactically correct numeric expression,
generate and return the appropriate clojure fragment." generate and return the appropriate clojure fragment."
@ -289,18 +280,6 @@
:LESS (let [value (generate (nth quantifier 3))] :LESS (let [value (generate (nth quantifier 3))]
(generate-neighbours-condition '< value pc distance)))))) (generate-neighbours-condition '< value pc distance))))))
(defn- generate-disjunct-expression
[tree]
(assert-type tree :DISJUNCT-EXPRESSION)
(try
(set (map generate (rest tree)))
(catch Exception x
(throw
(ex-info
"Failed to compile :DISJUNCT-EXPRESSION"
{:tree tree}
x)))))
;;; Flow rules. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Flow rules. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; A flow rule DOES NOT return a modified cell; instead, it ;;; A flow rule DOES NOT return a modified cell; instead, it
;;; returns a PLAN to modify the world, in the form of a sequence of `flow` ;;; returns a PLAN to modify the world, in the form of a sequence of `flow`
@ -410,8 +389,8 @@
:CONDITIONS (generate-conditions tree) :CONDITIONS (generate-conditions tree)
:CONJUNCT-CONDITION (generate-conjunct-condition tree) :CONJUNCT-CONDITION (generate-conjunct-condition tree)
:DISJUNCT-CONDITION (generate-disjunct-condition tree) :DISJUNCT-CONDITION (generate-disjunct-condition tree)
:DISJUNCT-EXPRESSION (generate-disjunct-expression tree) :DISJUNCT-EXPRESSION (set (generate (second tree)))
:DISJUNCT-VALUE (generate-disjunct-value tree) :DISJUNCT-VALUE (map generate (rest tree))
:EQUIVALENCE '= :EQUIVALENCE '=
:EXPRESSION (generate (second tree)) :EXPRESSION (generate (second tree))
:FLOW-RULE (generate-flow tree) :FLOW-RULE (generate-flow tree)

View file

@ -65,39 +65,18 @@
(coll? tree) (coll? tree)
(case (first tree) (case (first tree)
:ACTION (simplify-second-of-two tree) :ACTION (simplify-second-of-two tree)
:ACTIONS (cons (first tree) (simplify (rest tree)))
:AND nil
:CHANCE-IN nil
:COMMENT nil
:COMPARATIVE (simplify-second-of-two tree) :COMPARATIVE (simplify-second-of-two tree)
:CONDITION (simplify-second-of-two tree) :CONDITION (simplify-second-of-two tree)
:CONDITIONS (simplify-second-of-two tree) :CONDITIONS (simplify-second-of-two tree)
:CR nil ;; :DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE)
:DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE)
:EXPRESSION (simplify-second-of-two tree) :EXPRESSION (simplify-second-of-two tree)
:FLOW-CONDITIONS (simplify-second-of-two tree) :FLOW-CONDITIONS (simplify-second-of-two tree)
:IN nil
;; this is like simplify-second-of-two except if there isn't ;; this is like simplify-second-of-two except if there isn't
;; a second element it returns nil ;; a second element it returns nil
:LINE (when (= (count tree) 2) (simplify (nth tree 1))) :LINE (if (= (count tree) 2) (simplify (nth tree 1)) tree)
:LINES (loop [lines tree result '()] :LINES (map simplify (rest tree))
(let [line (simplify (second lines))
;; the reason for putting :BLANK in the result in place
;; of lines that weren't rules is so that we can keep
;; track of the source text of the line we're compiling.
result' (concat result (list (or line :BLANK)))]
(when-not (= :LINES (first lines))
(throw (ex-info "Unexpeced parse tree: LINES"
{:lines lines})))
(case (count lines)
2 result'
4 (recur (nth lines 3) result')
(throw (ex-info "Unexpeced parse tree: LINES"
{:lines lines})))))
:PROPERTY (simplify-second-of-two tree) :PROPERTY (simplify-second-of-two tree)
:PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree) :PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree)
:OR nil
:SPACE nil
:STATE (list :PROPERTY-CONDITION :STATE (list :PROPERTY-CONDITION
(list :SYMBOL "state") (list :SYMBOL "state")
'(:QUALIFIER '(:QUALIFIER
@ -105,20 +84,7 @@
(:IS "is"))) (:IS "is")))
(list :EXPRESSION (list :EXPRESSION
(list :VALUE (second tree)))) (list :VALUE (second tree))))
:THEN nil
:VALUE (simplify-second-of-two tree) :VALUE (simplify-second-of-two tree)
;; default
(remove nil? (map simplify tree))) (remove nil? (map simplify tree)))
tree)) tree))
;; OK, there is a major unresolved problem. If there is a determiner condition,
;; the tree as parsed from natural language is the wrong shape, and we're
;; going to have to restructure it somewhere to being the determiner upstream
;; of the property conditions. It *may* be possible to do that in `generate`.
(defn simplify-determiner-condition
[tree]
(apply vector
(cons :DETERMINER-CONDITION
(cons
(simplify-second-of-two (second tree))
(rest (rest tree))))))

View file

@ -42,22 +42,22 @@
(deftest exception-tests (deftest exception-tests
(testing "Constructions which should cause exceptions to be thrown" (testing "Constructions which should cause exceptions to be thrown"
(is (thrown-with-msg? Exception #"^I did not understand.*" (is (thrown-with-msg? Exception #"^Parse error at line.*"
(parse "the quick brown fox jumped over the lazy dog")) (parse "the quick brown fox jumped over the lazy dog"))
"Exception thrown if rule text does not match grammar") "Exception thrown if rule text does not match grammar")
(is (thrown-with-msg? Exception #"^I did not understand.*" (is (thrown-with-msg? Exception #"^Parse error at line.*"
(parse "if i have a cat on my lap then everything is fine")) (parse "if i have a cat on my lap then everything is fine"))
"Exception thrown if rule text does not match grammar") "Exception thrown if rule text does not match grammar")
;; TODO: these two should be moved to generate-test; the exception should be ;; TODO: these two should be moved to generate-test; the exception should be
;; being thrown (but isn't) in the generate phase. ;; being thrown (but isn't) in the generate phase.
(is (thrown-with-msg? (is (thrown-with-msg?
Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions" Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions"
(generate (simplify (parse "if state is new then x should be 0")) (generate (simplify (parse "if state is new then x should be 0")))
"Exception thrown on attempt to set 'x'") "Exception thrown on attempt to set 'x'")
(is (thrown-with-msg? (is (thrown-with-msg?
Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions" Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions"
(generate (simplify (parse "if state is new then y should be 0")))) (generate (simplify (parse "if state is new then y should be 0")))
"Exception thrown on attempt to set 'y'"))) "Exception thrown on attempt to set 'y'")))))
(deftest correctness-tests (deftest correctness-tests
;; these are, in so far as possible, the same as the correctness-tests in core-tests - i.e., the two compilers ;; these are, in so far as possible, the same as the correctness-tests in core-tests - i.e., the two compilers
@ -301,7 +301,7 @@
"Left hand side of world has no high neighbours, so rule should not fire."))) "Left hand side of world has no high neighbours, so rule should not fire.")))
;; more than number neighbours have property more than numeric-value ;; more than number neighbours have property more than numeric-value
(testing "More than number neighbours have property more than symbolic-value" (testing "More than number neighbours have property more than number"
(let [afn (first (compile "if more than 2 neighbours have altitude more than 10 then state should be beach")) (let [afn (first (compile "if more than 2 neighbours have altitude more than 10 then state should be beach"))
world (transform-world world (transform-world
(make-world 3 3) (make-world 3 3)
@ -492,8 +492,8 @@
(make-world 5 5) (make-world 5 5)
(compile (compile
(join "\n" (join "\n"
(list "if x is less than 2 then altitude should be 11 and state should be grassland" (list "if state is new and x is less than 2 then altitude should be 11 and state should be grassland"
"if x is more than 1 then altitude should be 0 and state should be water"))))] "if state is new and x is more than 1 then altitude should be 0 and state should be water"))))]
(is (= (:state (apply afn (list {:x 2 :y 2} world))) :beach) (is (= (:state (apply afn (list {:x 2 :y 2} world))) :beach)
"Rule fires when condition is met (strip of altitude 11 down right hand side)") "Rule fires when condition is met (strip of altitude 11 down right hand side)")
(is (nil? (apply afn (list {:x 0 :y 1} world))) (is (nil? (apply afn (list {:x 0 :y 1} world)))