Well, I didn't get rid of simplify altogether...

But it and the rest of the code are greatly simplified. All correctness tests pass, many others don't.
This commit is contained in:
Simon Brooke 2023-07-22 21:11:06 +01:00
parent a436499d98
commit 3829bd97a9
4 changed files with 62 additions and 98 deletions

View file

@ -2,10 +2,12 @@
:author "Simon Brooke"}
mw-parser.declarative
(:require [clojure.string :refer [join split-lines]]
[instaparse.core :refer [parser]]
[instaparse.core :refer [failure? get-failure parser]]
[instaparse.failure :refer [pprint-failure]]
[mw-parser.flow :refer [flow-grammar]]
[mw-parser.generate :refer [generate]]
[mw-parser.simplify :refer [simplify]]
[taoensso.timbre :as l]
[trptr.java-wrapper.locale :refer [get-default]])
(:import [java.util Locale]))
@ -39,50 +41,51 @@
(def ruleset-grammar
"Experimental: parse a whole file in one go."
(join "\n" ["LINES := LINE | LINE CR LINES;"
"LINE := RULE | FLOW-RULE | CR | COMMENT | '' ;"
"CR := #'[\\r\\n]';"
"COMMENT := #'[;#]+[^\\r\\n]*' | #'/\\*.*\\*/'"]))
;; TODO: bug here. We're double-counting (some) blank lines
(join "\n" ["LINES := (LINE)+;"
"LINE := RULE <CR> | FLOW-RULE <CR> | COMMENT <CR> | <CR> ;"
"CR := #'[ \\t]*[\\r\\n][- \\t]*';"
"COMMENT := #'[;\\#]+[^\\r\\n]*' | #'/\\*.*\\*/'"]))
(def rule-grammar
"Basic rule language grammar.
in order to simplify translation into other natural languages, all
TOKENS within the parser should be unambiguous."
(join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;"
"ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS"
(join "\n" ["RULE := IF <SPACE> CONDITIONS <SPACE> <THEN> <SPACE> ACTIONS;"
"ACTIONS := ACTION | (ACTION <SPACE> <AND> <SPACE> ACTION)+"
"ACTION := SIMPLE-ACTION | PROBABLE-ACTION;"
"PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;"
"SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"]))
"PROBABLE-ACTION := VALUE <SPACE> <CHANCE-IN> <SPACE> VALUE <SPACE> SIMPLE-ACTION;"
"SIMPLE-ACTION := SYMBOL <SPACE> BECOMES <SPACE> EXPRESSION;"]))
(def common-grammar
"Grammar rules used both in the rule grammar and in the flow grammar"
(join "\n" ["COMPARATIVE := MORE | LESS;"
"COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;"
"COMPARATIVE-QUALIFIER := IS <SPACE> COMPARATIVE <SPACE> THAN | COMPARATIVE <SPACE> THAN;"
"CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
"CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
"CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;"
"DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;"
"DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;"
"DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;"
"EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;"
"CONJUNCT-CONDITION := CONDITION <SPACE> <AND> <SPACE> CONDITIONS;"
"DISJUNCT-CONDITION := CONDITION <SPACE> <OR> <SPACE> CONDITIONS;"
"DISJUNCT-EXPRESSION := <IN> <SPACE> DISJUNCT-VALUE;"
"DISJUNCT-VALUE := (VALUE <SPACE> <OR> <SPACE>)* VALUE;"
"EQUIVALENCE := IS <SPACE> EQUAL | EQUAL | IS ;"
"EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
"NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;"
"NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;"
"NEGATED-QUALIFIER := QUALIFIER <SPACE> NOT | NOT <SPACE> QUALIFIER;"
"NEIGHBOURS-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> IS <SPACE> PROPERTY-CONDITION | QUALIFIER <SPACE> NEIGHBOURS-CONDITION;"
"NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
"NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;"
"NUMERIC-EXPRESSION := VALUE | VALUE <SPACE> OPERATOR <SPACE> NUMERIC-EXPRESSION;"
"OPERATOR := '+' | '-' | '*' | '/';"
"PROPERTY := SYMBOL;"
"PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;"
"PROPERTY-CONDITION := PROPERTY <SPACE> QUALIFIER <SPACE> EXPRESSION | VALUE;"
"PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
"QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;"
"QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;"
"RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;"
"SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;"
"QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS <SPACE> QUALIFIER;"
"QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE <SPACE> THAN <SPACE> NUMBER;"
"RANGE-EXPRESSION := BETWEEN <SPACE> NUMERIC-EXPRESSION <SPACE> AND <SPACE> NUMERIC-EXPRESSION;"
"SIMPLE-EXPRESSION := QUALIFIER <SPACE> EXPRESSION | VALUE;"
"SPACE := #'[ \\t]+';"
"VALUE := SYMBOL | NUMBER;"
"VALUE := SYMBOL | NUMBER;"
"WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"]))
"WITHIN-CONDITION := QUANTIFIER <SPACE> NEIGHBOURS <SPACE> WITHIN <SPACE> NUMBER <SPACE> IS <SPACE> PROPERTY-CONDITION-OR-EXPRESSION;"]))
(def keywords-en
"English language keyword literals used in rules - both in production
@ -132,10 +135,20 @@
([^Locale _locale]
keywords-en))
(def parse
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(def ^:private raw-parser
(parser (join "\n" [ruleset-grammar rule-grammar flow-grammar common-grammar (keywords-for-locale)])))
(defn parse
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
[arg]
(let [parse-tree-or-error (raw-parser arg :total true)]
(if (failure? parse-tree-or-error)
(throw (ex-info (format "Some rules were not understood:\n%s"
(pprint-failure (get-failure parse-tree-or-error)))
{:source arg
:failure (get-failure parse-tree-or-error)}))
parse-tree-or-error)))
(defn- compile-rule
"Compile a rule function from this `parse-tree` derived from this `source`
at the zero-based line number `n` in the source file; return a compiled
@ -143,18 +156,24 @@
* `:rule-type` : the type of rule the function represents;
* `:parse` : this `parse-tree`;
* `:source` : the rule source from which the parse tree was derived;
* `:lisp` : the lisp source generated from this `parse-tree`;
* `:line : the one-based line number of the definition in the source file,
i.e. `(inc n)`."
[parse-tree source n]
(when-not (keyword? parse-tree)
(if (#{:COMMENT :LINE} (first parse-tree))
(do
(l/info (format "Skipping line %d, `%s`, parse-tree %s."
(inc n) source parse-tree))
nil)
(let [lisp (generate parse-tree)
line-no (inc n)]
(l/info (format "Compiling rule at line %d, `%s`." line-no source))
(try
(if (#{'fn 'fn*} (first lisp))
(vary-meta
(eval lisp)
merge (meta lisp) {:src source :lisp lisp :line line-no})
merge (meta lisp) {:source source :lisp lisp :line line-no})
(throw
(Exception.
(format "Parse of `%s` did not return a function: %s" source lisp))))

View file

@ -212,15 +212,6 @@
(assert-type tree :ACTIONS)
(generate-action (first (rest tree)) (second (rest tree))))
(defn generate-disjunct-value
"Generate a disjunct value. Essentially what we need here is to generate a
flat list of values, since the `member` has already been taken care of."
[tree]
(assert-type tree :DISJUNCT-VALUE)
(if (= (count tree) 4)
(cons (generate (second tree)) (generate (nth tree 3)))
(list (generate (second tree)))))
(defn generate-numeric-expression
"From this `tree`, assumed to be a syntactically correct numeric expression,
generate and return the appropriate clojure fragment."
@ -289,18 +280,6 @@
:LESS (let [value (generate (nth quantifier 3))]
(generate-neighbours-condition '< value pc distance))))))
(defn- generate-disjunct-expression
[tree]
(assert-type tree :DISJUNCT-EXPRESSION)
(try
(set (map generate (rest tree)))
(catch Exception x
(throw
(ex-info
"Failed to compile :DISJUNCT-EXPRESSION"
{:tree tree}
x)))))
;;; Flow rules. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; A flow rule DOES NOT return a modified cell; instead, it
;;; returns a PLAN to modify the world, in the form of a sequence of `flow`
@ -410,8 +389,8 @@
:CONDITIONS (generate-conditions tree)
:CONJUNCT-CONDITION (generate-conjunct-condition tree)
:DISJUNCT-CONDITION (generate-disjunct-condition tree)
:DISJUNCT-EXPRESSION (generate-disjunct-expression tree)
:DISJUNCT-VALUE (generate-disjunct-value tree)
:DISJUNCT-EXPRESSION (set (generate (second tree)))
:DISJUNCT-VALUE (map generate (rest tree))
:EQUIVALENCE '=
:EXPRESSION (generate (second tree))
:FLOW-RULE (generate-flow tree)

View file

@ -65,39 +65,18 @@
(coll? tree)
(case (first tree)
:ACTION (simplify-second-of-two tree)
:ACTIONS (cons (first tree) (simplify (rest tree)))
:AND nil
:CHANCE-IN nil
:COMMENT nil
:COMPARATIVE (simplify-second-of-two tree)
:CONDITION (simplify-second-of-two tree)
:CONDITIONS (simplify-second-of-two tree)
:CR nil
:DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE)
;; :DISJUNCT-EXPRESSION (simplify-chained-list tree :DISJUNCT-VALUE :VALUE)
:EXPRESSION (simplify-second-of-two tree)
:FLOW-CONDITIONS (simplify-second-of-two tree)
:IN nil
;; this is like simplify-second-of-two except if there isn't
;; a second element it returns nil
:LINE (when (= (count tree) 2) (simplify (nth tree 1)))
:LINES (loop [lines tree result '()]
(let [line (simplify (second lines))
;; the reason for putting :BLANK in the result in place
;; of lines that weren't rules is so that we can keep
;; track of the source text of the line we're compiling.
result' (concat result (list (or line :BLANK)))]
(when-not (= :LINES (first lines))
(throw (ex-info "Unexpeced parse tree: LINES"
{:lines lines})))
(case (count lines)
2 result'
4 (recur (nth lines 3) result')
(throw (ex-info "Unexpeced parse tree: LINES"
{:lines lines})))))
:LINE (if (= (count tree) 2) (simplify (nth tree 1)) tree)
:LINES (map simplify (rest tree))
:PROPERTY (simplify-second-of-two tree)
:PROPERTY-CONDITION-OR-EXPRESSION (simplify-second-of-two tree)
:OR nil
:SPACE nil
:STATE (list :PROPERTY-CONDITION
(list :SYMBOL "state")
'(:QUALIFIER
@ -105,20 +84,7 @@
(:IS "is")))
(list :EXPRESSION
(list :VALUE (second tree))))
:THEN nil
:VALUE (simplify-second-of-two tree)
;; default
(remove nil? (map simplify tree)))
tree))
;; OK, there is a major unresolved problem. If there is a determiner condition,
;; the tree as parsed from natural language is the wrong shape, and we're
;; going to have to restructure it somewhere to being the determiner upstream
;; of the property conditions. It *may* be possible to do that in `generate`.
(defn simplify-determiner-condition
[tree]
(apply vector
(cons :DETERMINER-CONDITION
(cons
(simplify-second-of-two (second tree))
(rest (rest tree))))))

View file

@ -42,22 +42,22 @@
(deftest exception-tests
(testing "Constructions which should cause exceptions to be thrown"
(is (thrown-with-msg? Exception #"^I did not understand.*"
(is (thrown-with-msg? Exception #"^Parse error at line.*"
(parse "the quick brown fox jumped over the lazy dog"))
"Exception thrown if rule text does not match grammar")
(is (thrown-with-msg? Exception #"^I did not understand.*"
(is (thrown-with-msg? Exception #"^Parse error at line.*"
(parse "if i have a cat on my lap then everything is fine"))
"Exception thrown if rule text does not match grammar")
;; TODO: these two should be moved to generate-test; the exception should be
;; being thrown (but isn't) in the generate phase.
(is (thrown-with-msg?
Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions"
(generate (simplify (parse "if state is new then x should be 0"))
(generate (simplify (parse "if state is new then x should be 0")))
"Exception thrown on attempt to set 'x'")
(is (thrown-with-msg?
Exception #"The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions"
(generate (simplify (parse "if state is new then y should be 0"))))
"Exception thrown on attempt to set 'y'")))
(generate (simplify (parse "if state is new then y should be 0")))
"Exception thrown on attempt to set 'y'")))))
(deftest correctness-tests
;; these are, in so far as possible, the same as the correctness-tests in core-tests - i.e., the two compilers
@ -301,7 +301,7 @@
"Left hand side of world has no high neighbours, so rule should not fire.")))
;; more than number neighbours have property more than numeric-value
(testing "More than number neighbours have property more than symbolic-value"
(testing "More than number neighbours have property more than number"
(let [afn (first (compile "if more than 2 neighbours have altitude more than 10 then state should be beach"))
world (transform-world
(make-world 3 3)
@ -492,8 +492,8 @@
(make-world 5 5)
(compile
(join "\n"
(list "if x is less than 2 then altitude should be 11 and state should be grassland"
"if x is more than 1 then altitude should be 0 and state should be water"))))]
(list "if state is new and x is less than 2 then altitude should be 11 and state should be grassland"
"if state is new and x is more than 1 then altitude should be 0 and state should be water"))))]
(is (= (:state (apply afn (list {:x 2 :y 2} world))) :beach)
"Rule fires when condition is met (strip of altitude 11 down right hand side)")
(is (nil? (apply afn (list {:x 0 :y 1} world)))