Work on flows, but also preparing for i18n.

This commit is contained in:
Simon Brooke 2023-07-09 22:25:50 +01:00
parent fe92045f14
commit bbaca4710b
4 changed files with 116 additions and 65 deletions

2
.gitignore vendored
View file

@ -3,6 +3,6 @@ buildall.tmp.*
.lein-repl-history .lein-repl-history
target/ target/
pom.xml pom.xml
.calva/
.nrepl-port .nrepl-port

View file

@ -5,7 +5,7 @@
[clojure.string :refer [join trim]] [clojure.string :refer [join trim]]
[mw-parser.errors :refer [throw-parse-exception]] [mw-parser.errors :refer [throw-parse-exception]]
[mw-parser.generate :refer [generate]] [mw-parser.generate :refer [generate]]
[mw-parser.simplify :refer [simplify]] [mw-parser.simplify :refer [simplify-rule]]
[mw-parser.utils :refer [rule?]] [mw-parser.utils :refer [rule?]]
[trptr.java-wrapper.locale :refer [get-default]]) [trptr.java-wrapper.locale :refer [get-default]])
(:import [java.util Locale])) (:import [java.util Locale]))
@ -33,85 +33,97 @@
;;;; ;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def grammar (def rule-grammar
"Basic rule language grammar. "Basic rule language grammar.
in order to simplify translation into other natural languages, all in order to simplify translation into other natural languages, all
TOKENS within the parser should be unambiguou." TOKENS within the parser should be unambiguou."
(join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;" (join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;"
"CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
"DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;"
"CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;"
"CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
"WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"
"NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;"
"PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
"PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;"
"EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
"SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;"
"DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;"
"RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;"
"NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;"
"NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;"
"COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;"
"QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;"
"QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;"
"EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;"
"COMPARATIVE := MORE | LESS;"
"DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;"
"PROPERTY := SYMBOL;"
"VALUE := SYMBOL | NUMBER;"
"OPERATOR := '+' | '-' | '*' | '/';"
"NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
"ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS" "ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS"
"ACTION := SIMPLE-ACTION | PROBABLE-ACTION;" "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;"
"PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;" "PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;"
"SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;" "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"]))
"SPACE := #'\\s+';"]))
(def common-grammar
"Grammar rules used both in the rule grammar and in the flow grammar"
(join "\n" ["COMPARATIVE := MORE | LESS;"
"COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;"
"CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
"CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
"CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;"
"DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;"
"DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;"
"DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;"
"EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;"
"EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
"NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;"
"NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;"
"NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
"NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;"
"OPERATOR := '+' | '-' | '*' | '/';"
"PROPERTY := SYMBOL;"
"PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;"
"PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
"QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;"
"QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;"
"RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;"
"SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;"
"SPACE := #'\\s+';"
"VALUE := SYMBOL | NUMBER;"
"VALUE := SYMBOL | NUMBER;"
"WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"
]))
(def keywords-en (def keywords-en
"English language keyword literals used in rules. "English language keyword literals used in rules - both in production
rules (this namespace) and in flow rules (see mw-parser.flow).
It's a long term aim that the rule language should be easy to It's a long term aim that the rule language should be easy to
internationalise; this isn't a full solution but it's a step towards internationalise; this isn't a full solution but it's a step towards
a solution." a solution."
(join "\n" ["IF := 'if';" (join "\n" ["ALL := 'all'"
"THEN := 'then';"
"THAN := 'than';"
"OR := 'or';"
"NOT := 'not';"
"AND := 'and';" "AND := 'and';"
"SOME := 'some';"
"NONE := 'no';"
"ALL := 'all'"
"BETWEEN := 'between';"
"WITHIN := 'within';"
"IN := 'in';"
"MORE := 'more' | 'greater';"
"LESS := 'less' | 'fewer';"
"NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';"
"EQUAL := 'equal to';"
"IS := 'is' | 'are' | 'have' | 'has';"
"CHANCE-IN := 'chance in';"
"BECOMES := 'should be' | 'becomes';" "BECOMES := 'should be' | 'becomes';"
"BETWEEN := 'between';"
"CHANCE-IN := 'chance in';"
"EACH := 'each' | 'every' | 'all';"
"EQUAL := 'equal to';"
"FLOW := 'flow' | 'move';"
"FROM := 'from';"
"IF := 'if';"
"IN := 'in';"
"IS := 'is' | 'are' | 'have' | 'has';"
"LEAST := 'least';"
"LESS := 'less' | 'fewer';"
"MORE := 'more' | 'greater';"
"MOST := 'most';"
"NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';"
"NONE := 'no';"
"NOT := 'not';"
"OR := 'or';"
"SOME := 'some';"
;; SYMBOL is in the per-language file so that languages that use ;; SYMBOL is in the per-language file so that languages that use
;; (e.g.) Cyrillic characters can change the definition. ;; (e.g.) Cyrillic characters can change the definition.
"SYMBOL := #'[a-z]+';" "SYMBOL := #'[a-z]+';"
])) "THAN := 'than';"
"THEN := 'then';"
"TO := 'to';"
"WITH := 'with' | 'where' | 'having';"
"WITHIN := 'within';"]))
(defn select-keywords-for-locale (defn keywords-for-locale
"For now, just return `keywords-en`; plan is to have resource files of "For now, just return `keywords-en`; plan is to have resource files of
keywords for different languages in a resource directory, but that isn't keywords for different languages in a resource directory, but that isn't
done yet. It's probably not going to work easily for languages that use done yet. It's probably not going to work easily for languages that use
non-latin alphabets, anyway." non-latin alphabets, anyway."
([] ([]
(select-keywords-for-locale (get-default))) (keywords-for-locale (get-default)))
([^Locale locale] ([^Locale locale]
keywords-en)) keywords-en))
(def parse-rule (def parse-rule
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(insta/parser (join "\n" [grammar (select-keywords-for-locale)]))) (insta/parser (join "\n" [rule-grammar common-grammar (keywords-for-locale)])))
(defn compile-rule (defn compile-rule
"Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules, "Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules,
@ -125,7 +137,7 @@
([rule-text return-tuple?] ([rule-text return-tuple?]
(assert (string? rule-text)) (assert (string? rule-text))
(let [rule (trim rule-text) (let [rule (trim rule-text)
tree (simplify (parse-rule rule)) tree (simplify-rule (parse-rule rule))
afn (if (rule? tree) (eval (generate tree)) afn (if (rule? tree) (eval (generate tree))
;; else ;; else
(throw-parse-exception tree))] (throw-parse-exception tree))]

View file

@ -0,0 +1,26 @@
(ns ^{:doc "A very simple parser which parses flow rules."
:author "Simon Brooke"}
mw-parser.flow
(:require [clojure.string :refer [join]]
[instaparse.core :as insta]
[mw-parser.declarative :refer [common-grammar keywords-for-locale]]))
(def flow-grammar
"Grammar for flow rules"
(join "\n" ["FLOW-RULE := FLOW SPACE QUANTITY SPACE PROPERTY SPACE FROM SPACE SOURCE SPACE TO-HOW SPACE DESTINATION;"
"PERCENTAGE := NUMBER #'%';"
"QUANTITY := PERCENTAGE | NUMBER;"
"SOURCE := STATE | STATE SPACE WITH SPACE CONDITIONS;"
"DESTINATION := STATE | STATE SPACE WITH SPACE FLOW-CONDITIONS;"
"DETERMINER := MOST | LEAST;"
"DETERMINER-CONDITION := DETERMINER SPACE PROPERTY | DETERMINER SPACE PROPERTY SPACE WITHIN SPACE NUMBER;"
"FLOW-CONDITIONS := DETERMINER-CONDITION | CONDITIONS"
"STATE := SYMBOL;"
"TO-HOW := TO | TO-EACH | TO-FIRST;"
"TO-EACH := TO SPACE EACH | TO SPACE ALL;"
"TO-FIRST := TO SPACE EACH"
]))
(def parse-flow
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(insta/parser (join "\n" [flow-grammar common-grammar (keywords-for-locale)])))

View file

@ -26,8 +26,7 @@
;;;; ;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(declare simplify-rule)
(declare simplify)
(defn simplify-qualifier (defn simplify-qualifier
"Given that this `tree` fragment represents a qualifier, what "Given that this `tree` fragment represents a qualifier, what
@ -40,23 +39,21 @@
(coll? (first tree)) (or (simplify-qualifier (first tree)) (coll? (first tree)) (or (simplify-qualifier (first tree))
(simplify-qualifier (rest tree))) (simplify-qualifier (rest tree)))
(coll? tree) (simplify-qualifier (rest tree)) (coll? tree) (simplify-qualifier (rest tree))
true tree)) :else tree))
(defn simplify-second-of-two (defn simplify-second-of-two
"There are a number of possible simplifications such that if the `tree` has "There are a number of possible simplifications such that if the `tree` has
only two elements, the second is semantically sufficient." only two elements, the second is semantically sufficient."
[tree] [tree]
(if (= (count tree) 2) (simplify (nth tree 1)) tree)) (if (= (count tree) 2) (simplify-rule (nth tree 1)) tree))
(defn simplify-quantifier (defn simplify-quantifier
"If this quantifier is a number, 'simplifiy' it into a comparative whose operator is '=' "If this quantifier is a number, 'simplifiy' it into a comparative whose operator is '='
and whose quantity is that number. This is actually more complicated but makes generation easier." and whose quantity is that number. This is actually more complicated but makes generation easier."
[tree] [tree]
(if (number? (second tree)) [:COMPARATIVE '= (second tree)] (simplify (second tree)))) (if (number? (second tree)) [:COMPARATIVE '= (second tree)] (simplify-rule (second tree))))
(defn simplify-rule
(defn simplify
"Simplify/canonicalise this `tree`. Opportunistically replace complex fragments with "Simplify/canonicalise this `tree`. Opportunistically replace complex fragments with
semantically identical simpler fragments" semantically identical simpler fragments"
[tree] [tree]
@ -64,7 +61,7 @@
(coll? tree) (coll? tree)
(case (first tree) (case (first tree)
:ACTION (simplify-second-of-two tree) :ACTION (simplify-second-of-two tree)
:ACTIONS (cons (first tree) (simplify (rest tree))) :ACTIONS (cons (first tree) (simplify-rule (rest tree)))
:CHANCE-IN nil :CHANCE-IN nil
:COMPARATIVE (simplify-second-of-two tree) :COMPARATIVE (simplify-second-of-two tree)
:CONDITION (simplify-second-of-two tree) :CONDITION (simplify-second-of-two tree)
@ -76,6 +73,22 @@
:THEN nil :THEN nil
:AND nil :AND nil
:VALUE (simplify-second-of-two tree) :VALUE (simplify-second-of-two tree)
(remove nil? (map simplify tree))) (remove nil? (map simplify-rule tree)))
tree)) tree))
(defn simplify-flow
[tree]
(if (coll? tree)
(case (first tree)
:DETERMINER (simplify-second-of-two tree)
:SPACE nil
:STATE [:PROPERTY-CONDITION
[:SYMBOL "state"]
[:QUALIFIER
[:EQUIVALENCE
[:IS "is"]]]
[:EXPRESSION
[:VALUE
(second tree)]]]
(remove nil? (map simplify-flow tree)))
tree))