From bbaca4710b1a0158e5f32cc8b899f0d88ab3890b Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Sun, 9 Jul 2023 22:25:50 +0100 Subject: [PATCH] Work on flows, but also preparing for i18n. --- .gitignore | 2 +- src/mw_parser/declarative.clj | 120 +++++++++++++++++++--------------- src/mw_parser/flow.clj | 26 ++++++++ src/mw_parser/simplify.clj | 33 +++++++--- 4 files changed, 116 insertions(+), 65 deletions(-) diff --git a/.gitignore b/.gitignore index ab6836c..88fb07f 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ buildall.tmp.* .lein-repl-history target/ pom.xml - +.calva/ .nrepl-port diff --git a/src/mw_parser/declarative.clj b/src/mw_parser/declarative.clj index 15e88b3..3239c62 100644 --- a/src/mw_parser/declarative.clj +++ b/src/mw_parser/declarative.clj @@ -5,7 +5,7 @@ [clojure.string :refer [join trim]] [mw-parser.errors :refer [throw-parse-exception]] [mw-parser.generate :refer [generate]] - [mw-parser.simplify :refer [simplify]] + [mw-parser.simplify :refer [simplify-rule]] [mw-parser.utils :refer [rule?]] [trptr.java-wrapper.locale :refer [get-default]]) (:import [java.util Locale])) @@ -33,85 +33,97 @@ ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(def grammar +(def rule-grammar "Basic rule language grammar. in order to simplify translation into other natural languages, all TOKENS within the parser should be unambiguou." (join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;" - "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;" - "DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;" - "CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;" - "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;" - "WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;" - "NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;" - "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;" - "PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;" - "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;" - "SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;" - "DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;" - "RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;" - "NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;" - "NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;" - "COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;" - "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;" - "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;" - "EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;" - "COMPARATIVE := MORE | LESS;" - "DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;" - "PROPERTY := SYMBOL;" - "VALUE := SYMBOL | NUMBER;" - "OPERATOR := '+' | '-' | '*' | '/';" - "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';" "ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS" "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;" "PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;" - "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;" - "SPACE := #'\\s+';"])) + "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"])) + +(def common-grammar + "Grammar rules used both in the rule grammar and in the flow grammar" + (join "\n" ["COMPARATIVE := MORE | LESS;" + "COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;" + "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;" + "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;" + "CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;" + "DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;" + "DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;" + "DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;" + "EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;" + "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;" + "NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;" + "NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;" + "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';" + "NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;" + "OPERATOR := '+' | '-' | '*' | '/';" + "PROPERTY := SYMBOL;" + "PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;" + "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;" + "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;" + "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;" + "RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;" + "SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;" + "SPACE := #'\\s+';" + "VALUE := SYMBOL | NUMBER;" + "VALUE := SYMBOL | NUMBER;" + "WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;" + ])) (def keywords-en - "English language keyword literals used in rules. + "English language keyword literals used in rules - both in production + rules (this namespace) and in flow rules (see mw-parser.flow). It's a long term aim that the rule language should be easy to internationalise; this isn't a full solution but it's a step towards a solution." - (join "\n" ["IF := 'if';" - "THEN := 'then';" - "THAN := 'than';" - "OR := 'or';" - "NOT := 'not';" - "AND := 'and';" - "SOME := 'some';" - "NONE := 'no';" - "ALL := 'all'" - "BETWEEN := 'between';" - "WITHIN := 'within';" - "IN := 'in';" - "MORE := 'more' | 'greater';" - "LESS := 'less' | 'fewer';" - "NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';" - "EQUAL := 'equal to';" - "IS := 'is' | 'are' | 'have' | 'has';" - "CHANCE-IN := 'chance in';" - "BECOMES := 'should be' | 'becomes';" + (join "\n" ["ALL := 'all'" + "AND := 'and';" + "BECOMES := 'should be' | 'becomes';" + "BETWEEN := 'between';" + "CHANCE-IN := 'chance in';" + "EACH := 'each' | 'every' | 'all';" + "EQUAL := 'equal to';" + "FLOW := 'flow' | 'move';" + "FROM := 'from';" + "IF := 'if';" + "IN := 'in';" + "IS := 'is' | 'are' | 'have' | 'has';" + "LEAST := 'least';" + "LESS := 'less' | 'fewer';" + "MORE := 'more' | 'greater';" + "MOST := 'most';" + "NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';" + "NONE := 'no';" + "NOT := 'not';" + "OR := 'or';" + "SOME := 'some';" ;; SYMBOL is in the per-language file so that languages that use ;; (e.g.) Cyrillic characters can change the definition. - "SYMBOL := #'[a-z]+';" - ])) + "SYMBOL := #'[a-z]+';" + "THAN := 'than';" + "THEN := 'then';" + "TO := 'to';" + "WITH := 'with' | 'where' | 'having';" + "WITHIN := 'within';"])) -(defn select-keywords-for-locale +(defn keywords-for-locale "For now, just return `keywords-en`; plan is to have resource files of keywords for different languages in a resource directory, but that isn't done yet. It's probably not going to work easily for languages that use non-latin alphabets, anyway." ([] - (select-keywords-for-locale (get-default))) + (keywords-for-locale (get-default))) ([^Locale locale] keywords-en)) (def parse-rule "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." - (insta/parser (join "\n" [grammar (select-keywords-for-locale)]))) + (insta/parser (join "\n" [rule-grammar common-grammar (keywords-for-locale)]))) (defn compile-rule "Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules, @@ -125,7 +137,7 @@ ([rule-text return-tuple?] (assert (string? rule-text)) (let [rule (trim rule-text) - tree (simplify (parse-rule rule)) + tree (simplify-rule (parse-rule rule)) afn (if (rule? tree) (eval (generate tree)) ;; else (throw-parse-exception tree))] diff --git a/src/mw_parser/flow.clj b/src/mw_parser/flow.clj index e69de29..80f8b50 100644 --- a/src/mw_parser/flow.clj +++ b/src/mw_parser/flow.clj @@ -0,0 +1,26 @@ +(ns ^{:doc "A very simple parser which parses flow rules." + :author "Simon Brooke"} + mw-parser.flow + (:require [clojure.string :refer [join]] + [instaparse.core :as insta] + [mw-parser.declarative :refer [common-grammar keywords-for-locale]])) + +(def flow-grammar + "Grammar for flow rules" + (join "\n" ["FLOW-RULE := FLOW SPACE QUANTITY SPACE PROPERTY SPACE FROM SPACE SOURCE SPACE TO-HOW SPACE DESTINATION;" + "PERCENTAGE := NUMBER #'%';" + "QUANTITY := PERCENTAGE | NUMBER;" + "SOURCE := STATE | STATE SPACE WITH SPACE CONDITIONS;" + "DESTINATION := STATE | STATE SPACE WITH SPACE FLOW-CONDITIONS;" + "DETERMINER := MOST | LEAST;" + "DETERMINER-CONDITION := DETERMINER SPACE PROPERTY | DETERMINER SPACE PROPERTY SPACE WITHIN SPACE NUMBER;" + "FLOW-CONDITIONS := DETERMINER-CONDITION | CONDITIONS" + "STATE := SYMBOL;" + "TO-HOW := TO | TO-EACH | TO-FIRST;" + "TO-EACH := TO SPACE EACH | TO SPACE ALL;" + "TO-FIRST := TO SPACE EACH" + ])) + +(def parse-flow + "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." + (insta/parser (join "\n" [flow-grammar common-grammar (keywords-for-locale)]))) diff --git a/src/mw_parser/simplify.clj b/src/mw_parser/simplify.clj index 00529a8..e203b0c 100644 --- a/src/mw_parser/simplify.clj +++ b/src/mw_parser/simplify.clj @@ -26,8 +26,7 @@ ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(declare simplify) +(declare simplify-rule) (defn simplify-qualifier "Given that this `tree` fragment represents a qualifier, what @@ -40,23 +39,21 @@ (coll? (first tree)) (or (simplify-qualifier (first tree)) (simplify-qualifier (rest tree))) (coll? tree) (simplify-qualifier (rest tree)) - true tree)) + :else tree)) (defn simplify-second-of-two "There are a number of possible simplifications such that if the `tree` has only two elements, the second is semantically sufficient." [tree] - (if (= (count tree) 2) (simplify (nth tree 1)) tree)) - + (if (= (count tree) 2) (simplify-rule (nth tree 1)) tree)) (defn simplify-quantifier "If this quantifier is a number, 'simplifiy' it into a comparative whose operator is '=' and whose quantity is that number. This is actually more complicated but makes generation easier." [tree] - (if (number? (second tree)) [:COMPARATIVE '= (second tree)] (simplify (second tree)))) + (if (number? (second tree)) [:COMPARATIVE '= (second tree)] (simplify-rule (second tree)))) - -(defn simplify +(defn simplify-rule "Simplify/canonicalise this `tree`. Opportunistically replace complex fragments with semantically identical simpler fragments" [tree] @@ -64,7 +61,7 @@ (coll? tree) (case (first tree) :ACTION (simplify-second-of-two tree) - :ACTIONS (cons (first tree) (simplify (rest tree))) + :ACTIONS (cons (first tree) (simplify-rule (rest tree))) :CHANCE-IN nil :COMPARATIVE (simplify-second-of-two tree) :CONDITION (simplify-second-of-two tree) @@ -76,6 +73,22 @@ :THEN nil :AND nil :VALUE (simplify-second-of-two tree) - (remove nil? (map simplify tree))) + (remove nil? (map simplify-rule tree))) tree)) +(defn simplify-flow + [tree] + (if (coll? tree) + (case (first tree) + :DETERMINER (simplify-second-of-two tree) + :SPACE nil + :STATE [:PROPERTY-CONDITION + [:SYMBOL "state"] + [:QUALIFIER + [:EQUIVALENCE + [:IS "is"]]] + [:EXPRESSION + [:VALUE + (second tree)]]] + (remove nil? (map simplify-flow tree))) + tree)) \ No newline at end of file