001 (ns ^{:doc "A very simple parser which parses production rules."
002 :author "Simon Brooke"}
003 mw-parser.declarative
004 (:require [instaparse.core :refer [parser]]
005 [clojure.string :refer [join trim]]
006 [mw-parser.errors :refer [throw-parse-exception]]
007 [mw-parser.generate :refer [generate]]
008 [mw-parser.simplify :refer [simplify-rule]]
009 [mw-parser.utils :refer [rule?]]
010 [trptr.java-wrapper.locale :refer [get-default]])
011 (:import [java.util Locale]))
012
013 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
014 ;;;;
015 ;;;; mw-parser: a rule parser for MicroWorld.
016 ;;;;
017 ;;;; This program is free software; you can redistribute it and/or
018 ;;;; modify it under the terms of the GNU General Public License
019 ;;;; as published by the Free Software Foundation; either version 2
020 ;;;; of the License, or (at your option) any later version.
021 ;;;;
022 ;;;; This program is distributed in the hope that it will be useful,
023 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
024 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
025 ;;;; GNU General Public License for more details.
026 ;;;;
027 ;;;; You should have received a copy of the GNU General Public License
028 ;;;; along with this program; if not, write to the Free Software
029 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
030 ;;;; USA.
031 ;;;;
032 ;;;; Copyright (C) 2014 Simon Brooke
033 ;;;;
034 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
035
036 (def rule-grammar
037 "Basic rule language grammar.
038
039 in order to simplify translation into other natural languages, all
040 TOKENS within the parser should be unambiguou."
041 (join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;"
042 "ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS"
043 "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;"
044 "PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;"
045 "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"]))
046
047 (def common-grammar
048 "Grammar rules used both in the rule grammar and in the flow grammar"
049 (join "\n" ["COMPARATIVE := MORE | LESS;"
050 "COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;"
051 "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
052 "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
053 "CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;"
054 "DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;"
055 "DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;"
056 "DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;"
057 "EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;"
058 "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
059 "NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;"
060 "NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;"
061 "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
062 "NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;"
063 "OPERATOR := '+' | '-' | '*' | '/';"
064 "PROPERTY := SYMBOL;"
065 "PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;"
066 "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
067 "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;"
068 "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;"
069 "RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;"
070 "SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;"
071 "SPACE := #'\\s+';"
072 "VALUE := SYMBOL | NUMBER;"
073 "VALUE := SYMBOL | NUMBER;"
074 "WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"
075 ]))
076
077 (def keywords-en
078 "English language keyword literals used in rules - both in production
079 rules (this namespace) and in flow rules (see mw-parser.flow).
080
081 It's a long term aim that the rule language should be easy to
082 internationalise; this isn't a full solution but it's a step towards
083 a solution."
084 (join "\n" ["ALL := 'all'"
085 "AND := 'and';"
086 "BECOMES := 'should be' | 'becomes';"
087 "BETWEEN := 'between';"
088 "CHANCE-IN := 'chance in';"
089 "EACH := 'each' | 'every' | 'all';"
090 "EQUAL := 'equal to';"
091 "FIRST := 'first';"
092 "FLOW := 'flow' | 'move';"
093 "FROM := 'from';"
094 "IF := 'if';"
095 "IN := 'in';"
096 "IS := 'is' | 'are' | 'have' | 'has';"
097 "LEAST := 'least';"
098 "LESS := 'less' | 'fewer';"
099 "MORE := 'more' | 'greater';"
100 "MOST := 'most';"
101 "NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';"
102 "NONE := 'no';"
103 "NOT := 'not';"
104 "OR := 'or';"
105 "SOME := 'some';"
106 ;; SYMBOL is in the per-language file so that languages that use
107 ;; (e.g.) Cyrillic characters can change the definition.
108 "SYMBOL := #'[a-z]+';"
109 "THAN := 'than';"
110 "THEN := 'then';"
111 "TO := 'to';"
112 "WITH := 'with' | 'where' | 'having';"
113 "WITHIN := 'within';"]))
114
115 (defn keywords-for-locale
116 "For now, just return `keywords-en`; plan is to have resource files of
117 keywords for different languages in a resource directory, but that isn't
118 done yet. It's probably not going to work easily for languages that use
119 non-latin alphabets, anyway."
120 ([]
121 (keywords-for-locale (get-default)))
122 ([^Locale _locale]
123 keywords-en))
124
125 (defmacro build-parser
126 "Compose this grammar fragment `g` with the common grammar fragments to
127 make a complete grammar, and return a parser for that complete grammar."
128 [g]
129 `(parser (join "\n" [~g common-grammar (keywords-for-locale)])))
130
131 (def parse-rule
132 "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
133 (build-parser rule-grammar))
134
135 (defn compile-rule
136 "Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules,
137 into Clojure source, and then compile it into an anonymous
138 function object, getting round the problem of binding mw-engine.utils in
139 the compiling environment. If `return-tuple?` is present and true, return
140 a list comprising the anonymous function compiled, and the function from
141 which it was compiled.
142
143 Throws an exception if parsing fails."
144 ([rule-text return-tuple?]
145 (assert (string? rule-text))
146 (let [rule (trim rule-text)
147 tree (simplify-rule (parse-rule rule))
148 afn (if (rule? tree) (eval (generate tree))
149 ;; else
150 (throw-parse-exception tree))]
151 (if return-tuple?
152 (list afn rule)
153 ;; else
154 afn)))
155 ([rule-text]
156 (compile-rule rule-text false)))
157