mw-parser/src/mw_parser/declarative.clj

174 lines
7.8 KiB
Clojure

(ns ^{:doc "A very simple parser which parses production rules."
:author "Simon Brooke"}
mw-parser.declarative
(:require [clojure.string :refer [join split trim]]
[instaparse.core :refer [parser]]
[mw-parser.errors :refer [throw-parse-exception]]
[mw-parser.flow :refer [flow-grammar]]
[mw-parser.generate :refer [generate]]
[mw-parser.simplify :refer [simplify]]
[mw-parser.utils :refer [rule?]]
[trptr.java-wrapper.locale :refer [get-default]])
(:import [java.util Locale]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;
;;;; mw-parser: a rule parser for MicroWorld.
;;;;
;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU General Public License
;;;; as published by the Free Software Foundation; either version 2
;;;; of the License, or (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this program; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
;;;; USA.
;;;;
;;;; Copyright (C) 2014 Simon Brooke
;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def rule-grammar
"Basic rule language grammar.
in order to simplify translation into other natural languages, all
TOKENS within the parser should be unambiguou."
(join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;"
"ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS"
"ACTION := SIMPLE-ACTION | PROBABLE-ACTION;"
"PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;"
"SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"]))
(def common-grammar
"Grammar rules used both in the rule grammar and in the flow grammar"
(join "\n" ["COMPARATIVE := MORE | LESS;"
"COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;"
"CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;"
"CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;"
"CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;"
"DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;"
"DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;"
"DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;"
"EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;"
"EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;"
"NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;"
"NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;"
"NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';"
"NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;"
"OPERATOR := '+' | '-' | '*' | '/';"
"PROPERTY := SYMBOL;"
"PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;"
"PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;"
"QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;"
"QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;"
"RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;"
"SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;"
"SPACE := #'\\s+';"
"VALUE := SYMBOL | NUMBER;"
"VALUE := SYMBOL | NUMBER;"
"WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"]))
(def keywords-en
"English language keyword literals used in rules - both in production
rules (this namespace) and in flow rules (see mw-parser.flow).
It's a long term aim that the rule language should be easy to
internationalise; this isn't a full solution but it's a step towards
a solution."
(join "\n" ["ALL := 'all'"
"AND := 'and';"
"BECOMES := 'should be' | 'becomes';"
"BETWEEN := 'between';"
"CHANCE-IN := 'chance in';"
"EACH := 'each' | 'every' | 'all';"
"EQUAL := 'equal to';"
"FIRST := 'first';"
"FLOW := 'flow' | 'move';"
"FROM := 'from';"
"IF := 'if';"
"IN := 'in';"
"IS := 'is' | 'are' | 'have' | 'has';"
"LEAST := 'least';"
"LESS := 'less' | 'fewer';"
"MORE := 'more' | 'greater';"
"MOST := 'most';"
"NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';"
"NONE := 'no';"
"NOT := 'not';"
"OR := 'or';"
"SOME := 'some';"
;; SYMBOL is in the per-language file so that languages that use
;; (e.g.) Cyrillic characters can change the definition.
"SYMBOL := #'[a-z]+';"
"THAN := 'than';"
"THEN := 'then';"
"TO := 'to';"
"WITH := 'with' | 'where' | 'having';"
"WITHIN := 'within';"]))
(defn keywords-for-locale
"For now, just return `keywords-en`; plan is to have resource files of
keywords for different languages in a resource directory, but that isn't
done yet. It's probably not going to work easily for languages that use
non-latin alphabets, anyway."
([]
(keywords-for-locale (get-default)))
([^Locale _locale]
keywords-en))
(defmacro build-parser
"Compose this grammar fragment `g` with the common grammar fragments to
make a complete grammar, and return a parser for that complete grammar."
[g]
`(parser (join "\n" [~g common-grammar (keywords-for-locale)])))
(def parse-rule
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(build-parser rule-grammar))
(def parse-flow
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(build-parser flow-grammar))
(defn parse
"Top level parser function: parse this `text` as either a production or a flow rule;
return a raw parse tree."
[^String rule-text]
(let [text (trim rule-text)]
(when-not (zero? (count text))
(case (first (split text #"\s+"))
"if" (parse-rule text)
"flow" (parse-flow text)
";;" nil
(throw (ex-info "Rule text was not recognised" {:text text}))))))
(defn compile-rule
"Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules,
into Clojure source, and then compile it into an anonymous
function object, getting round the problem of binding mw-engine.utils in
the compiling environment. If `return-tuple?` is present and true, return
a list comprising the anonymous function compiled, and the function from
which it was compiled.
Throws an exception if parsing fails."
([rule-text return-tuple?]
(assert (string? rule-text))
(let [rule (trim rule-text)
tree (simplify (parse-rule rule))
afn (if (rule? tree) (eval (generate tree))
;; else
(throw-parse-exception tree))]
(if return-tuple?
(list afn rule)
;; else
afn)))
([rule-text]
(compile-rule rule-text false)))