(ns ^{:doc "A very simple parser which parses production rules." :author "Simon Brooke"} mw-parser.declarative (:require [clojure.string :refer [join split trim]] [instaparse.core :refer [parser]] [mw-parser.errors :refer [throw-parse-exception]] [mw-parser.flow :refer [flow-grammar]] [mw-parser.generate :refer [generate]] [mw-parser.simplify :refer [simplify]] [mw-parser.utils :refer [rule?]] [trptr.java-wrapper.locale :refer [get-default]]) (:import [java.util Locale])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; ;;;; mw-parser: a rule parser for MicroWorld. ;;;; ;;;; This program is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU General Public License ;;;; as published by the Free Software Foundation; either version 2 ;;;; of the License, or (at your option) any later version. ;;;; ;;;; This program is distributed in the hope that it will be useful, ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;;; GNU General Public License for more details. ;;;; ;;;; You should have received a copy of the GNU General Public License ;;;; along with this program; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, ;;;; USA. ;;;; ;;;; Copyright (C) 2014 Simon Brooke ;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (def rule-grammar "Basic rule language grammar. in order to simplify translation into other natural languages, all TOKENS within the parser should be unambiguou." (join "\n" ["RULE := IF SPACE CONDITIONS SPACE THEN SPACE ACTIONS;" "ACTIONS := ACTION | ACTION SPACE AND SPACE ACTIONS" "ACTION := SIMPLE-ACTION | PROBABLE-ACTION;" "PROBABLE-ACTION := VALUE SPACE CHANCE-IN SPACE VALUE SPACE SIMPLE-ACTION;" "SIMPLE-ACTION := SYMBOL SPACE BECOMES SPACE EXPRESSION;"])) (def common-grammar "Grammar rules used both in the rule grammar and in the flow grammar" (join "\n" ["COMPARATIVE := MORE | LESS;" "COMPARATIVE-QUALIFIER := IS SPACE COMPARATIVE SPACE THAN | COMPARATIVE SPACE THAN;" "CONDITION := WITHIN-CONDITION | NEIGHBOURS-CONDITION | PROPERTY-CONDITION;" "CONDITIONS := DISJUNCT-CONDITION | CONJUNCT-CONDITION | CONDITION ;" "CONJUNCT-CONDITION := CONDITION SPACE AND SPACE CONDITIONS;" "DISJUNCT-CONDITION := CONDITION SPACE OR SPACE CONDITIONS;" "DISJUNCT-EXPRESSION := IN SPACE DISJUNCT-VALUE;" "DISJUNCT-VALUE := VALUE | VALUE SPACE OR SPACE DISJUNCT-VALUE;" "EQUIVALENCE := IS SPACE EQUAL | EQUAL | IS ;" "EXPRESSION := SIMPLE-EXPRESSION | RANGE-EXPRESSION | NUMERIC-EXPRESSION | DISJUNCT-EXPRESSION | VALUE;" "NEGATED-QUALIFIER := QUALIFIER SPACE NOT | NOT SPACE QUALIFIER;" "NEIGHBOURS-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE IS SPACE PROPERTY-CONDITION | QUALIFIER SPACE NEIGHBOURS-CONDITION;" "NUMBER := #'[0-9]+' | #'[0-9]+.[0-9]+';" "NUMERIC-EXPRESSION := VALUE | VALUE SPACE OPERATOR SPACE NUMERIC-EXPRESSION;" "OPERATOR := '+' | '-' | '*' | '/';" "PROPERTY := SYMBOL;" "PROPERTY-CONDITION := PROPERTY SPACE QUALIFIER SPACE EXPRESSION | VALUE;" "PROPERTY-CONDITION-OR-EXPRESSION := PROPERTY-CONDITION | EXPRESSION;" "QUALIFIER := COMPARATIVE-QUALIFIER | NEGATED-QUALIFIER | EQUIVALENCE | IS SPACE QUALIFIER;" "QUANTIFIER := NUMBER | SOME | NONE | ALL | COMPARATIVE SPACE THAN SPACE NUMBER;" "RANGE-EXPRESSION := BETWEEN SPACE NUMERIC-EXPRESSION SPACE AND SPACE NUMERIC-EXPRESSION;" "SIMPLE-EXPRESSION := QUALIFIER SPACE EXPRESSION | VALUE;" "SPACE := #'\\s+';" "VALUE := SYMBOL | NUMBER;" "VALUE := SYMBOL | NUMBER;" "WITHIN-CONDITION := QUANTIFIER SPACE NEIGHBOURS SPACE WITHIN SPACE NUMBER SPACE IS SPACE PROPERTY-CONDITION-OR-EXPRESSION;"])) (def keywords-en "English language keyword literals used in rules - both in production rules (this namespace) and in flow rules (see mw-parser.flow). It's a long term aim that the rule language should be easy to internationalise; this isn't a full solution but it's a step towards a solution." (join "\n" ["ALL := 'all'" "AND := 'and';" "BECOMES := 'should be' | 'becomes';" "BETWEEN := 'between';" "CHANCE-IN := 'chance in';" "EACH := 'each' | 'every' | 'all';" "EQUAL := 'equal to';" "FIRST := 'first';" "FLOW := 'flow' | 'move';" "FROM := 'from';" "IF := 'if';" "IN := 'in';" "IS := 'is' | 'are' | 'have' | 'has';" "LEAST := 'least';" "LESS := 'less' | 'fewer';" "MORE := 'more' | 'greater';" "MOST := 'most';" "NEIGHBOURS := 'neighbour' | 'neighbor' | 'neighbours' | 'neighbors';" "NONE := 'no';" "NOT := 'not';" "OR := 'or';" "SOME := 'some';" ;; SYMBOL is in the per-language file so that languages that use ;; (e.g.) Cyrillic characters can change the definition. "SYMBOL := #'[a-z]+';" "THAN := 'than';" "THEN := 'then';" "TO := 'to';" "WITH := 'with' | 'where' | 'having';" "WITHIN := 'within';"])) (defn keywords-for-locale "For now, just return `keywords-en`; plan is to have resource files of keywords for different languages in a resource directory, but that isn't done yet. It's probably not going to work easily for languages that use non-latin alphabets, anyway." ([] (keywords-for-locale (get-default))) ([^Locale _locale] keywords-en)) (defmacro build-parser "Compose this grammar fragment `g` with the common grammar fragments to make a complete grammar, and return a parser for that complete grammar." [g] `(parser (join "\n" [~g common-grammar (keywords-for-locale)]))) (def parse-rule "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." (build-parser rule-grammar)) (def parse-flow "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." (build-parser flow-grammar)) (defn parse "Top level parser function: parse this `text` as either a production or a flow rule; return a raw parse tree." [^String rule-text] (let [text (trim rule-text)] (when-not (zero? (count text)) (case (first (split text #"\s+")) "if" (parse-rule text) "flow" (parse-flow text) ";;" nil (throw (ex-info "Rule text was not recognised" {:text text})))))) (defn compile-rule "Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules, into Clojure source, and then compile it into an anonymous function object, getting round the problem of binding mw-engine.utils in the compiling environment. If `return-tuple?` is present and true, return a list comprising the anonymous function compiled, and the function from which it was compiled. Throws an exception if parsing fails." ([rule-text return-tuple?] (assert (string? rule-text)) (let [rule (trim rule-text) tree (simplify (parse-rule rule)) afn (if (rule? tree) (eval (generate tree)) ;; else (throw-parse-exception tree))] (if return-tuple? (list afn rule) ;; else afn))) ([rule-text] (compile-rule rule-text false)))