001 (ns ^{:doc "A very simple parser which parses production rules."
002 :author "Simon Brooke"}
003 mw-parser.core
004 (:use mw-engine.utils
005 [clojure.string :only [split trim triml]])
006 (:gen-class)
007 )
008
009 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
010 ;;;;
011 ;;;; mw-parser: a rule parser for MicroWorld.
012 ;;;;
013 ;;;; This program is free software; you can redistribute it and/or
014 ;;;; modify it under the terms of the GNU General Public License
015 ;;;; as published by the Free Software Foundation; either version 2
016 ;;;; of the License, or (at your option) any later version.
017 ;;;;
018 ;;;; This program is distributed in the hope that it will be useful,
019 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
020 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
021 ;;;; GNU General Public License for more details.
022 ;;;;
023 ;;;; You should have received a copy of the GNU General Public License
024 ;;;; along with this program; if not, write to the Free Software
025 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
026 ;;;; USA.
027 ;;;;
028 ;;;; Copyright (C) 2014 Simon Brooke
029 ;;;;
030 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
031 ;;;;
032 ;;;; A very simple parser which parses production rules of the following forms:
033 ;;;;
034 ;;;; * "if altitude is less than 100 and state is forest then state should be climax and deer should be 3"
035 ;;;; * "if altitude is 100 or fertility is 25 then state should be heath and fertility should be 24.3"
036 ;;;; * "if altitude is 100 or fertility is 25 then state should be heath"
037 ;;;; * "if deer is more than 2 and wolves is 0 and fertility is more than 20 then deer should be deer + 2"
038 ;;;; * "if deer is more than 1 and wolves is more than 1 then deer should be deer - wolves"
039 ;;;; * "if state is grassland and 4 neighbours have state equal to water then state should be village"
040 ;;;; * "if state is forest and fertility is between 55 and 75 then state should be climax"
041 ;;;; * "if 6 neighbours have state equal to water then state should be village"
042 ;;;; * "if state is in grassland or pasture or heath and 4 neighbours are water then state should be village"
043 ;;;; * "if state is forest or state is climax and some neighbours have state equal to fire then 3 chance in 5 state should be fire"
044 ;;;; * "if state is pasture and more than 3 neighbours have state equal to scrub then state should be scrub"
045 ;;;; *
046 ;;;;
047 ;;;; it generates rules in the form expected by `mw-engine.core`, q.v.
048 ;;;;
049 ;;;; It is, as I say, very simple; it generates a complete rule, or it fails completely, returning nil.
050 ;;;; Very occasionally it generates a wrong rule - one which is not a correct translation of the rule
051 ;;;; semantics - but that is buggy behaviour, which I'll try to fix over the next few weeks, not a
052 ;;;; design fault.
053 ;;;;
054 ;;;; More significantly it does not generate useful error messages on failure.
055 ;;;;
056 ;;;; This parser is now obsolete, but is retained in the codebase for now in
057 ;;;; case it is of use to anyone. Prefer the declarative.clj parser.
058 ;;;;
059 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
060
;; Forward declarations: these parsers are mutually recursive with functions
;; defined earlier in the file than they are.
(declare parse-conditions
         parse-not-condition
         parse-simple-condition)
064
;; A regular expression which matches the string representation of a
;; non-negative decimal number: one or more digits, optionally followed by a
;; decimal point and further digits (e.g. "100", "24.3"). The empty string, a
;; lone "." and strings with several decimal points are deliberately rejected,
;; because tokens which match are handed on to `read-string`. The group is
;; non-capturing so that `re-matches` still returns a plain string.
(def re-number #"^[0-9]+(?:\.[0-9]*)?$")
067
(def reserved-properties-error
  "Message of the exception thrown when a rule action attempts to set one of
  the reserved cell properties."
  "The properties 'x' and 'y' of a cell are reserved and should not be set in rule actions")

(def bad-parse-error
  "Format string for the message of the exception thrown when a rule cannot
  be parsed; takes the offending rule text as its single argument."
  "I did not understand '%s'")
073
(defn- keyword-or-numeric
  "If this `token` is already a keyword, return it unchanged; if it is a string
  which appears to represent an explicit number, return that number;
  otherwise, make a keyword of it and return that."
  [token]
  (cond
    ;; Keywords must be recognised before the regular expression is tried:
    ;; `re-matches` only accepts character sequences and throws on a keyword.
    (keyword? token) token
    ;; Only strings are matched against the number pattern, so that nil and
    ;; other non-string tokens fall through to `keyword` instead of throwing.
    (and (string? token) (re-matches re-number token)) (read-string token)
    :else (keyword token)))
082
083 ;; Generally all functions in this file with names beginning 'parse-' take a
084 ;; sequence of tokens (and in some cases other optional arguments) and return a
085 ;; vector comprising
086 ;;
087 ;; 1. A code fragment parsed from the front of the sequence of tokens, and
088 ;; 2. the remaining tokens which were not consumed in constructing that fragment.
089 ;;
090 ;; In every case if the function cannot parse the desired construct from the
091 ;; front of the sequence of tokens it returns nil.
092
093
(defn parse-numeric-value
  "Parse a number from the first of these tokens. Returns a vector of the
  number read and the remaining tokens, or nil if there is no first token or
  it does not match `re-number`."
  [[value & remainder]]
  (when (and value (re-matches re-number value))
    (let [number (read-string value)]
      [number remainder])))
098
(defn parse-property-int
  "Treat the first token as the name of a property of the current cell whose
  value is assumed to be an integer. Returns a vector of the code fragment
  `(get-int cell :name)` and the remaining tokens, or nil if there is no
  first token."
  [[value & remainder]]
  (when value
    (let [fragment (list 'get-int 'cell (keyword value))]
      [fragment remainder])))
104
(defn parse-property-value
  "Treat the first token as the name of a property of the current cell.
  Returns a vector of the code fragment `(:name cell)` and the remaining
  tokens, or nil if there is no first token."
  [[value & remainder]]
  (when value
    (let [fragment (list (keyword value) 'cell)]
      [fragment remainder])))
109
(defn parse-token-value
  "Treat the first token as a simple token value. Returns a vector of that
  token as a keyword and the remaining tokens, or nil if there is no first
  token."
  [[value & remainder]]
  (when value
    (vector (keyword value) remainder)))
114
(defn parse-simple-value
  "Parse a single value from the first of these `tokens`. An explicit number
  is always returned as that number. Otherwise, if `expect-int` is true the
  token is treated as the name of an integer-valued property of the cell; if
  it is false (the default) the token is returned as a keyword."
  ([tokens]
   (parse-simple-value tokens false))
  ([tokens expect-int]
   (let [fallback (if expect-int parse-property-int parse-token-value)]
     (or (parse-numeric-value tokens)
         (fallback tokens)))))
126
(defn gen-token-value
  "Generate code for the value represented by this single `token`, returning
  only the generated code rather than a [code, remaining-tokens] pair.
  `expect-int` is passed through to `parse-simple-value`."
  [token expect-int]
  (-> (list token)
      (parse-simple-value expect-int)
      first))
132
(defn parse-disjunct-value
  "Parse a list of alternative values from among these `tokens`, which must
  begin with 'or' or 'in' followed by a value; further values are consumed
  for as long as each is introduced by 'or'. If `expect-int` is true, return
  integers or things which will evaluate to integers. Returns a vector of
  the list of values and the remaining tokens, or nil if the first token is
  neither 'or' nor 'in'."
  [[OR token & tokens] expect-int]
  (when (member? OR '("or" "in"))
    (let [value (first (parse-simple-value (list token) expect-int))]
      (if (= "or" (first tokens))
        ;; another alternative follows: parse the tail recursively and
        ;; prepend this value to whatever it yields.
        (let [[others remainder] (parse-disjunct-value tokens expect-int)]
          [(cons value others) remainder])
        [(list value) tokens]))))
145
(defn parse-value
  "Parse a value from among these `tokens`, trying first a list of
  alternatives and then a single simple value. If `expect-int` is true
  (it defaults to false), return an integer or something which will evaluate
  to an integer."
  ([tokens]
   (parse-value tokens false))
  ([tokens expect-int]
   (some (fn [parser] (parser tokens expect-int))
         [parse-disjunct-value parse-simple-value])))
155
(defn parse-member-condition
  "Parses a condition of the form '[property] is in [value] or [value]...'
  ('are' is accepted in place of 'is'). Generates a `member?` test of the
  cell's property against the quoted list of values."
  [[property IS IN & rest]]
  (when (and (member? IS '("is" "are"))
             (= IN "in"))
    (let [[alternatives remainder] (parse-disjunct-value (cons "in" rest) false)
          subject (list (keyword property) 'cell)]
      [(list 'member? subject (list 'quote alternatives)) remainder])))
162
(defn- parse-less-condition
  "Parse '[property] is less than [value]'; 'are' is accepted for 'is' and
  'fewer' for 'less'. The property is read as an integer."
  [[property IS LESS THAN & rest]]
  (when (and (member? IS '("is" "are"))
             (member? LESS '("less" "fewer"))
             (= THAN "than"))
    (let [[value remainder] (parse-value rest true)
          subject (list 'get-int 'cell (keyword property))]
      [(list '< subject value) remainder])))
169
(defn- parse-more-condition
  "Parse '[property] is more than [value]'; 'are' is accepted for 'is' and
  'greater' for 'more'. The property is read as an integer."
  [[property IS MORE THAN & rest]]
  (when (and (member? IS '("is" "are"))
             (member? MORE '("more" "greater"))
             (= THAN "than"))
    (let [[value remainder] (parse-value rest true)
          subject (list 'get-int 'cell (keyword property))]
      [(list '> subject value) remainder])))
176
(defn- parse-between-condition
  "Parse '[property] is between [value] and [value]' ('are' is accepted for
  'is'). The generated test is true when the property lies strictly between
  the two values, whichever order they were given in."
  [[p IS BETWEEN v1 AND v2 & rest]]
  (when (and (member? IS '("is" "are"))
             (= BETWEEN "between")
             (= AND "and")
             (some? v2))
    (letfn [(int-value [token]
              (first (parse-simple-value (list token) true)))]
      (let [property (int-value p)
            lower (int-value v1)
            upper (int-value v2)]
        [(list 'or
               (list '< lower property upper)
               (list '> lower property upper))
         rest]))))
186
(defn- parse-is-condition
  "Parse clauses of the form '[property] is [value]' ('are' is accepted for
  'is'). If the value is an explicit number the property is compared as an
  integer; otherwise the property is compared with the value as a keyword.
  Returns a vector of the generated test and the remaining tokens, or nil if
  the tokens do not have this form - including when the value is missing."
  [[property IS value & rest]]
  ;; `value` is checked before it is matched against `re-number`: a missing
  ;; value must yield nil (a failed parse) rather than an exception from
  ;; `re-matches`.
  (when (and value (member? IS '("is" "are")))
    (if (re-matches re-number value)
      [(list '= (list 'get-int 'cell (keyword property)) (read-string value)) rest]
      [(list '= (list (keyword property) 'cell) (keyword value)) rest])))
198
(defn- parse-not-condition
  "Parse the negation of a simple condition: '[property] is not ...' ('are'
  is accepted for 'is'). The 'not' is dropped, the rest is parsed as a simple
  condition, and the result is wrapped in `not`."
  [[property IS NOT & rest]]
  (when (and (member? IS '("is" "are"))
             (= NOT "not"))
    (when-let [[condition remainder]
               (parse-simple-condition (list* property "is" rest))]
      [(list 'not condition) remainder])))
207
(defn- gen-neighbours-condition
  "Generate a test which counts the neighbours selected by a call to
  `get-neighbours-with-property-value` (passing `distance`, `property`,
  `value` and `comp2`) and compares that count with `quantity` using `comp1`.
  `distance` defaults to 1. Returns a vector of the generated test and
  `remainder`."
  ([comp1 quantity property value remainder comp2]
   (gen-neighbours-condition comp1 quantity property value remainder comp2 1))
  ([comp1 quantity property value remainder comp2 distance]
   (let [selection (list 'get-neighbours-with-property-value 'world
                         '(cell :x) '(cell :y) distance
                         (keyword property) (keyword-or-numeric value) comp2)
         test (list comp1 (list 'count selection) quantity)]
     [test remainder])))
219
(defn parse-comparator-neighbours-condition
  "Parse conditions of the form '...more than 6 neighbours are [condition]';
  'fewer' or 'less' may be used in place of 'more', 'within [distance]' may
  follow 'neighbours', and 'have [property] equal to / more than / less than
  [value]' may be used in place of 'are [value]'."
  [[MORE THAN n NEIGHBOURS WITHIN distance have-or-are & rest]]
  (let [quantity (first (parse-numeric-value (list n)))
        comparator (case MORE
                     "more" '>
                     ("fewer" "less") '<
                     nil)]
    (if (not= WITHIN "within")
      ;; The two tokens bound as 'within' and the distance were not actually
      ;; 'within' and a distance. Splice in 'within 1' ahead of them and try
      ;; again.
      (parse-comparator-neighbours-condition
       (flatten
        (list MORE THAN n NEIGHBOURS "within" "1" WITHIN distance have-or-are rest)))
      (when (and quantity
                 comparator
                 (= THAN "than")
                 (= NEIGHBOURS "neighbours"))
        (cond
          (= have-or-are "are")
          (let [[value & remainder] rest
                dist (gen-token-value distance true)]
            (gen-neighbours-condition comparator quantity :state value remainder = dist))

          (= have-or-are "have")
          (let [[property comp1 comp2 value & remainder] rest
                dist (gen-token-value distance true)
                ;; the comparison applied to each neighbour's property value
                inner (get {["equal" "to"] =
                            ["more" "than"] >
                            ["less" "than"] <}
                           [comp1 comp2])]
            (when inner
              (gen-neighbours-condition comparator quantity property
                                        value remainder inner dist))))))))
256
(defn parse-some-neighbours-condition
  "Parse conditions of the form 'some neighbours ...' by rewriting them as
  'more than 0 neighbours ...'."
  [[SOME NEIGHBOURS & rest]]
  (when (and (= SOME "some")
             (= NEIGHBOURS "neighbours"))
    (parse-comparator-neighbours-condition
     (concat ["more" "than" "0" "neighbours"] rest))))
262
(defn parse-simple-neighbours-condition
  "Parse conditions of the form '...6 neighbours are [condition]', which test
  for exactly that number of neighbours. 'within [distance]' may follow
  'neighbours', and 'have [property] equal to / more than / less than
  [value]' may be used in place of 'are [value]'."
  [[n NEIGHBOURS WITHIN distance have-or-are & rest]]
  (let [quantity (first (parse-numeric-value (list n)))]
    (when (and quantity (= NEIGHBOURS "neighbours"))
      (if (not= WITHIN "within")
        ;; The two tokens bound as 'within' and the distance were not actually
        ;; 'within' and a distance. Splice in 'within 1' ahead of them and try
        ;; again.
        (parse-simple-neighbours-condition
         (flatten
          (list n NEIGHBOURS "within" "1" WITHIN distance have-or-are rest)))
        (cond
          (= have-or-are "are")
          (let [[value & remainder] rest
                dist (gen-token-value distance true)]
            (gen-neighbours-condition '= quantity :state value remainder = dist))

          (= have-or-are "have")
          (let [[property comp1 comp2 value & remainder] rest
                dist (gen-token-value distance true)
                ;; the comparison applied to each neighbour's property value
                inner (get {["equal" "to"] =
                            ["more" "than"] >
                            ["less" "than"] <}
                           [comp1 comp2])]
            (when inner
              (gen-neighbours-condition '= quantity property value remainder
                                        inner dist))))))))
294
(defn parse-neighbours-condition
  "Parse conditions referring to neighbours, trying in turn the exact-count,
  comparative-count and 'some neighbours' forms."
  [tokens]
  (some (fn [parser] (parser tokens))
        [parse-simple-neighbours-condition
         parse-comparator-neighbours-condition
         parse-some-neighbours-condition]))
303
(defn parse-simple-condition
  "Parse a single condition from the front of `tokens`, trying each kind of
  condition in turn and returning the result of the first parser which
  succeeds, or nil if none does."
  [tokens]
  (some (fn [parser] (parser tokens))
        [parse-neighbours-condition
         parse-member-condition
         parse-not-condition
         parse-less-condition
         parse-more-condition
         parse-between-condition
         parse-is-condition]))
315
(defn- parse-disjunction-condition
  "Parse '... or [condition]' from `tokens`, where `left` is the already
  parsed first disjunct. Returns nil if no conditions can be parsed from
  `tokens`."
  [left tokens]
  (when-let [[right remainder] (parse-conditions tokens)]
    [(list 'or left right) remainder]))
323
(defn- parse-conjunction-condition
  "Parse '... and [condition]' from `tokens`, where `left` is the already
  parsed first conjunct. Returns nil if no conditions can be parsed from
  `tokens`."
  [left tokens]
  (when-let [[right remainder] (parse-conditions tokens)]
    [(list 'and left right) remainder]))
331
(defn- parse-conditions
  "Parse conditions from `tokens`, where conditions may be linked by either
  'and' or 'or'. Everything after a connective is parsed recursively as the
  right hand operand."
  [tokens]
  (when-let [[left [connective & remainder] :as partial]
             (parse-simple-condition tokens)]
    (case connective
      "and" (parse-conjunction-condition left remainder)
      "or" (parse-disjunction-condition left remainder)
      ;; no connective follows: the single condition stands alone
      partial)))
342
(defn- parse-left-hand-side
  "Parse the left hand side ('if...') of a production rule. Returns nil
  unless the first token is 'if'."
  [[IF & tokens]]
  (when (= "if" IF)
    (parse-conditions tokens)))
349
(defn- parse-arithmetic-action
  "Parse actions of the form '[property] should be [property]
  [arithmetic-operator] [value]', e.g. 'fertility should be fertility + 1',
  or 'deer should be deer - wolves'. The generated code merges the new value,
  coerced with `int`, into `previous` (or into `cell` if `previous` is nil).

  Throws an exception if the property to be set is 'x' or 'y'.

  NOTE: the operand token is matched against `re-number` without first being
  checked for nil."
  [previous [prop1 SHOULD BE prop2 operator value & rest]]
  (if (member? prop1 '("x" "y"))
    (throw (Exception. reserved-properties-error))
    (when (and (= SHOULD "should")
               (= BE "be")
               (member? operator '("+" "-" "*" "/")))
      (let [operand (if (re-matches re-number value)
                      (read-string value)
                      (list 'get-int 'cell (keyword value)))
            expression (list 'int
                             (list (symbol operator)
                                   (list 'get-int 'cell (keyword prop2))
                                   operand))]
        [(list 'merge (or previous 'cell) {(keyword prop1) expression})
         rest]))))
367
(defn- parse-set-action
  "Parse actions of the form '[property] should be [value].' The generated
  code merges the new value - a number if the value token is an explicit
  number, otherwise a keyword - into `previous` (or into `cell` if `previous`
  is nil). Returns a vector of the generated code and the remaining tokens,
  or nil if the tokens do not have this form - including when the value is
  missing.

  Throws an exception if the property to be set is 'x' or 'y'."
  [previous [property SHOULD BE value & rest]]
  (cond
    (member? property '("x" "y"))
    (throw (Exception. reserved-properties-error))

    ;; `value` is required here so that a rule which stops after 'should be'
    ;; fails to parse (nil) instead of passing nil to `re-matches`, which
    ;; would throw.
    (and (= SHOULD "should") (= BE "be") value)
    [(list 'merge (or previous 'cell)
           {(keyword property) (if (re-matches re-number value)
                                 (read-string value)
                                 (keyword value))})
     rest]))
378
(defn- parse-simple-action
  "Parse a single action from the front of `tokens`, trying first an
  arithmetic action and then a simple set action. `previous` is the code of
  any action already parsed, into which this one is merged."
  [previous tokens]
  (some (fn [parser] (parser previous tokens))
        [parse-arithmetic-action parse-set-action]))
382
(defn- parse-actions
  "Parse one or more actions, linked by 'and', from the front of `tokens`.
  `previous` is the code of any action already parsed (nil for the first);
  each successive action is generated as a merge into the one before it.
  Returns a vector of the generated code and the tokens which were not
  consumed, or nil if no action can be parsed."
  [previous tokens]
  (let [[left remainder] (parse-simple-action previous tokens)]
    (when left
      (if (= (first remainder) "and")
        (parse-actions left (rest remainder))
        ;; Hand the unconsumed tokens back to the caller rather than
        ;; discarding them, so that trailing junk after the last action can
        ;; be detected and the rule rejected.
        [left remainder]))))
391
(defn- parse-probability
  "Parse a probability of an action from this collection of tokens, which
  must have the form '[n] chance in [m] [actions]'. The generated code
  performs the actions only when `(rand m)` is less than `n`."
  [previous [n CHANCE IN m & tokens]]
  (when (and (= CHANCE "chance")
             (= IN "in"))
    (let [[action remainder] (parse-actions previous tokens)]
      (when action
        (let [dice (list 'rand (first (parse-simple-value (list m) true)))
              threshold (first (parse-simple-value (list n) true))]
          [(list 'cond (list '< dice threshold) action) remainder])))))
405
(defn- parse-right-hand-side
  "Parse the right hand side ('then...') of a production rule, which is
  either a probabilistic action or a plain sequence of actions. Returns nil
  unless the first token is 'then'."
  [[THEN & tokens]]
  (when (= "then" THEN)
    (some (fn [parser] (parser nil tokens))
          [parse-probability parse-actions])))
413
(defn parse-rule
  "Parse a complete rule from this `line`, expected to be either a string or a
  sequence of string tokens. Return the rule in the form of an S-expression.

  If `line` is a string, it is split into tokens on whitespace and an
  exception is thrown if parsing fails; if it is a sequence of tokens, nil is
  returned if parsing fails."
  [line]
  (if (string? line)
    (or (parse-rule (split (triml line) #"\s+"))
        (throw (Exception. (format bad-parse-error line))))
    (let [[left remainder] (parse-left-hand-side line)
          [right junk] (parse-right-hand-side remainder)]
      ;; there should be a valid left hand side and a valid right hand side,
      ;; and there shouldn't be anything left over (junk should be empty)
      (when (and left right (empty? junk))
        (list 'fn ['cell 'world] (list 'if left right))))))
433
(defn compile-rule
  "Parse this `rule-text`, a string conforming to the grammar of MicroWorld rules,
  into Clojure source, and then compile it into an anonymous
  function object, getting round the problem of binding mw-engine.utils in
  the compiling environment. If `return-tuple?` is present and true, return
  a list comprising the anonymous function compiled, and the trimmed rule
  text from which it was compiled.

  Throws an exception if parsing fails."
  ([rule-text]
   (compile-rule rule-text false))
  ([rule-text return-tuple?]
   ;; make the mw-engine.utils functions referred to by the generated code
   ;; available in the namespace in which it is evaluated
   (use 'mw-engine.utils)
   (let [afn (eval (parse-rule rule-text))]
     (if (and afn return-tuple?)
       (list afn (trim rule-text))
       afn))))