diff --git a/README.md b/README.md
index f7c44f7..b26bfab 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Download from http://example.com/FIXME.
## Usage
-**WARNING**: does not even nearly work yet, and may nver do so. The value of this archive is probably mainly as something to mine for algorithms and ideas, especially for those doing natural language interpreteation and generation.
+**WARNING**: does not even nearly work yet, and may never do so. The value of this archive is probably mainly as something to mine for algorithms and ideas, especially for those doing natural language interpreteation and generation.
**WARNING**: nothing in this is stable. Nothing should be treated as an API. Feel free to steal ideas and code, but don't depend on `wwui` as a library.
diff --git a/docs/cloverage/index.html b/docs/cloverage/index.html
index ed8ae42..529264a 100644
--- a/docs/cloverage/index.html
+++ b/docs/cloverage/index.html
@@ -30,7 +30,7 @@
- wwui.propositions 484
+
wwui/parser.clj
+
+
+
+ 001 (ns wwui.parser
+
+
+ 002 (:require [clojure.math.combinatorics :as combi]
+
+
+ 003 [clojure.pprint :refer [pprint]]
+
+
+ 004 [clojure.string :as s]
+
+
+ 005 [opennlp.nlp :as nlp]
+
+
+ 006 [opennlp.treebank :as tb]
+
+
+ 007 [taoensso.timbre :as log :refer [debug error info spy]]
+
+
+ 008 [wildwood.knowledge-accessor :refer [Accessor]]))
+
+
+ 009
+
+
+ 010 ;; Position tags used by OpenNLP for English are documented here:
+
+
+ 011 ;; https://dpdearing.com/posts/2011/12/opennlp-part-of-speech-pos-tags-penn-english-treebank/
+
+
+ 012
+
+
+ 013 (def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
+
+
+ 014 (def tokenize (nlp/make-tokenizer "models/en-token.bin"))
+
+
+ 015 (def pos-tag (nlp/make-pos-tagger "models/en-pos-maxent.bin"))
+
+
+ 016 (def name-find (nlp/make-name-finder "models/namefind/en-ner-person.bin"))
+
+
+ 017 ;; (def chunker (make-treebank-chunker "models/en-chunker.bin"))
+
+
+ 018
+
+
+ 019 (def grammar
+
+
+ 020 "The objective of this grammar is to allow us to take a sequence of tagged symbols, and
+
+
+ 021 produce a higher-level tagging of parts of speech, and ultimately propositions, from them.
+
+
+ 022
+
+
+ 023 *NOTE THAT* tags in this grammar are always keywords, to distinguish them from OpenNLP
+
+
+ 024 tags, which tag individual tokens and are represented as strings."
+
+
+ 025 {:contextual-reference [["PRP"]] ;; the documentation says PRP is 'peronal pronoun',
+
+
+ 026 ;; but it seems to be all pronouns.
+
+
+ 027 :noun [["NN"]["NNS"]["NNP"]["NNPS"]]
+
+
+ 028 :full-name [["NNP"]
+
+
+ 029 ["NNP" :full-name]] ;; an unpunctuated sequence of proper nouns
+
+
+ 030 ;; probably represents a full name
+
+
+ 031 :noun-phrase [[:contextual-reference]
+
+
+ 032 [:noun]
+
+
+ 033 [:full-name]
+
+
+ 034 ["DT" :noun]
+
+
+ 035 [:adjectives :noun]
+
+
+ 036 ["DT" :adjectives :noun]]
+
+
+ 037 :noun-phrases [[:noun-phrase]
+
+
+ 038 [:noun-phrase "CC" :noun-phrases]
+
+
+ 039 [:noun-phrase "," :noun-phrases]]
+
+
+ 040 :adjective [["JJ"]["JJR"]["JJS"]]
+
+
+ 041 :adjectives [[:adjective]
+
+
+ 042 [:adjective :adjectives]
+
+
+ 043 [:adjective "," :adjectives]
+
+
+ 044 [:adjective "CC" :adjectives]]
+
+
+ 045 :verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]]
+
+
+ 046 :adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs
+
+
+ 047 :adverbs [[:adverb]
+
+
+ 048 [:adverb "," :adverbs]
+
+
+ 049 [:adverb "CC" :adverbs]]
+
+
+ 050 :verb-phrase [[:verb]
+
+
+ 051 [:adverbs :verb]
+
+
+ 052 [:verb :adverbs :verb]
+
+
+ 053 [:verb :adverbs]
+
+
+ 054 [:verb :adverbs :verb "TO"]]
+
+
+ 055 :locator [["IN" :noun-phrases]]
+
+
+ 056 :locators [[:locator]
+
+
+ 057 [:locator :locators]
+
+
+ 058 [:locator "," :locators]]
+
+
+ 059 :location [[:locators]]
+
+
+ 060 :subject [[:noun-phrases]]
+
+
+ 061 :object [[:noun-phrases]]
+
+
+ 062 :proposition [[:subject :verb-phrase :object]
+
+
+ 063 [:location "," :subject :verb-phrase :object]
+
+
+ 064 [:subject "," :location "," :verb-phrase :object]
+
+
+ 065 [:subject :verb-phrase :object :location]]
+
+
+ 066 :propositions [[:proposition]
+
+
+ 067 [:proposition "CC" :propositions]
+
+
+ 068 [:proposition "," "CC" :propositions]]})
+
+
+ 069
+
+
+ 070 (declare reparse rdp-seek)
+
+
+ 071
+
+
+ 072 (defn rdp-seek
+
+
+ 073 "Seek a phrase which satisfies this `goal` (expected to be a keyword) in
+
+
+ 074 this `tagged-sentence` using this `grammar`.
+
+
+ 075
+
+
+ 076 Return a cons comprising
+
+
+ 077 1. the first matching phrase for the goal, tagged with the goal, or `nil` if
+
+
+ 078 no match;
+
+
+ 079 2. the tail of the sentence when the parts comprising the phrase are removed."
+
+
+ 080 [tagged-sentence grammar goal]
+
+
+ 081 (if (keyword? goal)
+
+
+ 082 (when (not (empty? tagged-sentence))
+
+
+ 083 (when-let [result (first
+
+
+ 084 (sort
+
+
+ 085 #(< (count %1) (count %2))
+
+
+ 086 (remove
+
+
+ 087 empty?
+
+
+ 088 (map
+
+
+ 089 #(reparse tagged-sentence grammar %)
+
+
+ 090 (goal grammar)))))]
+
+
+ 091 (cons (cons (first result) (list goal)) (rest result))))
+
+
+ 092 (throw (Exception. (str "Non-keyword passed to rdp-seek: `" goal "` (type " (or (type goal) "nil") ")")))))
+
+
+ 093
+
+
+ 094 (defmacro tag
+
+
+ 095 "The tag, on a `tagged-token`, is just the second element. Written as a macro
+
+
+ 096 for readability."
+
+
+ 097 [tagged-token]
+
+
+ 098 `(nth ~tagged-token 1))
+
+
+ 099
+
+
+ 100 (defmacro coll-or-nil?
+
+
+ 101 [o]
+
+
+ 102 "For fuck's sake, `nil` isn't a collection? What planet are these people on?"
+
+
+ 103 `(or (nil? ~o) (coll? ~o)))
+
+
+ 104
+
+
+ 105 (defn rdp-extend
+
+
+ 106 "Seek a phrase which satisfies this `goal` (expected to be a collection of tags) in
+
+
+ 107 this `tagged-sentence` using this `grammar`.
+
+
+ 108
+
+
+ 109 Return a cons comprising
+
+
+ 110 1. the first matching phrase for the goal, or `nil` if no match;
+
+
+ 111 2. the tail of the sentence when the parts comprising the phrase are removed."
+
+
+ 112 [tagged-sentence grammar goal]
+
+
+ 113 (cond
+
+
+ 114 (not (coll-or-nil? goal))
+
+
+ 115 (throw (Exception. (str "Non-collection passed to rdp-extend: `" goal "` (type " (or (type goal) "nil") ")")))
+
+
+ 116 (empty? goal)
+
+
+ 117 (cons (list) tagged-sentence)
+
+
+ 118 (not (empty? tagged-sentence))
+
+
+ 119 (let [[tt & st] tagged-sentence
+
+
+ 120 [target & gt] goal]
+
+
+ 121 (cond
+
+
+ 122 (keyword? target)
+
+
+ 123 (when-let [[h & t](reparse tagged-sentence grammar target)]
+
+
+ 124 (when-let [[dh & dt] (reparse t grammar gt)]
+
+
+ 125 (cons (cons h dh) dt)))
+
+
+ 126 (= target (tag tt))
+
+
+ 127 (when-let [[dh & dt] (reparse st grammar gt)]
+
+
+ 128 (cons (cons tt dh) dt))))))
+
+
+ 129
+
+
+ 130 (defn reparse
+
+
+ 131 "Reparse this `tagged-sentence` using this grammar to seek this `goal`.
+
+
+ 132 Parse greedily, seeking the most extended goal.
+
+
+ 133
+
+
+ 134 Return a sequence comprising
+
+
+ 135 1. the first matching phrase for the goal, tagged with the goal, or `nil`
+
+
+ 136 if no match;
+
+
+ 137 2. the tail of the sentence when the parts comprising the phrase are removed.
+
+
+ 138
+
+
+ 139 This function is called `reparse` because:
+
+
+ 140 1. it is designed to parse sentences which have already been parsed by
+
+
+ 141 OpenNLP: it will not work on raw sentences;
+
+
+ 142 2. it is a recursive descent parser."
+
+
+ 143 [tagged-sentence grammar goal]
+
+
+ 144 (log/debug "=> Choosing strategy for "
+
+
+ 145 goal " in " (with-out-str (pprint tagged-sentence)))
+
+
+ 146 (let [r (cond
+
+
+ 147 (keyword? goal) (rdp-seek tagged-sentence grammar goal)
+
+
+ 148 (coll-or-nil? goal) (rdp-extend tagged-sentence grammar goal))]
+
+
+ 149 (log/debug "<= " goal " in "
+
+
+ 150 (s/trim (with-out-str (pprint tagged-sentence)))
+
+
+ 151 " returned " (s/trim (with-out-str (pprint r))))
+
+
+ 152 r))
+
+
+ 153
+
+
+ 154 (defn identify
+
+
+ 155 [parse-tree knowledge-accessor]
+
+
+ 156 ;; TODO: we don't yet have a working knowledge accessor. When we do,
+
+
+ 157 ;; construct a query from the contents of this parse-tree, and pass it
+
+
+ 158 ;; to the knowledge accessor in the hope of finding a true name.
+
+
+ 159 parse-tree)
+
+
+ 160
+
+
+ 161 (defn normalise
+
+
+ 162 [parse-tree ka]
+
+
+ 163 (if
+
+
+ 164 (and (coll? parse-tree) (= (count parse-tree) 2)(keyword? (nth parse-tree 1)))
+
+
+ 165 (case (nth parse-tree 1)
+
+
+ 166 :proposition (list
+
+
+ 167 (reduce
+
+
+ 168 merge
+
+
+ 169 {}
+
+
+ 170 (map
+
+
+ 171 ;; TODO: use combinatorics to extract all propositions from
+
+
+ 172 ;; a proposition having multiple locations, multiple subject,
+
+
+ 173 ;; objects and/or verbs
+
+
+ 174 #(assoc {} (nth % 1) (identify (first %) ka))
+
+
+ 175 (map #(normalise % ka) (first parse-tree)))))
+
+
+ 176 (:location :subject :object)
+
+
+ 177 (cons
+
+
+ 178 (reduce
+
+
+ 179 concat
+
+
+ 180 (remove
+
+
+ 181 empty?
+
+
+ 182 (map #(normalise % ka) (first parse-tree))))
+
+
+ 183 (list (nth parse-tree 1)))
+
+
+ 184 (:propositions :locators :noun-phrases :verbs)
+
+
+ 185 (reduce
+
+
+ 186 concat
+
+
+ 187 (remove
+
+
+ 188 empty?
+
+
+ 189 (map #(normalise % ka) (first parse-tree))))
+
+
+ 190 ;; else
+
+
+ 191 parse-tree)
+
+
+ 192 parse-tree))
+
+
+ 193
+
+
+ 194 (defn propositions
+
+
+ 195 "Given a `tagged-sentence`, return a list of propositions detected in that
+
+
+ 196 sentence; if `knowledge-accessor` is passed, try to resolve names and noun
+
+
+ 197 phrases to entities known to that knowledge accessor.
+
+
+ 198
+
+
+ 199 TODO: Note that if `:subject`, `:object` or `:locator` resolves to multiple
+
+
+ 200 objects, then that is essentially one proposition for each unique
+
+
+ 201 combination. This is not yet implemented!"
+
+
+ 202 ([tagged-sentence]
+
+
+ 203 (propositions tagged-sentence nil))
+
+
+ 204 ([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
+
+
+ 205 knowledge-accessor]
+
+
+ 206 ;; TODO: doesn't work yet.
+
+
+ 207 (reduce
+
+
+ 208 concat
+
+
+ 209 (remove
+
+
+ 210 empty?
+
+
+ 211 (map
+
+
+ 212 #(normalise % knowledge-accessor)
+
+
+ 213 (first (first (reparse tagged-sentence grammar :propositions))))))))
+
+
+ 214
+
+
+ 215 (defn propositions-from-file
+
+
+ 216 [file-path]
+
+
+ 217 (reduce
+
+
+ 218 concat
+
+
+ 219 (remove
+
+
+ 220 empty?
+
+
+ 221 (map
+
+
+ 222 #(propositions (pos-tag (tokenize %)))
+
+
+ 223 (get-sentences (slurp file-path))))))
+
+
+ 224
+
+
+ 225 ;; (reparse (pos-tag (tokenize "True love is the daughter of temperance, and temperance is utterly opposed to the madness of bodily pleasure.")) grammar :propositions)
+
+
+ 226 ;; (reparse [["temperance" "NN"] ["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :subject)
+
+
+ 227 ;; (reparse [["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :verb-phrase)
+
+
+ 228 ;; (reparse [["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :verb-phrase)
+
+
+