with a brake that I make with a string kind of thing...

It's a good kind of brake but it doesn't work yet.
This commit is contained in:
Simon Brooke 2020-04-27 23:23:50 +01:00
parent d924ef17c6
commit fb00ec31a0
No known key found for this signature in database
GPG key ID: A7A4F18D1D4DF987
4 changed files with 87 additions and 45 deletions

13
.gitignore vendored Normal file
View file

@ -0,0 +1,13 @@
/models
/target
/classes
/checkouts
profiles.clj
pom.xml
pom.xml.asc
*.jar
*.class
/.lein-*
/.nrepl-port
.hgignore
.hg/

View file

@ -9,4 +9,8 @@
[wildwood "0.1.0-SNAPSHOT"]] [wildwood "0.1.0-SNAPSHOT"]]
:main ^:skip-aot wwui.core :main ^:skip-aot wwui.core
:target-path "target/%s" :target-path "target/%s"
:plugins [[lein-cloverage "1.1.1"]
[lein-codox "0.10.7"]
[lein-cucumber "1.0.2"]
[lein-gorilla "0.4.0"]]
:profiles {:uberjar {:aot :all}}) :profiles {:uberjar {:aot :all}})

View file

@ -27,42 +27,45 @@
[:noun] [:noun]
["DT" :noun] ["DT" :noun]
[:adjectives :noun] [:adjectives :noun]
["DT" :adjectives :noun] ["DT" :adjectives :noun]]
[:noun-phrase "CC" :noun-phrase] ;; :noun-phrases [[:noun-phrase]
[:noun-phrase "IN" :noun-phrase] ;; [:noun-phrase "CC" :noun-phrases]
[:noun-phrase "," :noun-phrase]] ;; [:noun-phrase "IN" :noun-phrases]
;; [:noun-phrase "," :noun-phrases]]
:adjective [["JJ"]["JJR"]["JJS"]] :adjective [["JJ"]["JJR"]["JJS"]]
:adjectives [[:adjective] :adjectives [[:adjective]
[:adjectives "CC" :adjective]] [:adjective :adjectives]
[:adjective "," :adjectives]
[:adjective "CC" :adjectives]]
:verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]] :verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]]
:adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs :adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs
:adverbs [[:adverb] :adverbs [[:adverb]
[:adverbs "," :adverb] [:adverb "," :adverbs]
[:adverbs "CC" :adverb]] [:adverb "CC" :adverbs]]
:verb-phrase [[:verb] :verb-phrase [[:verb]
[:adverbs :verb] [:adverbs :verb]
[:verb :adverb :verb] [:verb :adverb :verb]
[:verb :adverbs]] [:verb :adverbs]]
:locator [["IN" :noun-phrase]] :locator [["IN" :noun-phrase]]
:locators [[:locator] :locators [[:locator]
[:locator :locator] [:locators :locator]
[:locator "," :locator]] [:locators "," :locator]]
:subject [[:noun-phrase]] :subject [[:noun-phrase]]
:object [[:noun-phrase]] :object [[:noun-phrase]]
:proposition [[:subject :verb :object] :proposition [[:subject :verb :object]
[:locators "," :subject :verb :object] [:locator "," :subject :verb :object]
[:subject "," :locators "," :verb :object] [:subject "," :locator "," :verb :object]
[:subject :verb-phrase :object :locators]] [:subject :verb-phrase :object :locator]]
:propositions [[:proposition] :propositions [[:proposition]
[:propositions "CC" :proposition]]}) [:proposition "CC" :propositions]]})
(declare recursive-descent-parser rdp-seek) (declare reparse rdp-seek)
(defn rdp-seek (defn rdp-seek
"Seek a phrase which satisfies this `goal` (expected to be a keyword) in "Seek a phrase which satisfies this `goal` (expected to be a keyword) in
this `tagged-sentence` using this `grammar`. this `tagged-sentence` using this `grammar`.
Return a sequence comprising Return a cons comprising
1. the first matching phrase for the goal, tagged with the goal, or `nil` if 1. the first matching phrase for the goal, tagged with the goal, or `nil` if
no match; no match;
2. the tail of the sentence when the parts comprising the phrase are removed." 2. the tail of the sentence when the parts comprising the phrase are removed."
@ -74,56 +77,82 @@
(sort (sort
#(> (count %1) (count %2)) #(> (count %1) (count %2))
(map (map
#(recursive-descent-parser tagged-sentence grammar %) #(reparse tagged-sentence grammar %)
(goal grammar))))] (goal grammar))))]
(cons (cons (first result) (list goal)) (rest result)))) (cons (cons (first result) (list goal)) (rest result))))
(throw (Exception. (str "Non-keyword passed to rdp-seek: " goal))))) (throw (Exception. (str "Non-keyword passed to rdp-seek: `" goal "` (type " (or (type goal) "nil") ")")))))
;; (rdp-seek [["The" "DT"] ["Forum" "NNP"]] grammar :noun-phrase) ;; (rdp-seek [["The" "DT"] ["Forum" "NNP"]] grammar :noun-phrase)
;; (recursive-descent-parser [["The" "DT"] ["Forum" "NNP"]] grammar ["DT" "NNP"]) ;; (reparse [["The" "DT"] ["Forum" "NNP"]] grammar ["DT" "NNP"])
;; (:noun-phrase grammar) ;; (:noun-phrase grammar)
(defmacro tag (defmacro tag
"The tag, on a `tagged-token`, is just the second element. Written as a macro "The tag, on a `tagged-token`, is just the second element. Written as a macro
for readability." for readability."
[tagged-token] [tagged-token]
`(nth ~tagged-token 1)) `(nth ~tagged-token 1))
(defmacro coll-or-nil?
[o]
"For fuck's sake, nil isn't a collection? What planet are these people on?"
`(or (nil? ~o) (coll? ~o)))
(defn rdp-extend (defn rdp-extend
"Seek a phrase which satisfies this `goal` (expected to be a collection of tags) in
this `tagged-sentence` using this `grammar`.
Return a cons comprising
1. the first matching phrase for the goal, or `nil` if no match;
2. the tail of the sentence when the parts comprising the phrase are removed."
[tagged-sentence grammar goal] [tagged-sentence grammar goal]
(l/info "Extending " goal " in " (with-out-str (pprint tagged-sentence))) (l/info "Extending " goal " in " (with-out-str (pprint tagged-sentence)))
(cond (cond
(not (coll-or-nil? goal))
(throw (Exception. (str "Non-collection passed to rdp-extend: `" goal "` (type " (or (type goal) "nil") ")")))
(empty? goal) (empty? goal)
(cons (list) tagged-sentence) (cons (list) tagged-sentence)
(not (empty? tagged-sentence)) (not (empty? tagged-sentence))
(let [[tt & st] tagged-sentence (let [[tt & st] tagged-sentence
[target & gt] goal] [target & gt] goal]
;; (pprint {:tagged-token tt
;; :sentence-tail st
;; :target target
;; :goal-tail gt})
(cond (cond
(= target (tag tt)) (= target (tag tt))
(when-let [[dh & dt] (rdp-extend st grammar gt)] (when-let [[dh & dt] (rdp-extend st grammar gt)]
(cons (cons tt dh) dt)) (cons (cons tt dh) dt))
(keyword? target) (keyword? target)
(when-let [[dh & dt] (rdp-seek st grammar target)] (when-let [[ph & pt] (rdp-seek tagged-sentence grammar target)] ;; it's this branch which is wrong. Getting it right, however, is not easy
(cons (cons tt dh) dt)))))) (let [[dh & dt] (rdp-extend st grammar pt)]
(cons (cons (cons ph dh) (list target)) dt)))))))
;; As of 23:00 on 20160427, I'm getting
;; (rdp-seek [["the" "DT"] ["forum" "NN"]] grammar :noun-phrase)
;; => (((["the" "DT"] (((["forum" "NN"]) :noun)) :noun) :noun-phrase))
;; it's wrong, but it's interestingly and probably tractably wrong.
;; Seeking a keyword target whose definition in the grammar comprises only
;; string targets seems to produce the right result...
;; but seeking a keyword target whose definition expands to further keyword
;; targets (e.g. :noun-phrase being defined in terms of :noun) blows up.
;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar []) ;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar [])
;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar ["DT"]) ;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar ["DT"])
;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "NNP"]) ;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "NNP"])
;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "FOO"]) ;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "FOO"])
(defn recursive-descent-parser (defn reparse
"Reparse this `tagged-sentence` using this grammar to seek this `goal`. "Reparse this `tagged-sentence` using this grammar to seek this `goal`.
Parse greedily, seeking the most extended goal. Parse greedily, seeking the most extended goal.
Return a sequence comprising Return a sequence comprising
1. the first matching phrase for the goal, tagged with the goal, or `nil` 1. the first matching phrase for the goal, tagged with the goal, or `nil`
if no match; if no match;
2. the tail of the sentence when the parts comprising the phrase are removed." 2. the tail of the sentence when the parts comprising the phrase are removed.
This function is called `reparse` because:
1. it is designed to parse sentences which have already been parsed by
OpenNLP: it will not work on raw sentences;
2. it is a recursive descent parser."
[tagged-sentence grammar goal] [tagged-sentence grammar goal]
(l/info "Choosing strategy for " goal " in " (with-out-str (pprint tagged-sentence))) (l/info "Choosing strategy for " goal " in " (with-out-str (pprint tagged-sentence)))
(cond (cond
@ -131,7 +160,7 @@
;; nil ;; nil
(keyword? goal) (keyword? goal)
(rdp-seek tagged-sentence grammar goal) (rdp-seek tagged-sentence grammar goal)
(coll? goal) (coll-or-nil? goal)
(rdp-extend tagged-sentence grammar goal))) (rdp-extend tagged-sentence grammar goal)))
(defn propositions (defn propositions
@ -139,7 +168,7 @@
sentence; if `knowledge-accessor` is passed, try to resolve names and noun sentence; if `knowledge-accessor` is passed, try to resolve names and noun
phrases to entities known to that knowledge accessor." phrases to entities known to that knowledge accessor."
([tagged-sentence] ([tagged-sentence]
(recursive-descent-parser tagged-sentence grammar :propositions)) (reparse tagged-sentence grammar :propositions))
([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor ([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
knowledge-accessor] knowledge-accessor]
;; TODO: doesn't work yet. ;; TODO: doesn't work yet.
@ -155,29 +184,29 @@
#(propositions (pos-tag (tokenize %))) #(propositions (pos-tag (tokenize %)))
(get-sentences (slurp file-path)))))) (get-sentences (slurp file-path))))))
;; (recursive-descent-parser [] grammar :noun) ;; (reparse [] grammar :noun)
;; (rdp-seek (pos-tag (tokenize "Brutus killed Caesar")) grammar :noun) ;; (rdp-seek (pos-tag (tokenize "Brutus killed Caesar")) grammar :noun)
;; (coll? ["NPP"]) ;; (coll? ["NPP"])
;; (recursive-descent-parser (pos-tag (tokenize "killed Caesar")) grammar :verb) ;; (reparse (pos-tag (tokenize "killed Caesar")) grammar :verb)
(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar :noun-phrase) ;; (reparse (pos-tag (tokenize "The Forum")) grammar :noun-phrase)
(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"]) ;; (reparse (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
(recursive-descent-parser [["Forum" "NNP"]] grammar :noun-phrase) ;; (reparse [["Forum" "NNP"]] grammar :noun-phrase)
(map ;; (map
#(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar %) ;; #(reparse (pos-tag (tokenize "Forum")) grammar %)
(:noun-phrase grammar)) ;; (:noun-phrase grammar))
(rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"]) ;; (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
;; (nil nil ;; (nil nil
;; ((["The" "DT"]) ["Forum" "NNP"]) ;; ((["The" "DT"]) ["Forum" "NNP"])
;; nil ;; nil
;; ((["The" "DT"]) ["Forum" "NNP"]) nil nil nil) ;; ((["The" "DT"]) ["Forum" "NNP"]) nil nil nil)
;; (recursive-descent-parser (pos-tag (tokenize "in the Forum")) grammar :locator) ;; (reparse (pos-tag (tokenize "in the Forum")) grammar :locator)
;; (recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"]) ;; (reparse (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
;; (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" :noun]) ;; (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" :noun])
;; (let [deeper (rdp-extend (pos-tag (tokenize "Forum on Sunday")) grammar ["NNP"])] ;; (let [deeper (rdp-extend (pos-tag (tokenize "Forum on Sunday")) grammar ["NNP"])]

View file

@ -2,11 +2,7 @@
(:require [clojure.test :refer :all] (:require [clojure.test :refer :all]
[wwui.propositions :refer :all])) [wwui.propositions :refer :all]))
(deftest a-test (deftest reparser-tests
(testing "FIXME, I fail."
(is (= 0 1))))
(deftest "RDP tests"
(testing "Simplest constructs" (testing "Simplest constructs"
(is (= (recursive-descent-parser [] grammar :noun) nil)) (is (= (recursive-descent-parser [] grammar :noun) nil))
(is (is