with a brake that I make with a string kind of thing...
It's a good kind of brake but it doesn't work yet.
This commit is contained in:
parent
d924ef17c6
commit
fb00ec31a0
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
/models
|
||||||
|
/target
|
||||||
|
/classes
|
||||||
|
/checkouts
|
||||||
|
profiles.clj
|
||||||
|
pom.xml
|
||||||
|
pom.xml.asc
|
||||||
|
*.jar
|
||||||
|
*.class
|
||||||
|
/.lein-*
|
||||||
|
/.nrepl-port
|
||||||
|
.hgignore
|
||||||
|
.hg/
|
|
@ -9,4 +9,8 @@
|
||||||
[wildwood "0.1.0-SNAPSHOT"]]
|
[wildwood "0.1.0-SNAPSHOT"]]
|
||||||
:main ^:skip-aot wwui.core
|
:main ^:skip-aot wwui.core
|
||||||
:target-path "target/%s"
|
:target-path "target/%s"
|
||||||
|
:plugins [[lein-cloverage "1.1.1"]
|
||||||
|
[lein-codox "0.10.7"]
|
||||||
|
[lein-cucumber "1.0.2"]
|
||||||
|
[lein-gorilla "0.4.0"]]
|
||||||
:profiles {:uberjar {:aot :all}})
|
:profiles {:uberjar {:aot :all}})
|
||||||
|
|
|
@ -27,42 +27,45 @@
|
||||||
[:noun]
|
[:noun]
|
||||||
["DT" :noun]
|
["DT" :noun]
|
||||||
[:adjectives :noun]
|
[:adjectives :noun]
|
||||||
["DT" :adjectives :noun]
|
["DT" :adjectives :noun]]
|
||||||
[:noun-phrase "CC" :noun-phrase]
|
;; :noun-phrases [[:noun-phrase]
|
||||||
[:noun-phrase "IN" :noun-phrase]
|
;; [:noun-phrase "CC" :noun-phrases]
|
||||||
[:noun-phrase "," :noun-phrase]]
|
;; [:noun-phrase "IN" :noun-phrases]
|
||||||
|
;; [:noun-phrase "," :noun-phrases]]
|
||||||
:adjective [["JJ"]["JJR"]["JJS"]]
|
:adjective [["JJ"]["JJR"]["JJS"]]
|
||||||
:adjectives [[:adjective]
|
:adjectives [[:adjective]
|
||||||
[:adjectives "CC" :adjective]]
|
[:adjective :adjectives]
|
||||||
|
[:adjective "," :adjectives]
|
||||||
|
[:adjective "CC" :adjectives]]
|
||||||
:verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]]
|
:verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]]
|
||||||
:adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs
|
:adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs
|
||||||
:adverbs [[:adverb]
|
:adverbs [[:adverb]
|
||||||
[:adverbs "," :adverb]
|
[:adverb "," :adverbs]
|
||||||
[:adverbs "CC" :adverb]]
|
[:adverb "CC" :adverbs]]
|
||||||
:verb-phrase [[:verb]
|
:verb-phrase [[:verb]
|
||||||
[:adverbs :verb]
|
[:adverbs :verb]
|
||||||
[:verb :adverb :verb]
|
[:verb :adverb :verb]
|
||||||
[:verb :adverbs]]
|
[:verb :adverbs]]
|
||||||
:locator [["IN" :noun-phrase]]
|
:locator [["IN" :noun-phrase]]
|
||||||
:locators [[:locator]
|
:locators [[:locator]
|
||||||
[:locator :locator]
|
[:locators :locator]
|
||||||
[:locator "," :locator]]
|
[:locators "," :locator]]
|
||||||
:subject [[:noun-phrase]]
|
:subject [[:noun-phrase]]
|
||||||
:object [[:noun-phrase]]
|
:object [[:noun-phrase]]
|
||||||
:proposition [[:subject :verb :object]
|
:proposition [[:subject :verb :object]
|
||||||
[:locators "," :subject :verb :object]
|
[:locator "," :subject :verb :object]
|
||||||
[:subject "," :locators "," :verb :object]
|
[:subject "," :locator "," :verb :object]
|
||||||
[:subject :verb-phrase :object :locators]]
|
[:subject :verb-phrase :object :locator]]
|
||||||
:propositions [[:proposition]
|
:propositions [[:proposition]
|
||||||
[:propositions "CC" :proposition]]})
|
[:proposition "CC" :propositions]]})
|
||||||
|
|
||||||
(declare recursive-descent-parser rdp-seek)
|
(declare reparse rdp-seek)
|
||||||
|
|
||||||
(defn rdp-seek
|
(defn rdp-seek
|
||||||
"Seek a phrase which satisfies this `goal` (expected to be a keyword) in
|
"Seek a phrase which satisfies this `goal` (expected to be a keyword) in
|
||||||
this `tagged-sentence` using this `grammar`.
|
this `tagged-sentence` using this `grammar`.
|
||||||
|
|
||||||
Return a sequence comprising
|
Return a cons comprising
|
||||||
1. the first matching phrase for the goal, tagged with the goal, or `nil` if
|
1. the first matching phrase for the goal, tagged with the goal, or `nil` if
|
||||||
no match;
|
no match;
|
||||||
2. the tail of the sentence when the parts comprising the phrase are removed."
|
2. the tail of the sentence when the parts comprising the phrase are removed."
|
||||||
|
@ -74,56 +77,82 @@
|
||||||
(sort
|
(sort
|
||||||
#(> (count %1) (count %2))
|
#(> (count %1) (count %2))
|
||||||
(map
|
(map
|
||||||
#(recursive-descent-parser tagged-sentence grammar %)
|
#(reparse tagged-sentence grammar %)
|
||||||
(goal grammar))))]
|
(goal grammar))))]
|
||||||
(cons (cons (first result) (list goal)) (rest result))))
|
(cons (cons (first result) (list goal)) (rest result))))
|
||||||
(throw (Exception. (str "Non-keyword passed to rdp-seek: " goal)))))
|
(throw (Exception. (str "Non-keyword passed to rdp-seek: `" goal "` (type " (or (type goal) "nil") ")")))))
|
||||||
|
|
||||||
;; (rdp-seek [["The" "DT"] ["Forum" "NNP"]] grammar :noun-phrase)
|
;; (rdp-seek [["The" "DT"] ["Forum" "NNP"]] grammar :noun-phrase)
|
||||||
;; (recursive-descent-parser [["The" "DT"] ["Forum" "NNP"]] grammar ["DT" "NNP"])
|
;; (reparse [["The" "DT"] ["Forum" "NNP"]] grammar ["DT" "NNP"])
|
||||||
;; (:noun-phrase grammar)
|
;; (:noun-phrase grammar)
|
||||||
|
|
||||||
|
|
||||||
(defmacro tag
|
(defmacro tag
|
||||||
"The tag, on a `tagged-token`, is just the second element. Written as a macro
|
"The tag, on a `tagged-token`, is just the second element. Written as a macro
|
||||||
for readability."
|
for readability."
|
||||||
[tagged-token]
|
[tagged-token]
|
||||||
`(nth ~tagged-token 1))
|
`(nth ~tagged-token 1))
|
||||||
|
|
||||||
|
(defmacro coll-or-nil?
|
||||||
|
[o]
|
||||||
|
"For fuck's sake, nil isn't a collection? What planet are these people on?"
|
||||||
|
`(or (nil? ~o) (coll? ~o)))
|
||||||
|
|
||||||
(defn rdp-extend
|
(defn rdp-extend
|
||||||
|
"Seek a phrase which satisfies this `goal` (expected to be a collection of tags) in
|
||||||
|
this `tagged-sentence` using this `grammar`.
|
||||||
|
|
||||||
|
Return a cons comprising
|
||||||
|
1. the first matching phrase for the goal, or `nil` if no match;
|
||||||
|
2. the tail of the sentence when the parts comprising the phrase are removed."
|
||||||
[tagged-sentence grammar goal]
|
[tagged-sentence grammar goal]
|
||||||
(l/info "Extending " goal " in " (with-out-str (pprint tagged-sentence)))
|
(l/info "Extending " goal " in " (with-out-str (pprint tagged-sentence)))
|
||||||
(cond
|
(cond
|
||||||
|
(not (coll-or-nil? goal))
|
||||||
|
(throw (Exception. (str "Non-collection passed to rdp-extend: `" goal "` (type " (or (type goal) "nil") ")")))
|
||||||
(empty? goal)
|
(empty? goal)
|
||||||
(cons (list) tagged-sentence)
|
(cons (list) tagged-sentence)
|
||||||
(not (empty? tagged-sentence))
|
(not (empty? tagged-sentence))
|
||||||
(let [[tt & st] tagged-sentence
|
(let [[tt & st] tagged-sentence
|
||||||
[target & gt] goal]
|
[target & gt] goal]
|
||||||
;; (pprint {:tagged-token tt
|
|
||||||
;; :sentence-tail st
|
|
||||||
;; :target target
|
|
||||||
;; :goal-tail gt})
|
|
||||||
(cond
|
(cond
|
||||||
(= target (tag tt))
|
(= target (tag tt))
|
||||||
(when-let [[dh & dt] (rdp-extend st grammar gt)]
|
(when-let [[dh & dt] (rdp-extend st grammar gt)]
|
||||||
(cons (cons tt dh) dt))
|
(cons (cons tt dh) dt))
|
||||||
(keyword? target)
|
(keyword? target)
|
||||||
(when-let [[dh & dt] (rdp-seek st grammar target)]
|
(when-let [[ph & pt] (rdp-seek tagged-sentence grammar target)] ;; it's this branch which is wrong. Getting it right, however, is not easy
|
||||||
(cons (cons tt dh) dt))))))
|
(let [[dh & dt] (rdp-extend st grammar pt)]
|
||||||
|
(cons (cons (cons ph dh) (list target)) dt)))))))
|
||||||
|
|
||||||
|
;; As of 23:00 on 20160427, I'm getting
|
||||||
|
;; (rdp-seek [["the" "DT"] ["forum" "NN"]] grammar :noun-phrase)
|
||||||
|
;; => (((["the" "DT"] (((["forum" "NN"]) :noun)) :noun) :noun-phrase))
|
||||||
|
;; it's wrong, but it's interestingly and probably tractably wrong.
|
||||||
|
|
||||||
|
;; Seeking a keyword target whose definition in the grammar comprises only
|
||||||
|
;; string targets seems to produce the right result...
|
||||||
|
;; but seeking a keyword target whose definition expands to further keyword
|
||||||
|
;; targets (e.g. :noun-phrase being defined in terms of :noun) blows up.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar [])
|
;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar [])
|
||||||
;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar ["DT"])
|
;; (rdp-extend [["The" "DT"] ["Forum" "NNP"]] grammar ["DT"])
|
||||||
;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "NNP"])
|
;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "NNP"])
|
||||||
;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "FOO"])
|
;; (rdp-extend '(["The" "DT"] ["Forum" "NNP"]) grammar ["DT" "FOO"])
|
||||||
|
|
||||||
(defn recursive-descent-parser
|
(defn reparse
|
||||||
"Reparse this `tagged-sentence` using this grammar to seek this `goal`.
|
"Reparse this `tagged-sentence` using this grammar to seek this `goal`.
|
||||||
Parse greedily, seeking the most extended goal.
|
Parse greedily, seeking the most extended goal.
|
||||||
|
|
||||||
Return a sequence comprising
|
Return a sequence comprising
|
||||||
1. the first matching phrase for the goal, tagged with the goal, or `nil`
|
1. the first matching phrase for the goal, tagged with the goal, or `nil`
|
||||||
if no match;
|
if no match;
|
||||||
2. the tail of the sentence when the parts comprising the phrase are removed."
|
2. the tail of the sentence when the parts comprising the phrase are removed.
|
||||||
|
|
||||||
|
This function is called `reparse` because:
|
||||||
|
1. it is designed to parse sentences which have already been parsed by
|
||||||
|
OpenNLP: it will not work on raw sentences;
|
||||||
|
2. it is a recursive descent parser."
|
||||||
[tagged-sentence grammar goal]
|
[tagged-sentence grammar goal]
|
||||||
(l/info "Choosing strategy for " goal " in " (with-out-str (pprint tagged-sentence)))
|
(l/info "Choosing strategy for " goal " in " (with-out-str (pprint tagged-sentence)))
|
||||||
(cond
|
(cond
|
||||||
|
@ -131,7 +160,7 @@
|
||||||
;; nil
|
;; nil
|
||||||
(keyword? goal)
|
(keyword? goal)
|
||||||
(rdp-seek tagged-sentence grammar goal)
|
(rdp-seek tagged-sentence grammar goal)
|
||||||
(coll? goal)
|
(coll-or-nil? goal)
|
||||||
(rdp-extend tagged-sentence grammar goal)))
|
(rdp-extend tagged-sentence grammar goal)))
|
||||||
|
|
||||||
(defn propositions
|
(defn propositions
|
||||||
|
@ -139,7 +168,7 @@
|
||||||
sentence; if `knowledge-accessor` is passed, try to resolve names and noun
|
sentence; if `knowledge-accessor` is passed, try to resolve names and noun
|
||||||
phrases to entities known to that knowledge accessor."
|
phrases to entities known to that knowledge accessor."
|
||||||
([tagged-sentence]
|
([tagged-sentence]
|
||||||
(recursive-descent-parser tagged-sentence grammar :propositions))
|
(reparse tagged-sentence grammar :propositions))
|
||||||
([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
|
([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
|
||||||
knowledge-accessor]
|
knowledge-accessor]
|
||||||
;; TODO: doesn't work yet.
|
;; TODO: doesn't work yet.
|
||||||
|
@ -155,29 +184,29 @@
|
||||||
#(propositions (pos-tag (tokenize %)))
|
#(propositions (pos-tag (tokenize %)))
|
||||||
(get-sentences (slurp file-path))))))
|
(get-sentences (slurp file-path))))))
|
||||||
|
|
||||||
;; (recursive-descent-parser [] grammar :noun)
|
;; (reparse [] grammar :noun)
|
||||||
;; (rdp-seek (pos-tag (tokenize "Brutus killed Caesar")) grammar :noun)
|
;; (rdp-seek (pos-tag (tokenize "Brutus killed Caesar")) grammar :noun)
|
||||||
;; (coll? ["NPP"])
|
;; (coll? ["NPP"])
|
||||||
;; (recursive-descent-parser (pos-tag (tokenize "killed Caesar")) grammar :verb)
|
;; (reparse (pos-tag (tokenize "killed Caesar")) grammar :verb)
|
||||||
(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar :noun-phrase)
|
;; (reparse (pos-tag (tokenize "The Forum")) grammar :noun-phrase)
|
||||||
(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
|
;; (reparse (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
|
||||||
|
|
||||||
(recursive-descent-parser [["Forum" "NNP"]] grammar :noun-phrase)
|
;; (reparse [["Forum" "NNP"]] grammar :noun-phrase)
|
||||||
|
|
||||||
(map
|
;; (map
|
||||||
#(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar %)
|
;; #(reparse (pos-tag (tokenize "Forum")) grammar %)
|
||||||
(:noun-phrase grammar))
|
;; (:noun-phrase grammar))
|
||||||
|
|
||||||
(rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
|
;; (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
|
||||||
|
|
||||||
;; (nil nil
|
;; (nil nil
|
||||||
;; ((["The" "DT"]) ["Forum" "NNP"])
|
;; ((["The" "DT"]) ["Forum" "NNP"])
|
||||||
;; nil
|
;; nil
|
||||||
;; ((["The" "DT"]) ["Forum" "NNP"]) nil nil nil)
|
;; ((["The" "DT"]) ["Forum" "NNP"]) nil nil nil)
|
||||||
|
|
||||||
;; (recursive-descent-parser (pos-tag (tokenize "in the Forum")) grammar :locator)
|
;; (reparse (pos-tag (tokenize "in the Forum")) grammar :locator)
|
||||||
|
|
||||||
;; (recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
|
;; (reparse (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
|
||||||
|
|
||||||
;; (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" :noun])
|
;; (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" :noun])
|
||||||
;; (let [deeper (rdp-extend (pos-tag (tokenize "Forum on Sunday")) grammar ["NNP"])]
|
;; (let [deeper (rdp-extend (pos-tag (tokenize "Forum on Sunday")) grammar ["NNP"])]
|
||||||
|
|
|
@ -2,11 +2,7 @@
|
||||||
(:require [clojure.test :refer :all]
|
(:require [clojure.test :refer :all]
|
||||||
[wwui.propositions :refer :all]))
|
[wwui.propositions :refer :all]))
|
||||||
|
|
||||||
(deftest a-test
|
(deftest reparser-tests
|
||||||
(testing "FIXME, I fail."
|
|
||||||
(is (= 0 1))))
|
|
||||||
|
|
||||||
(deftest "RDP tests"
|
|
||||||
(testing "Simplest constructs"
|
(testing "Simplest constructs"
|
||||||
(is (= (recursive-descent-parser [] grammar :noun) nil))
|
(is (= (recursive-descent-parser [] grammar :noun) nil))
|
||||||
(is
|
(is
|
||||||
|
|
Loading…
Reference in a new issue