Actually added the documentation to the repository. D'oh!
This commit is contained in:
parent
12a2eb71c8
commit
38a4610f0e
9 changed files with 757 additions and 8 deletions
|
|
@ -30,7 +30,7 @@
|
|||
<td class="with-number">7</td><td class="with-number">1</td><td class="with-number">3</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><a href="wwui/propositions.clj.html">wwui.propositions</a></td><td class="with-bar"><div class="covered"
|
||||
<td><a href="wwui/parser.clj.html">wwui.parser</a></td><td class="with-bar"><div class="covered"
|
||||
style="width:58.17307692307692%;
|
||||
float:left;"> 484 </div><div class="not-covered"
|
||||
style="width:41.82692307692308%;
|
||||
|
|
|
|||
692
docs/cloverage/wwui/parser.clj.html
Normal file
692
docs/cloverage/wwui/parser.clj.html
Normal file
|
|
@ -0,0 +1,692 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
|
||||
<link rel="stylesheet" href="../coverage.css"/> <title> wwui/parser.clj </title>
|
||||
</head>
|
||||
<body>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
001 (ns wwui.parser
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
002 (:require [clojure.math.combinatorics :as combi]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
003 [clojure.pprint :refer [pprint]]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
004 [clojure.string :as s]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
005 [opennlp.nlp :as nlp]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
006 [opennlp.treebank :as tb]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
007 [taoensso.timbre :as log :refer [debug error info spy]]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
008 [wildwood.knowledge-accessor :refer [Accessor]]))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
009
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
010 ;; Position tags used by OpenNLP for English are documented here:
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
011 ;; https://dpdearing.com/posts/2011/12/opennlp-part-of-speech-pos-tags-penn-english-treebank/
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
012
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
013 (def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
014 (def tokenize (nlp/make-tokenizer "models/en-token.bin"))
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
015 (def pos-tag (nlp/make-pos-tagger "models/en-pos-maxent.bin"))
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
016 (def name-find (nlp/make-name-finder "models/namefind/en-ner-person.bin"))
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
017 ;; (def chunker (make-treebank-chunker "models/en-chunker.bin"))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
018
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
019 (def grammar
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
020 "The objective of this grammar is to allow us to take a sequence of tagged symbols, and
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
021 produce a higher-level tagging of parts of speech, and ultimately propositions, from them.
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
022
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
023 *NOTE THAT* tags in this grammar are always keywords, to distinguish them from OpenNLP
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
024 tags, which tag individual tokens and are represented as strings."
|
||||
</span><br/>
|
||||
<span class="covered" title="22 out of 22 forms covered">
|
||||
025 {:contextual-reference [["PRP"]] ;; the documentation says PRP is 'peronal pronoun',
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
026 ;; but it seems to be all pronouns.
|
||||
</span><br/>
|
||||
<span class="covered" title="9 out of 9 forms covered">
|
||||
027 :noun [["NN"]["NNS"]["NNP"]["NNPS"]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
028 :full-name [["NNP"]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
029 ["NNP" :full-name]] ;; an unpunctuated sequence of proper nouns
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
030 ;; probably represents a full name
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
031 :noun-phrase [[:contextual-reference]
|
||||
</span><br/>
|
||||
<span class="covered" title="2 out of 2 forms covered">
|
||||
032 [:noun]
|
||||
</span><br/>
|
||||
<span class="covered" title="2 out of 2 forms covered">
|
||||
033 [:full-name]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
034 ["DT" :noun]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
035 [:adjectives :noun]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
036 ["DT" :adjectives :noun]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
037 :noun-phrases [[:noun-phrase]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
038 [:noun-phrase "CC" :noun-phrases]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
039 [:noun-phrase "," :noun-phrases]]
|
||||
</span><br/>
|
||||
<span class="covered" title="7 out of 7 forms covered">
|
||||
040 :adjective [["JJ"]["JJR"]["JJS"]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
041 :adjectives [[:adjective]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
042 [:adjective :adjectives]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
043 [:adjective "," :adjectives]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
044 [:adjective "CC" :adjectives]]
|
||||
</span><br/>
|
||||
<span class="covered" title="13 out of 13 forms covered">
|
||||
045 :verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]]
|
||||
</span><br/>
|
||||
<span class="covered" title="7 out of 7 forms covered">
|
||||
046 :adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
047 :adverbs [[:adverb]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
048 [:adverb "," :adverbs]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
049 [:adverb "CC" :adverbs]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
050 :verb-phrase [[:verb]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
051 [:adverbs :verb]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
052 [:verb :adverbs :verb]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
053 [:verb :adverbs]
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
054 [:verb :adverbs :verb "TO"]]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
055 :locator [["IN" :noun-phrases]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
056 :locators [[:locator]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
057 [:locator :locators]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
058 [:locator "," :locators]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
059 :location [[:locators]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
060 :subject [[:noun-phrases]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
061 :object [[:noun-phrases]]
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
062 :proposition [[:subject :verb-phrase :object]
|
||||
</span><br/>
|
||||
<span class="covered" title="6 out of 6 forms covered">
|
||||
063 [:location "," :subject :verb-phrase :object]
|
||||
</span><br/>
|
||||
<span class="covered" title="7 out of 7 forms covered">
|
||||
064 [:subject "," :location "," :verb-phrase :object]
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
065 [:subject :verb-phrase :object :location]]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
066 :propositions [[:proposition]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
067 [:proposition "CC" :propositions]
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
068 [:proposition "," "CC" :propositions]]})
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
069
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
070 (declare reparse rdp-seek)
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
071
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
072 (defn rdp-seek
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
073 "Seek a phrase which satisfies this `goal` (expected to be a keyword) in
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
074 this `tagged-sentence` using this `grammar`.
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
075
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
076 Return a cons comprising
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
077 1. the first matching phrase for the goal, tagged with the goal, or `nil` if
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
078 no match;
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
079 2. the tail of the sentence when the parts comprising the phrase are removed."
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
080 [tagged-sentence grammar goal]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
081 (if (keyword? goal)
|
||||
</span><br/>
|
||||
<span class="covered" title="7 out of 7 forms covered">
|
||||
082 (when (not (empty? tagged-sentence))
|
||||
</span><br/>
|
||||
<span class="covered" title="8 out of 8 forms covered">
|
||||
083 (when-let [result (first
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
084 (sort
|
||||
</span><br/>
|
||||
<span class="covered" title="8 out of 8 forms covered">
|
||||
085 #(< (count %1) (count %2))
|
||||
</span><br/>
|
||||
<span class="covered" title="2 out of 2 forms covered">
|
||||
086 (remove
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
087 empty?
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
088 (map
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
089 #(reparse tagged-sentence grammar %)
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
090 (goal grammar)))))]
|
||||
</span><br/>
|
||||
<span class="covered" title="13 out of 13 forms covered">
|
||||
091 (cons (cons (first result) (list goal)) (rest result))))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 16 forms covered">
|
||||
092 (throw (Exception. (str "Non-keyword passed to rdp-seek: `" goal "` (type " (or (type goal) "nil") ")")))))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
093
|
||||
</span><br/>
|
||||
<span class="covered" title="19 out of 19 forms covered">
|
||||
094 (defmacro tag
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
095 "The tag, on a `tagged-token`, is just the second element. Written as a macro
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
096 for readability."
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
097 [tagged-token]
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
098 `(nth ~tagged-token 1))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
099
|
||||
</span><br/>
|
||||
<span class="covered" title="44 out of 44 forms covered">
|
||||
100 (defmacro coll-or-nil?
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
101 [o]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
102 "For fuck's sake, `nil` isn't a collection? What planet are these people on?"
|
||||
</span><br/>
|
||||
<span class="covered" title="2 out of 2 forms covered">
|
||||
103 `(or (nil? ~o) (coll? ~o)))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
104
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
105 (defn rdp-extend
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
106 "Seek a phrase which satisfies this `goal` (expected to be a collection of tags) in
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
107 this `tagged-sentence` using this `grammar`.
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
108
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
109 Return a cons comprising
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
110 1. the first matching phrase for the goal, or `nil` if no match;
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
111 2. the tail of the sentence when the parts comprising the phrase are removed."
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
112 [tagged-sentence grammar goal]
|
||||
</span><br/>
|
||||
<span class="covered" title="4 out of 4 forms covered">
|
||||
113 (cond
|
||||
</span><br/>
|
||||
<span class="covered" title="10 out of 10 forms covered">
|
||||
114 (not (coll-or-nil? goal))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 16 forms covered">
|
||||
115 (throw (Exception. (str "Non-collection passed to rdp-extend: `" goal "` (type " (or (type goal) "nil") ")")))
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
116 (empty? goal)
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
117 (cons (list) tagged-sentence)
|
||||
</span><br/>
|
||||
<span class="covered" title="5 out of 5 forms covered">
|
||||
118 (not (empty? tagged-sentence))
|
||||
</span><br/>
|
||||
<span class="covered" title="2 out of 2 forms covered">
|
||||
119 (let [[tt & st] tagged-sentence
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
120 [target & gt] goal]
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
121 (cond
|
||||
</span><br/>
|
||||
<span class="covered" title="3 out of 3 forms covered">
|
||||
122 (keyword? target)
|
||||
</span><br/>
|
||||
<span class="covered" title="11 out of 11 forms covered">
|
||||
123 (when-let [[h & t](reparse tagged-sentence grammar target)]
|
||||
</span><br/>
|
||||
<span class="covered" title="11 out of 11 forms covered">
|
||||
124 (when-let [[dh & dt] (reparse t grammar gt)]
|
||||
</span><br/>
|
||||
<span class="covered" title="7 out of 7 forms covered">
|
||||
125 (cons (cons h dh) dt)))
|
||||
</span><br/>
|
||||
<span class="covered" title="6 out of 6 forms covered">
|
||||
126 (= target (tag tt))
|
||||
</span><br/>
|
||||
<span class="covered" title="11 out of 11 forms covered">
|
||||
127 (when-let [[dh & dt] (reparse st grammar gt)]
|
||||
</span><br/>
|
||||
<span class="covered" title="7 out of 7 forms covered">
|
||||
128 (cons (cons tt dh) dt))))))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
129
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
130 (defn reparse
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
131 "Reparse this `tagged-sentence` using this grammar to seek this `goal`.
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
132 Parse greedily, seeking the most extended goal.
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
133
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
134 Return a sequence comprising
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
135 1. the first matching phrase for the goal, tagged with the goal, or `nil`
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
136 if no match;
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
137 2. the tail of the sentence when the parts comprising the phrase are removed.
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
138
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
139 This function is called `reparse` because:
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
140 1. it is designed to parse sentences which have already been parsed by
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
141 OpenNLP: it will not work on raw sentences;
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
142 2. it is a recursive descent parser."
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
143 [tagged-sentence grammar goal]
|
||||
</span><br/>
|
||||
<span class="partial" title="13 out of 16 forms covered">
|
||||
144 (log/debug "=> Choosing strategy for "
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 19 forms covered">
|
||||
145 goal " in " (with-out-str (pprint tagged-sentence)))
|
||||
</span><br/>
|
||||
<span class="partial" title="3 out of 4 forms covered">
|
||||
146 (let [r (cond
|
||||
</span><br/>
|
||||
<span class="covered" title="8 out of 8 forms covered">
|
||||
147 (keyword? goal) (rdp-seek tagged-sentence grammar goal)
|
||||
</span><br/>
|
||||
<span class="covered" title="13 out of 13 forms covered">
|
||||
148 (coll-or-nil? goal) (rdp-extend tagged-sentence grammar goal))]
|
||||
</span><br/>
|
||||
<span class="partial" title="13 out of 18 forms covered">
|
||||
149 (log/debug "<= " goal " in "
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 20 forms covered">
|
||||
150 (s/trim (with-out-str (pprint tagged-sentence)))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 20 forms covered">
|
||||
151 " returned " (s/trim (with-out-str (pprint r))))
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
152 r))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
153
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
154 (defn identify
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
155 [parse-tree knowledge-accessor]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
156 ;; TODO: we don't yet have a working knowledge accessor. When we do,
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
157 ;; construct a query from the contents of this parse-tree, and pass it
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
158 ;; to the knowledge accessor in the hope of finding a true name.
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
159 parse-tree)
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
160
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
161 (defn normalise
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
162 [parse-tree ka]
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
163 (if
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 23 forms covered">
|
||||
164 (and (coll? parse-tree) (= (count parse-tree) 2)(keyword? (nth parse-tree 1)))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 6 forms covered">
|
||||
165 (case (nth parse-tree 1)
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 2 forms covered">
|
||||
166 :proposition (list
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 2 forms covered">
|
||||
167 (reduce
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
168 merge
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
169 {}
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 3 forms covered">
|
||||
170 (map
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
171 ;; TODO: use combinatorics to extract all propositions from
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
172 ;; a proposition having multiple locations, multiple subject,
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
173 ;; objects and/or verbs
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 13 forms covered">
|
||||
174 #(assoc {} (nth % 1) (identify (first %) ka))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 10 forms covered">
|
||||
175 (map #(normalise % ka) (first parse-tree)))))
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
176 (:location :subject :object)
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 6 forms covered">
|
||||
177 (cons
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 6 forms covered">
|
||||
178 (reduce
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 3 forms covered">
|
||||
179 concat
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 6 forms covered">
|
||||
180 (remove
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 3 forms covered">
|
||||
181 empty?
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 30 forms covered">
|
||||
182 (map #(normalise % ka) (first parse-tree))))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 18 forms covered">
|
||||
183 (list (nth parse-tree 1)))
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
184 (:propositions :locators :noun-phrases :verbs)
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 8 forms covered">
|
||||
185 (reduce
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 4 forms covered">
|
||||
186 concat
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 8 forms covered">
|
||||
187 (remove
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 4 forms covered">
|
||||
188 empty?
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 40 forms covered">
|
||||
189 (map #(normalise % ka) (first parse-tree))))
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
190 ;; else
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
191 parse-tree)
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
192 parse-tree))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
193
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
194 (defn propositions
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
195 "Given a `tagged-sentence`, return a list of propositions detected in that
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
196 sentence; if `knowledge-accessor` is passed, try to resolve names and noun
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
197 phrases to entities known to that knowledge accessor.
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
198
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
199 TODO: Note that if `:subject`, `:object` or `:locator` resolves to multiple
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
200 objects, then that is essentially one proposition for each unique
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
201 combination. This is not yet implemented!"
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
202 ([tagged-sentence]
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 4 forms covered">
|
||||
203 (propositions tagged-sentence nil))
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
204 ([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
205 knowledge-accessor]
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
206 ;; TODO: doesn't work yet.
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 2 forms covered">
|
||||
207 (reduce
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
208 concat
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 2 forms covered">
|
||||
209 (remove
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
210 empty?
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 3 forms covered">
|
||||
211 (map
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 4 forms covered">
|
||||
212 #(normalise % knowledge-accessor)
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 9 forms covered">
|
||||
213 (first (first (reparse tagged-sentence grammar :propositions))))))))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
214
|
||||
</span><br/>
|
||||
<span class="covered" title="1 out of 1 forms covered">
|
||||
215 (defn propositions-from-file
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
216 [file-path]
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 2 forms covered">
|
||||
217 (reduce
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
218 concat
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 2 forms covered">
|
||||
219 (remove
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 1 forms covered">
|
||||
220 empty?
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 3 forms covered">
|
||||
221 (map
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 7 forms covered">
|
||||
222 #(propositions (pos-tag (tokenize %)))
|
||||
</span><br/>
|
||||
<span class="not-covered" title="0 out of 5 forms covered">
|
||||
223 (get-sentences (slurp file-path))))))
|
||||
</span><br/>
|
||||
<span class="blank" title="0 out of 0 forms covered">
|
||||
224
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
225 ;; (reparse (pos-tag (tokenize "True love is the daughter of temperance, and temperance is utterly opposed to the madness of bodily pleasure.")) grammar :propositions)
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
226 ;; (reparse [["temperance" "NN"] ["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :subject)
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
227 ;; (reparse [["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :verb-phrase)
|
||||
</span><br/>
|
||||
<span class="not-tracked" title="0 out of 0 forms covered">
|
||||
228 ;; (reparse [["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :verb-phrase)
|
||||
</span><br/>
|
||||
</body>
|
||||
</html>
|
||||
Loading…
Add table
Add a link
Reference in a new issue