Now parsing propositions well, working on refinements.
This commit is contained in:
parent
2e1855eaf5
commit
479c186c46
|
@ -4,6 +4,7 @@
|
||||||
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
||||||
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
||||||
:dependencies [[org.clojure/clojure "1.8.0"]
|
:dependencies [[org.clojure/clojure "1.8.0"]
|
||||||
|
[org.clojure/math.combinatorics "0.1.6"]
|
||||||
[clojure-opennlp "0.5.0"]
|
[clojure-opennlp "0.5.0"]
|
||||||
[com.taoensso/timbre "4.10.0"]
|
[com.taoensso/timbre "4.10.0"]
|
||||||
[wildwood "0.1.0-SNAPSHOT"]]
|
[wildwood "0.1.0-SNAPSHOT"]]
|
||||||
|
|
21
resources/locators.edn
Normal file
21
resources/locators.edn
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
(def locators '((((["of" "IN"]
|
||||||
|
((((((["idealism" "NN"]) :noun)) :noun-phrase)) :noun-phrases))
|
||||||
|
:locator)
|
||||||
|
((((["in" "IN"]
|
||||||
|
((((((["philosophy" "NN"]) :noun)) :noun-phrase))
|
||||||
|
:noun-phrases))
|
||||||
|
:locator)
|
||||||
|
["," ","]
|
||||||
|
((((["in" "IN"]
|
||||||
|
((((((["politics" "NNS"]) :noun)) :noun-phrase))
|
||||||
|
:noun-phrases))
|
||||||
|
:locator)
|
||||||
|
["," ","]
|
||||||
|
((((["in" "IN"]
|
||||||
|
((((((["literature" "NN"]) :noun)) :noun-phrase))
|
||||||
|
:noun-phrases))
|
||||||
|
:locator))
|
||||||
|
:locators))
|
||||||
|
:locators))
|
||||||
|
:locators))
|
||||||
|
:locators))
|
2834
resources/plato_propositions.edn
Normal file
2834
resources/plato_propositions.edn
Normal file
File diff suppressed because it is too large
Load diff
15837
resources/platos_republic.txt
Normal file
15837
resources/platos_republic.txt
Normal file
File diff suppressed because it is too large
Load diff
24692
resources/platos_republic_with_intro_and_analysis.txt
Normal file
24692
resources/platos_republic_with_intro_and_analysis.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,5 +1,6 @@
|
||||||
(ns wwui.propositions
|
(ns wwui.propositions
|
||||||
(:require [clojure.pprint :refer [pprint]]
|
(:require [clojure.math.combinatorics :as combi]
|
||||||
|
[clojure.pprint :refer [pprint]]
|
||||||
[clojure.string :as s]
|
[clojure.string :as s]
|
||||||
[opennlp.nlp :as nlp]
|
[opennlp.nlp :as nlp]
|
||||||
[opennlp.treebank :as tb]
|
[opennlp.treebank :as tb]
|
||||||
|
@ -48,20 +49,23 @@
|
||||||
[:adverb "CC" :adverbs]]
|
[:adverb "CC" :adverbs]]
|
||||||
:verb-phrase [[:verb]
|
:verb-phrase [[:verb]
|
||||||
[:adverbs :verb]
|
[:adverbs :verb]
|
||||||
[:verb :adverb :verb]
|
[:verb :adverbs :verb]
|
||||||
[:verb :adverbs]]
|
[:verb :adverbs]
|
||||||
|
[:verb :adverbs :verb "TO"]]
|
||||||
:locator [["IN" :noun-phrases]]
|
:locator [["IN" :noun-phrases]]
|
||||||
:locators [[:locator]
|
:locators [[:locator]
|
||||||
[:locator :locators]
|
[:locator :locators]
|
||||||
[:locator "," :locators]]
|
[:locator "," :locators]]
|
||||||
|
:location [[:locators]]
|
||||||
:subject [[:noun-phrases]]
|
:subject [[:noun-phrases]]
|
||||||
:object [[:noun-phrases]]
|
:object [[:noun-phrases]]
|
||||||
:proposition [[:subject :verb-phrase :object]
|
:proposition [[:subject :verb-phrase :object]
|
||||||
[:locators "," :subject :verb-phrase :object]
|
[:location "," :subject :verb-phrase :object]
|
||||||
[:subject "," :locators "," :verb-phrase :object]
|
[:subject "," :location "," :verb-phrase :object]
|
||||||
[:subject :verb-phrase :object :locators]]
|
[:subject :verb-phrase :object :location]]
|
||||||
:propositions [[:proposition]
|
:propositions [[:proposition]
|
||||||
[:proposition "CC" :propositions]]})
|
[:proposition "CC" :propositions]
|
||||||
|
[:proposition "," "CC" :propositions]]})
|
||||||
|
|
||||||
(declare reparse rdp-seek)
|
(declare reparse rdp-seek)
|
||||||
|
|
||||||
|
@ -154,22 +158,38 @@
|
||||||
;; to the knowledge accessor in the hope of finding a true name.
|
;; to the knowledge accessor in the hope of finding a true name.
|
||||||
parse-tree)
|
parse-tree)
|
||||||
|
|
||||||
(defn normalise-proposition
|
(defn normalise
|
||||||
[parse-tree ka]
|
[parse-tree ka]
|
||||||
(when
|
(if
|
||||||
(= (nth parse-tree 1) :proposition)
|
(and (coll? parse-tree) (= (count parse-tree) 2)(keyword? (nth parse-tree 1)))
|
||||||
|
(case (nth parse-tree 1)
|
||||||
|
:proposition (list
|
||||||
(reduce
|
(reduce
|
||||||
merge
|
merge
|
||||||
{}
|
{}
|
||||||
(map
|
(map
|
||||||
|
;; TODO: use combinatorics to extract all propositions from
|
||||||
|
;; a proposition having multiple locations, multiple subjects,
|
||||||
|
;; objects and/or verbs
|
||||||
#(assoc {} (nth % 1) (identify (first %) ka))
|
#(assoc {} (nth % 1) (identify (first %) ka))
|
||||||
(first parse-tree)))))
|
(map #(normalise % ka) (first parse-tree)))))
|
||||||
|
(:location :subject :object)
|
||||||
;; (defn normalise
|
(cons
|
||||||
;; [parse-tree ka]
|
(reduce
|
||||||
;; (if
|
concat
|
||||||
;; (and (coll? parse-tree) (keyword? (nth parse-tree 1)))
|
(remove
|
||||||
;; (case (nth parse-tree 1)
|
empty?
|
||||||
|
(map #(normalise % ka) (first parse-tree))))
|
||||||
|
(list (nth parse-tree 1)))
|
||||||
|
(:propositions :locators :noun-phrases :verbs)
|
||||||
|
(reduce
|
||||||
|
concat
|
||||||
|
(remove
|
||||||
|
empty?
|
||||||
|
(map #(normalise % ka) (first parse-tree))))
|
||||||
|
;; else
|
||||||
|
parse-tree)
|
||||||
|
parse-tree))
|
||||||
|
|
||||||
(defn propositions
|
(defn propositions
|
||||||
"Given a `tagged-sentence`, return a list of propositions detected in that
|
"Given a `tagged-sentence`, return a list of propositions detected in that
|
||||||
|
@ -184,17 +204,25 @@
|
||||||
([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
|
([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
|
||||||
knowledge-accessor]
|
knowledge-accessor]
|
||||||
;; TODO: doesn't work yet.
|
;; TODO: doesn't work yet.
|
||||||
|
(reduce
|
||||||
|
concat
|
||||||
|
(remove
|
||||||
|
empty?
|
||||||
(map
|
(map
|
||||||
#(normalise-proposition % knowledge-accessor)
|
#(normalise % knowledge-accessor)
|
||||||
(first (first (reparse tagged-sentence grammar :propositions))))))
|
(first (first (reparse tagged-sentence grammar :propositions))))))))
|
||||||
|
|
||||||
(defn propositions-from-file
|
(defn propositions-from-file
|
||||||
[file-path]
|
[file-path]
|
||||||
(reduce
|
(reduce
|
||||||
concat
|
concat
|
||||||
(remove
|
(remove
|
||||||
nil?
|
empty?
|
||||||
(map
|
(map
|
||||||
#(propositions (pos-tag (tokenize %)))
|
#(propositions (pos-tag (tokenize %)))
|
||||||
(get-sentences (slurp file-path))))))
|
(get-sentences (slurp file-path))))))
|
||||||
|
|
||||||
|
;; (reparse (pos-tag (tokenize "True love is the daughter of temperance, and temperance is utterly opposed to the madness of bodily pleasure.")) grammar :propositions)
|
||||||
|
;; (reparse [["temperance" "NN"] ["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :subject)
|
||||||
|
;; (reparse [["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :verb-phrase)
|
||||||
|
;; (reparse [["is" "VBZ"] ["utterly" "RB"] ["opposed" "VBN"] ["to" "TO"] ["the" "DT"] ["madness" "NN"] ["of" "IN"] ["bodily" "JJ"] ["pleasure" "NN"]] grammar :verb-phrase)
|
||||||
|
|
Loading…
Reference in a new issue