From d924ef17c66c7575ca4d02be87b4a697488b5909 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Mon, 27 Apr 2020 09:40:58 +0100 Subject: [PATCH] Trying to get this working... harder than it should be! --- CHANGELOG.md | 24 ++ LICENSE | 277 ++++++++++++++++++ README.md | 44 +++ doc/intro.md | 3 + project.clj | 12 + .../test/sample-proposition-sentences.txt | 24 ++ src/wwui/core.clj | 7 + src/wwui/propositions.clj | 186 ++++++++++++ test/wwui/core_test.clj | 7 + test/wwui/propositions_test.clj | 31 ++ 10 files changed, 615 insertions(+) create mode 100644 CHANGELOG.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 doc/intro.md create mode 100644 project.clj create mode 100644 resources/test/sample-proposition-sentences.txt create mode 100644 src/wwui/core.clj create mode 100644 src/wwui/propositions.clj create mode 100644 test/wwui/core_test.clj create mode 100644 test/wwui/propositions_test.clj diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5449b29 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,24 @@ +# Change Log +All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). + +## [Unreleased] +### Changed +- Add a new arity to `make-widget-async` to provide a different widget shape. + +## [0.1.1] - 2020-04-26 +### Changed +- Documentation on how to make the widgets. + +### Removed +- `make-widget-sync` - we're all async, all the time. + +### Fixed +- Fixed widget maker to keep working when daylight savings switches over. + +## 0.1.0 - 2020-04-26 +### Added +- Files from the new template. +- Widget maker public API - `make-widget-sync`. 
+ +[Unreleased]: https://github.com/your-name/wwui/compare/0.1.1...HEAD +[0.1.1]: https://github.com/your-name/wwui/compare/0.1.0...0.1.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d3087e4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,277 @@ +Eclipse Public License - v 2.0 + + THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE + PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION + OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial content + Distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate from + and are Distributed by that particular Contributor. A Contribution + "originates" from a Contributor if it was added to the Program by + such Contributor itself or anyone acting on such Contributor's behalf. + Contributions do not include changes or additions to the Program that + are not Modified Works. + +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. + +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. + +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. 
+ +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. + +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare Derivative Works of, publicly display, + publicly perform, Distribute and sublicense the Contribution of such + Contributor, if any, and such Derivative Works. + + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in Source Code or other form. This patent license shall + apply to the combination of the Contribution and the Program if, at + the time the Contribution is added by the Contributor, such addition + of the Contribution causes such combination to be covered by the + Licensed Patents. 
The patent license shall not apply to any other + combinations which include the Contribution. No hardware per se is + licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. As a condition to exercising the + rights and licenses granted hereunder, each Recipient hereby + assumes sole responsibility to secure any other intellectual + property rights needed, if any. For example, if a third party + patent license is required to allow Recipient to Distribute the + Program, it is Recipient's responsibility to acquire that license + before distributing the Program. + + d) Each Contributor represents that to its knowledge it has + sufficient copyright rights in its Contribution, if any, to grant + the copyright license set forth in this Agreement. + + e) Notwithstanding the terms of any Secondary License, no + Contributor makes additional grants to any Recipient (other than + those set forth in this Agreement) as a result of such Recipient's + receipt of the Program under the terms of a Secondary License + (if permitted under the terms of Section 3). + +3. 
REQUIREMENTS + +3.1 If a Contributor Distributes the Program in any form, then: + + a) the Program must also be made available as Source Code, in + accordance with section 3.2, and the Contributor must accompany + the Program with a statement that the Source Code for the Program + is available under this Agreement, and informs Recipients how to + obtain it in a reasonable manner on or through a medium customarily + used for software exchange; and + + b) the Contributor may Distribute the Program under a license + different than this Agreement, provided that such license: + i) effectively disclaims on behalf of all other Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; + + ii) effectively excludes on behalf of all other Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; + + iii) does not attempt to limit or alter the recipients' rights + in the Source Code under section 3.2; and + + iv) requires any subsequent distribution of the Program by any + party to be under a license that satisfies the requirements + of this section 3. + +3.2 When the Program is Distributed as Source Code: + + a) it must be made available under this Agreement, or if the + Program (i) is combined with other material in a separate file or + files made available under a Secondary License, and (ii) the initial + Contributor attached to the Source Code the notice described in + Exhibit A of this Agreement, then the Program may be made available + under the terms of such Secondary Licenses, and + + b) a copy of this Agreement must be included with each copy of + the Program. 
+ +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. 
If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +7. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and survive. + +Everyone is permitted to copy and distribute copies of this Agreement, +but in order to avoid inconsistency the Agreement is copyrighted and +may only be modified in the following manner. The Agreement Steward +reserves the right to publish new versions (including revisions) of +this Agreement from time to time. No one other than the Agreement +Steward has the right to modify this Agreement. The Eclipse Foundation +is the initial Agreement Steward. The Eclipse Foundation may assign the +responsibility to serve as the Agreement Steward to a suitable separate +entity. Each new version of the Agreement will be given a distinguishing +version number. 
The Program (including Contributions) may always be +Distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to Distribute the Program (including its +Contributions) under the new version. + +Except as expressly stated in Sections 2(a) and 2(b) above, Recipient +receives no rights or licenses to the intellectual property of any +Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted +under this Agreement are reserved. Nothing in this Agreement is intended +to be enforceable by any entity that is not a Contributor or Recipient. +No third-party beneficiary rights are created under this Agreement. + +Exhibit A - Form of Secondary Licenses Notice + +"This Source Code may also be made available under the following +Secondary Licenses when the conditions for such availability set forth +in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), +version(s), and exceptions or additional permissions here}." + + Simply including a copy of this Agreement, including this Exhibit A + is not sufficient to license the Source Code under Secondary Licenses. + + If it is not possible or desirable to put the notice in a particular + file, then You may include the notice in a location (such as a LICENSE + file in a relevant directory) where a recipient would be likely to + look for such a notice. + + You may add additional accurate notices of copyright ownership. diff --git a/README.md b/README.md new file mode 100644 index 0000000..03e29ad --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# wwui + +FIXME: description + +## Installation + +Download from http://example.com/FIXME. + +## Usage + +FIXME: explanation + + $ java -jar wwui-0.1.0-standalone.jar [args] + +## Options + +FIXME: listing of options this app accepts. + +## Examples + +... + +### Bugs + +... 
+ +### Any Other Sections +### That You Think +### Might be Useful + +## License + +Copyright © 2020 FIXME + +This program and the accompanying materials are made available under the +terms of the Eclipse Public License 2.0 which is available at +http://www.eclipse.org/legal/epl-2.0. + +This Source Code may also be made available under the following Secondary +Licenses when the conditions for such availability set forth in the Eclipse +Public License, v. 2.0 are satisfied: GNU General Public License as published by +the Free Software Foundation, either version 2 of the License, or (at your +option) any later version, with the GNU Classpath Exception which is available +at https://www.gnu.org/software/classpath/license.html. diff --git a/doc/intro.md b/doc/intro.md new file mode 100644 index 0000000..3c8291c --- /dev/null +++ b/doc/intro.md @@ -0,0 +1,3 @@ +# Introduction to wwui + +TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) diff --git a/project.clj b/project.clj new file mode 100644 index 0000000..e4e9263 --- /dev/null +++ b/project.clj @@ -0,0 +1,12 @@ +(defproject wwui "0.1.0-SNAPSHOT" + :description "Experimental work towards a conversational interface to Wildwood" + :url "http://example.com/FIXME" + :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" + :url "https://www.eclipse.org/legal/epl-2.0/"} + :dependencies [[org.clojure/clojure "1.8.0"] + [clojure-opennlp "0.5.0"] + [com.taoensso/timbre "4.10.0"] + [wildwood "0.1.0-SNAPSHOT"]] + :main ^:skip-aot wwui.core + :target-path "target/%s" + :profiles {:uberjar {:aot :all}}) diff --git a/resources/test/sample-proposition-sentences.txt b/resources/test/sample-proposition-sentences.txt new file mode 100644 index 0000000..8c1a5fb --- /dev/null +++ b/resources/test/sample-proposition-sentences.txt @@ -0,0 +1,24 @@ +Socrates is a man. + +Brutus killed Caesar. +Brutus used the dagger. +The dagger caused the wound. +Caesar died from the wound. 
+
+Brutus killed Caesar in the Forum on the Ides of March.
+Brutus used the Dagger in the Forum on the Ides of March.
+The Dagger caused the Wound in the Forum on the Ides of March.
+Caesar died of the wound in the Forum on the Ides of March.
+
+Calpurnia married Caesar.
+Calpurnia is married to Caesar.
+
+Calpurnia said "Brutus killed Caesar in the Forum on the Ides of March".
+Calpurnia said proposition one.
+
+Drusilla heard Calpurnia say "Brutus killed Caesar in the Forum on the Ides of March".
+Drusilla heard "Brutus killed Caesar in the Forum on the Ides of March".
+Drusilla heard proposition one.
+
+Dirck, Joris and I carried the good news from Ghent to Aix.
+
diff --git a/src/wwui/core.clj b/src/wwui/core.clj
new file mode 100644
index 0000000..a3b5425
--- /dev/null
+++ b/src/wwui/core.clj
@@ -0,0 +1,7 @@
+(ns wwui.core
+  (:gen-class))
+
+(defn -main
+  "I don't do a whole lot ... yet."
+  [& args]
+  (println "Hello, World!"))
diff --git a/src/wwui/propositions.clj b/src/wwui/propositions.clj
new file mode 100644
index 0000000..db178e0
--- /dev/null
+++ b/src/wwui/propositions.clj
@@ -0,0 +1,186 @@
+(ns wwui.propositions
+  (:require [clojure.pprint :refer [pprint]]
+            [opennlp.nlp :as nlp]
+            [opennlp.treebank :as tb]
+            [taoensso.timbre :as l :refer [info error spy]]
+            [wildwood.knowledge-accessor :refer [Accessor]]))
+
+;; Position tags used by OpenNLP for English are documented here:
+;; https://dpdearing.com/posts/2011/12/opennlp-part-of-speech-pos-tags-penn-english-treebank/
+
+(def get-sentences (nlp/make-sentence-detector "models/en-sent.bin"))
+(def tokenize (nlp/make-tokenizer "models/en-token.bin"))
+(def pos-tag (nlp/make-pos-tagger "models/en-pos-maxent.bin"))
+(def name-find (nlp/make-name-finder "models/namefind/en-ner-person.bin"))
+;; (def chunker (make-treebank-chunker "models/en-chunker.bin"))
+
+(def grammar
+  "The objective of this grammar is to allow us to take a sequence of tagged symbols, and
+  produce a higher-level tagging of parts of speech, and ultimately propositions, from them.
+
+  *NOTE THAT* tags in this grammar are always keywords, to distinguish them from OpenNLP
+  tags, which tag individual tokens and are represented as strings."
+  {:contextual-reference [["PRP"]] ;; the documentation says PRP is 'personal pronoun',
+                                   ;; but it seems to be all pronouns.
+   :noun [["NN"]["NNS"]["NNP"]["NNPS"]]
+   :noun-phrase [[:contextual-reference]
+                 [:noun]
+                 ["DT" :noun]
+                 [:adjectives :noun]
+                 ["DT" :adjectives :noun]
+                 [:noun-phrase "CC" :noun-phrase]
+                 [:noun-phrase "IN" :noun-phrase]
+                 [:noun-phrase "," :noun-phrase]]
+   :adjective [["JJ"]["JJR"]["JJS"]]
+   :adjectives [[:adjective]
+                [:adjectives "CC" :adjective]]
+   :verb [["VB"]["VBD"]["VBG"]["VBN"]["VBP"]["VBZ"]]
+   :adverb [["RB"]["RBR"]["RBS"]] ;; beware here that negation and qualification show up only as adverbs
+   :adverbs [[:adverb]
+             [:adverbs "," :adverb]
+             [:adverbs "CC" :adverb]]
+   :verb-phrase [[:verb]
+                 [:adverbs :verb]
+                 [:verb :adverb :verb]
+                 [:verb :adverbs]]
+   :locator [["IN" :noun-phrase]]
+   :locators [[:locator]
+              [:locator :locator]
+              [:locator "," :locator]]
+   :subject [[:noun-phrase]]
+   :object [[:noun-phrase]]
+   :proposition [[:subject :verb :object]
+                 [:locators "," :subject :verb :object]
+                 [:subject "," :locators "," :verb :object]
+                 [:subject :verb-phrase :object :locators]]
+   :propositions [[:proposition]
+                  [:propositions "CC" :proposition]]})
+
+(declare recursive-descent-parser rdp-seek)
+
+(defn- best-parse
+  "Of these `results` (each either `nil` or a sequence whose head is a matched
+  phrase and whose tail is the unconsumed remainder of the sentence), return
+  the one which leaves least unconsumed, or `nil` if none matched. Ties go
+  to the earliest result, because `sort-by` is stable."
+  [results]
+  (first (sort-by count (remove nil? results))))
+
+(defn rdp-seek
+  "Seek a phrase which satisfies this `goal` (expected to be a keyword) in
+  this `tagged-sentence` using this `grammar`.
+
+  Productions which begin with `goal` itself are left recursive, and descending
+  into them directly would never terminate. Instead the other productions are
+  tried first, and the remainders of the left recursive ones are then used to
+  extend that match for as long as they continue to apply.
+
+  Return `nil` if there is no match; otherwise a sequence comprising
+  1. the longest matching phrase for the goal, tagged with the goal;
+  2. the tail of the sentence when the parts comprising the phrase are removed.
+
+  Throws an `Exception` if `goal` is not a keyword."
+  [tagged-sentence grammar goal]
+  (l/info "Seeking " goal " in " (with-out-str (pprint tagged-sentence)))
+  (when-not (keyword? goal)
+    (throw (Exception. (str "Non-keyword passed to rdp-seek: " goal))))
+  (let [recursive? #(= goal (first %))
+        attempt (fn [sentence productions]
+                  (best-parse
+                    (map #(recursive-descent-parser sentence grammar %)
+                         productions)))]
+    (when-let [[found & tail] (attempt tagged-sentence
+                                       (remove recursive? (goal grammar)))]
+      (loop [phrase (list found goal)
+             tail tail]
+        (if-let [[more & remainder]
+                 (attempt tail (map rest (filter recursive? (goal grammar))))]
+          (recur (list (cons phrase more) goal) remainder)
+          (cons phrase tail))))))
+
+(defn tag
+  "The tag, on a `tagged-token`, is just the second element."
+  [tagged-token]
+  (nth tagged-token 1))
+
+(defn rdp-extend
+  "Match the elements of this `goal` (a sequence of OpenNLP tag strings and/or
+  grammar keywords), in order, against the start of this `tagged-sentence`.
+  A string must equal the tag of the next token; a keyword is sought using
+  `rdp-seek`, and matching resumes after whatever that phrase consumed.
+
+  Return `nil` if the whole goal cannot be matched; otherwise a sequence
+  whose head is the list of matched tokens and phrases, and whose tail is
+  the unconsumed remainder of the sentence."
+  [tagged-sentence grammar goal]
+  (l/info "Extending " goal " in " (with-out-str (pprint tagged-sentence)))
+  (cond
+    (empty? goal)
+    (cons (list) tagged-sentence)
+    (seq tagged-sentence)
+    (let [[tt & st] tagged-sentence
+          [target & gt] goal]
+      (cond
+        (= target (tag tt))
+        (when-let [[dh & dt] (rdp-extend st grammar gt)]
+          (cons (cons tt dh) dt))
+        (keyword? target)
+        ;; seek in the whole sentence - `tt` may begin the phrase - and then
+        ;; carry on with the rest of the goal in whatever the phrase left.
+        (when-let [[phrase & unparsed] (rdp-seek tagged-sentence grammar target)]
+          (when-let [[dh & dt] (rdp-extend unparsed grammar gt)]
+            (cons (cons phrase dh) dt)))))))
+
+(defn recursive-descent-parser
+  "Reparse this `tagged-sentence` using this grammar to seek this `goal`,
+  which may be a grammar keyword or a sequence of tags and keywords.
+  Parse greedily, seeking the most extended goal.
+
+  Return `nil` if there is no match (or `goal` is neither a keyword nor a
+  collection); otherwise a sequence comprising
+  1. the first matching phrase for the goal (tagged with the goal, if the
+     goal is a keyword);
+  2. the tail of the sentence when the parts comprising the phrase are removed."
+  [tagged-sentence grammar goal]
+  (l/info "Choosing strategy for " goal " in " (with-out-str (pprint tagged-sentence)))
+  (cond
+    (keyword? goal)
+    (rdp-seek tagged-sentence grammar goal)
+    (coll? goal)
+    (rdp-extend tagged-sentence grammar goal)))
+
+(defn propositions
+  "Parse this `tagged-sentence` for `:propositions`, returning the result of
+  `recursive-descent-parser`. If `knowledge-accessor` is passed the intention is to
+  resolve names and noun phrases against it, but that is not yet implemented: `nil`."
+  ([tagged-sentence]
+   (recursive-descent-parser tagged-sentence grammar :propositions))
+  ([tagged-sentence ;; ^wildwood.knowledge-accessor.Accessor
+    knowledge-accessor]
+   ;; TODO: doesn't work yet.
+   nil))
+
+(defn propositions-from-file
+  "Parse each sentence of the text file at `file-path`, and return the
+  concatenation of the results for those sentences which could be parsed."
+  [file-path]
+  (mapcat
+    #(propositions (pos-tag (tokenize %)))
+    (get-sentences (slurp file-path))))
+
+;; Scratchpad of expressions to evaluate at the REPL. They are wrapped in
+;; `comment` so that they are not run every time the namespace is loaded.
+(comment
+  (recursive-descent-parser [] grammar :noun)
+  (rdp-seek (pos-tag (tokenize "Brutus killed Caesar")) grammar :noun)
+  (recursive-descent-parser (pos-tag (tokenize "killed Caesar")) grammar :verb)
+  (recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar :noun-phrase)
+  (recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
+  (recursive-descent-parser [["Forum" "NNP"]] grammar :noun-phrase)
+  (map
+    #(recursive-descent-parser (pos-tag (tokenize "The Forum")) grammar %)
+    (:noun-phrase grammar))
+  (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" "NNP"])
+  (rdp-extend (pos-tag (tokenize "The Forum")) grammar ["DT" :noun])
+  (recursive-descent-parser (pos-tag (tokenize "in the Forum")) grammar :locator)
+  (propositions (pos-tag (tokenize "Brutus killed Caesar in the Forum"))))
diff --git a/test/wwui/core_test.clj b/test/wwui/core_test.clj
new file mode 100644
index 0000000..7cb4a64
--- /dev/null
+++ b/test/wwui/core_test.clj
@@ -0,0 +1,7 @@
+(ns wwui.core-test
+  (:require [clojure.test :refer :all]
+            [wwui.core :refer :all]))
+
+(deftest main-test
+  (testing "-main prints its greeting."
+    (is (.startsWith (with-out-str (-main)) "Hello, World!"))))
diff --git a/test/wwui/propositions_test.clj b/test/wwui/propositions_test.clj
new file mode 100644
index 0000000..5729c60
--- /dev/null
+++ b/test/wwui/propositions_test.clj
@@ -0,0 +1,31 @@
+(ns wwui.propositions-test
+  (:require [clojure.test :refer :all]
+            [wwui.propositions :refer :all]))
+
+;; Sentences are tagged by hand here, so that what is tested is the parser
+;; and the grammar, not whatever tags the OpenNLP models happen to assign.
+
+(deftest rdp-tests
+  (testing "Simplest constructs"
+    (is (nil? (recursive-descent-parser [] grammar :noun)))
+    (is
+      (=
+        (recursive-descent-parser [["Brutus" "NNP"] ["killed" "VBD"] ["Caesar" "NNP"]] grammar :noun)
+        '(((["Brutus" "NNP"]) :noun) ["killed" "VBD"] ["Caesar" "NNP"])))
+    (is
+      (=
+        (recursive-descent-parser [["killed" "VBD"] ["Caesar" "NNP"]] grammar :verb)
+        '(((["killed" "VBD"]) :verb) ["Caesar" "NNP"]))))
+  (testing "Phrases built from other phrases"
+    (is
+      (=
+        (recursive-descent-parser [["Brutus" "NNP"] ["killed" "VBD"] ["Caesar" "NNP"]] grammar :noun-phrase)
+        '(((((["Brutus" "NNP"]) :noun)) :noun-phrase) ["killed" "VBD"] ["Caesar" "NNP"])))
+    (is
+      (=
+        (recursive-descent-parser [["The" "DT"] ["Forum" "NNP"]] grammar :noun-phrase)
+        '(((["The" "DT"] ((["Forum" "NNP"]) :noun)) :noun-phrase))))
+    (is
+      (=
+        (recursive-descent-parser [["in" "IN"] ["the" "DT"] ["Forum" "NNP"]] grammar :locator)
+        '(((["in" "IN"] ((["the" "DT"] ((["Forum" "NNP"]) :noun)) :noun-phrase)) :locator))))))