From 68fafdab990af68d1b41c41c65e8707dfa1d0a3a Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Fri, 8 Nov 2013 12:58:46 +0000 Subject: [PATCH] OK, there's a bug on the analyse side and I think it's in merge-rules. All the rules that ought to be being generated are being generated, but the rule tree returned by analyse-tokens is incomplete. I'm not yet certain what is wrong. --- README.md | 2 +- project.clj | 1 + src/milkwood_clj/analyse.clj | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fc3c0f8..0454a5d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ FIXME: listing of options this app accepts. ### Bugs -... +Not so much a bug, but as I've written this all as pure recursive functions it's vulnerable to stack exhaustion exceptions. I've specified extended stack size in the project file, but that won't be sufficient for analysing large texts. diff --git a/project.clj b/project.clj index 11fae01..cd06700 100644 --- a/project.clj +++ b/project.clj @@ -5,4 +5,5 @@ :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.5.1"]] :main milkwood-clj.core + :jvm-opts ["-Xss4m"] :profiles {:uberjar {:aot :all}}) diff --git a/src/milkwood_clj/analyse.clj b/src/milkwood_clj/analyse.clj index 502518d..013c923 100644 --- a/src/milkwood_clj/analyse.clj +++ b/src/milkwood_clj/analyse.clj @@ -23,6 +23,7 @@ rules: a rule tree (i.e. a recursively nested map token => rule-tree); path: a flat sequence of tokens." [rules path] + (prn "Rule: " path) (cond ;; if we have no more path, we're done. (empty? path) nil @@ -52,6 +53,7 @@ ;; else just continue without adding a rule. true (analyse-tokens rules rage (rest tokens) depth))))) + (defn analyse-file "Read this file and process it into rules. @@ -59,3 +61,4 @@ depth: the depth of rules/length of window we're considering" [file depth] (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+\'s|\w+|\p{Punct}" (slurp file))) depth)) +