diff --git a/README.md b/README.md index fc3c0f8..0454a5d 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ FIXME: listing of options this app accepts. ### Bugs -... +Not so much a bug, but as I've written this all as pure recursive functions it's vulnerable to stack exhaustion exceptions. I've specified extended stack size in the project file, but that won't be sufficient for analysing large texts. diff --git a/project.clj b/project.clj index 11fae01..cd06700 100644 --- a/project.clj +++ b/project.clj @@ -5,4 +5,5 @@ :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.5.1"]] :main milkwood-clj.core + :jvm-opts ["-Xss4m"] :profiles {:uberjar {:aot :all}}) diff --git a/src/milkwood_clj/analyse.clj b/src/milkwood_clj/analyse.clj index 502518d..013c923 100644 --- a/src/milkwood_clj/analyse.clj +++ b/src/milkwood_clj/analyse.clj @@ -23,6 +23,7 @@ rules: a rule tree (i.e. a recursively nested map token => rule-tree); path: a flat sequence of tokens." [rules path] + (prn "Rule: " path) (cond ;; if we have no more path, we're done. (empty? path) nil @@ -52,6 +53,7 @@ ;; else just continue without adding a rule. true (analyse-tokens rules rage (rest tokens) depth))))) + (defn analyse-file "Read this file and process it into rules. @@ -59,3 +61,4 @@ depth: the depth of rules/length of window we're considering" [file depth] (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+\'s|\w+|\p{Punct}" (slurp file))) depth)) +