diff --git a/src/milkwood_clj/analyse.clj b/src/milkwood_clj/analyse.clj index 1320871..d12c09b 100644 --- a/src/milkwood_clj/analyse.clj +++ b/src/milkwood_clj/analyse.clj @@ -33,8 +33,6 @@ ;; of the path. true (merge-rules rules (add-rule (rules (first path)) (rest path))))) -;; (map (fn [string] (.toLowerCase string)) (re-seq #"\w+" (slurp "../milkwood/undermilkwood.txt"))) - (defn analyse-tokens "Read this sequence of tokens and process it into rules. @@ -60,4 +58,4 @@ file: the path name of a file to read; depth: the depth of rules/length of window we're considering" [file depth] - (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+" (slurp file))) depth)) + (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+|\p{Punct}" (slurp file))) depth))