From 3315eef7b8a19f3443518183c8c1a3782f4902ae Mon Sep 17 00:00:00 2001
From: Simon Brooke <simon@journeyman.cc>
Date: Fri, 8 Nov 2013 11:11:30 +0000
Subject: [PATCH] Now correctly tokenises all punctuation.

---
 src/milkwood_clj/analyse.clj | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/milkwood_clj/analyse.clj b/src/milkwood_clj/analyse.clj
index 1320871..d12c09b 100644
--- a/src/milkwood_clj/analyse.clj
+++ b/src/milkwood_clj/analyse.clj
@@ -33,8 +33,6 @@
    ;; of the path.
    true (merge-rules rules (add-rule (rules (first path)) (rest path)))))
 
-;; (map (fn [string] (.toLowerCase string)) (re-seq #"\w+" (slurp "../milkwood/undermilkwood.txt")))
-
 (defn analyse-tokens
   "Read this sequence of tokens and process it into rules.
 
@@ -60,4 +58,4 @@
   file: the path name of a file to read;
   depth: the depth of rules/length of window we're considering"
   [file depth]
-  (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+" (slurp file))) depth))
+  (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+|\p{Punct}" (slurp file))) depth))