diff --git a/project.clj b/project.clj index 4f1709e..ab96e46 100644 --- a/project.clj +++ b/project.clj @@ -1,10 +1,10 @@ -(defproject milkwood-clj "0.1.0-SNAPSHOT" - :description "Reimplementation of the Milkwood rule driven nonsense generator in Clojure" +(defproject milkwood-clj "0.1.0" + :description "Rule driven nonsense generator in Clojure" :url "http://example.com/FIXME" :license {:name "Eclipse Public License" :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.5.1"] [org.clojure/tools.cli "0.2.4"]] - :main milkwood-clj.core :jvm-opts ["-Xss4m"] - :profiles {:uberjar {:aot :all}}) + :profiles {:uberjar + {:main milkwood-clj.core :aot :all}}) diff --git a/src/milkwood_clj/analyse.clj b/src/milkwood_clj/analyse.clj index cfbbf85..41f7e02 100644 --- a/src/milkwood_clj/analyse.clj +++ b/src/milkwood_clj/analyse.clj @@ -4,6 +4,12 @@ [clojure.set :as set]) (:gen-class)) +(def token-pattern + "Regular expression used to split input into tokens." +;; #"\w+\'[stdm]|\w+|\p{Punct}" + #"\w+['-]\w+|\w+|\p{Punct}" + ) + (defn compose-rule "Compose a new rule tree (containing (obviously) only one rule) from this path. @@ -57,6 +63,4 @@ depth: the depth of rules/length of window we're considering" [file depth] (analyse-tokens nil nil - (map - (fn [string] (.toLowerCase string)) - (re-seq #"\w+\'[stdm]|\w+|\p{Punct}" (slurp file))) depth)) + (re-seq token-pattern (slurp file)) depth)) diff --git a/src/milkwood_clj/core.clj b/src/milkwood_clj/core.clj index 0f997b8..0138fbd 100644 --- a/src/milkwood_clj/core.clj +++ b/src/milkwood_clj/core.clj @@ -10,7 +10,10 @@ (defn -main "Parse command line arguments and kick off the process." [& args] - (let [[arguments _ banner] (cli args ["-f" "--file" "The path name of the file to analyse (string)"] + (let [[arguments _ banner] (cli args + "Rule driven nonsense generator. 
+See http://codekata.pragprog.com/2007/01/kata_fourteen_t.html" ["-f" "--file" "The path name of the file to analyse (string)"] ["-l" "--output-length" "The length in tokens of the output to generate (integer)" :parse-fn #(Integer. %) diff --git a/src/milkwood_clj/synthesise.clj b/src/milkwood_clj/synthesise.clj index b503913..cb25510 100644 --- a/src/milkwood_clj/synthesise.clj +++ b/src/milkwood_clj/synthesise.clj @@ -4,7 +4,10 @@ (:gen-class)) -(def end-magic-token "END") +(def end-magic-token + "A token to mark the end of the generated text, used to + distinguish completion from failure." + "ENDMAGICTOKEN") (defn next-tokens "Given these rules and this path, return a list of valid next tokens to emit.