Basically finished. Not capitalising initial words of sentences; not

printing output to an output file even if one is specified; not
honouring the help flag.
This commit is contained in:
Simon Brooke 2013-11-09 00:39:20 +00:00
parent 5141419efa
commit fffaf0cca0
6 changed files with 126 additions and 22 deletions

View file

@ -3,7 +3,8 @@
:url "http://example.com/FIXME" :url "http://example.com/FIXME"
:license {:name "Eclipse Public License" :license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"} :url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies [[org.clojure/clojure "1.5.1"]] :dependencies [[org.clojure/clojure "1.5.1"]
[org.clojure/tools.cli "0.2.4"]]
:main milkwood-clj.core :main milkwood-clj.core
:jvm-opts ["-Xss4m"] :jvm-opts ["-Xss4m"]
:profiles {:uberjar {:aot :all}}) :profiles {:uberjar {:aot :all}})

View file

@ -14,7 +14,12 @@
true (hash-map (first path) (compose-rule (rest path))))) true (hash-map (first path) (compose-rule (rest path)))))
(defn merge-rules [these those] (defn merge-rules
"Merge two rule trees.
these: a rule tree;
those: a rule tree."
[these those]
(utils/deep-merge-with set/union these those)) (utils/deep-merge-with set/union these those))
(defn add-rule (defn add-rule
@ -51,5 +56,4 @@
file: the path name of a file to read; file: the path name of a file to read;
depth: the depth of rules/length of window we're considering" depth: the depth of rules/length of window we're considering"
[file depth] [file depth]
(analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+\'s|\w+|\p{Punct}" (slurp file))) depth)) (analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+\'[st]|\w+|\p{Punct}" (slurp file))) depth))

View file

@ -1,18 +1,33 @@
(ns milkwood-clj.core (ns milkwood-clj.core
(require (require
[milkwood-clj.analyse :as analyse] [milkwood-clj.analyse :as analyse]
[milkwood-clj.synthesise :as sythesise] [milkwood-clj.synthesise :as synthesise]
[clojure.set :as set]) [clojure.set :as set])
(:use [clojure.tools.cli :only [cli]])
(:gen-class)) (:gen-class))
(defn -main (defn -main
"I don't do a whole lot ... yet." "Parse command line arguments and kick off the process."
[& args] [& args]
(println "Hello, World!")) (let [[arguments _ banner] (cli args ["-f" "--file" "The path name of the file to analyse (string)"]
["-l" "--output-length"
"The length in tokens of the output to generate (integer)"
:parse-fn #(Integer. %)
:default 100]
["-h" "--help" "Print this text and exit" :flag true]
["-o" "--output" "The path name of the file to write to, if any (string)"]
["-t" "--tuple-length"
"The length of the sequences to analyse it into (integer)"
:parse-fn #(Integer. %)
:default 2]) file (arguments :file)]
(cond
(= file nil) (print banner)
(arguments :help) (print banner)
true (synthesise/write-output
(synthesise/compose-nonsense
(analyse/analyse-file file (arguments :tuple-length))
(arguments :output-length))))
(prn "\n")))
;;;; read side - also probably a separate file
;;;; write side

View file

@ -4,6 +4,8 @@
(:gen-class)) (:gen-class))
(def end-magic-token "END")
(defn next-tokens (defn next-tokens
"Given these rules and this path, return a list of valid next tokens to emit. "Given these rules and this path, return a list of valid next tokens to emit.
@ -11,14 +13,87 @@
path: a flat sequence of tokens." path: a flat sequence of tokens."
[rules path] [rules path]
(cond (cond
(empty? (rest path)) (shuffle (keys (rules (first path)))) (empty? (rest path)) (keys (rules (first path)))
(empty? (rules (first path))) nil (empty? (rules (first path))) nil
true (next-tokens (rules (first path)) (rest path)))) true (next-tokens (rules (first path)) (rest path))))
(defn compose-nonsense (defn compose-prologue
[window token depth length] "Generate a prologue before entering the rule driven part of the synthesis
(cond process. The prologue needs to be as long as the rule-tree depth, to allow
(= length 0) nil it to be used as a glance back window.
) TODO: Should really generate something which is a gramatically legal start
of an English sentence.
) rules: a rule tree;
depth: the depth of that rule tree."
[rules depth]
(cond
(= depth 0) nil
true (let [token (first (shuffle (keys rules)))]
(cons token (compose-prologue (rules token) (- depth 1))))))
(defn compose-nonsense
"Try to compose this much nonsense given these rules and this glance-back window.
This is a rather messy hack around Clojure's curiously awkward rules on mutually
recursive functions. The second and third arities of this function (4 args and
5 args respectively) are in effect mutually recursive functions. The 4 args form
extends the output, the five args form hunts sideways among the options. The first,
two arg arity is simply a convenient entry point.
rules: a rule tree;
window: a glance back at the last tokens emitted;
depth: the target size of the glance back window, which should be one less than
the depth of the rule tree;
length: the number of tokens of nonsense which should be generated;
options: candidates to be the next token"
([rules length]
(compose-nonsense rules nil (- (utils/rule-tree-depth rules) 1) length))
([rules window depth length]
(let [window-size (count window) options (next-tokens rules window)]
(cond
;; if no more length, we're done. We need to return something non-null to indicate we haven't failed.
(= length 0) (list end-magic-token)
;; if we've no window, we need to choose one and continue.
(= window nil)
(let [prologue (compose-prologue rules depth)]
(flatten (list prologue (compose-nonsense rules prologue depth (- length depth)))))
;; if we've a window but no options, we're stuffed.
(= options nil) nil
true (compose-nonsense rules window depth (- length 1) (shuffle options)))))
([rules window depth length options]
(cond
;; if I've run out of options, I've failed.
(empty? options) nil
;; if I still have some options...
true
;; try the first of them...
(let [nonsense (compose-nonsense rules (utils/slide-window window (first options) depth) depth length)]
(cond
;; if that fails, try the others...
(empty? nonsense) (compose-nonsense rules window depth length (rest options))
;; but if it succeeds we're good.
true (cons (first options) nonsense))
))))
(defn write-token
[token]
"Write a single token to the output, doing some basic orthographic tricks.
token: the token to write."
(cond
(= token end-magic-token) nil
(re-find #"^[.!?]$" token) (do (print token) (cond (= (rand-int 5) 0) (print "\n\n")))
(re-find #"^\p{Punct}$" token) (print token)
true (print (str " " token))))
(defn write-output
"Write this output, doing little orthographic tricks to make it look superficially
like real English text.
TODO: does not yet work. Should take an optional second argument,
the file to write to if any (default to standard out).
output: a sequence of tokens to write."
[output]
(dorun (map write-token output)))

View file

@ -3,7 +3,16 @@
[clojure.set :as set]) [clojure.set :as set])
(:gen-class)) (:gen-class))
;;;; utilities - probably in the fullness of time a separate file (defn rule-tree-depth
"Return the depth of this rule tree.
rules: a rule tree.
NOTE: it is possible that there might be some short rules in a rule tree, so this
implementation is not entirely safe."
[rules]
(cond
(empty? rules) 0
true (+ 1 (apply min (map (fn [key] (rule-tree-depth (rules key))) (keys rules))))))
(defn slide-window (defn slide-window

View file

@ -2,6 +2,6 @@
(:require [clojure.test :refer :all] (:require [clojure.test :refer :all]
[milkwood-clj.core :refer :all])) [milkwood-clj.core :refer :all]))
(deftest a-test ;;(deftest a-test
(testing "Nothing worth testing in core, yet." ;; (testing "Nothing worth testing in core, yet."
(is (= 1 1)))) ;; (is (= 1 1))))