Basically finished. Not capitalising initial words of sentences; not
printing output to an output file even if one is specified; not honouring the help flag.
This commit is contained in:
parent
5141419efa
commit
fffaf0cca0
|
@ -3,7 +3,8 @@
|
|||
:url "http://example.com/FIXME"
|
||||
:license {:name "Eclipse Public License"
|
||||
:url "http://www.eclipse.org/legal/epl-v10.html"}
|
||||
:dependencies [[org.clojure/clojure "1.5.1"]]
|
||||
:dependencies [[org.clojure/clojure "1.5.1"]
|
||||
[org.clojure/tools.cli "0.2.4"]]
|
||||
:main milkwood-clj.core
|
||||
:jvm-opts ["-Xss4m"]
|
||||
:profiles {:uberjar {:aot :all}})
|
||||
|
|
|
@ -14,7 +14,12 @@
|
|||
true (hash-map (first path) (compose-rule (rest path)))))
|
||||
|
||||
|
||||
(defn merge-rules [these those]
|
||||
(defn merge-rules
|
||||
"Merge two rule trees.
|
||||
|
||||
these: a rule tree;
|
||||
those: a rule tree."
|
||||
[these those]
|
||||
(utils/deep-merge-with set/union these those))
|
||||
|
||||
(defn add-rule
|
||||
|
@ -51,5 +56,4 @@
|
|||
file: the path name of a file to read;
|
||||
depth: the depth of rules/length of window we're considering"
|
||||
[file depth]
|
||||
(analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+\'s|\w+|\p{Punct}" (slurp file))) depth))
|
||||
|
||||
(analyse-tokens nil nil (map (fn [string] (.toLowerCase string)) (re-seq #"\w+\'[st]|\w+|\p{Punct}" (slurp file))) depth))
|
||||
|
|
|
@ -1,18 +1,33 @@
|
|||
(ns milkwood-clj.core
|
||||
(require
|
||||
[milkwood-clj.analyse :as analyse]
|
||||
[milkwood-clj.synthesise :as sythesise]
|
||||
[milkwood-clj.synthesise :as synthesise]
|
||||
[clojure.set :as set])
|
||||
(:use [clojure.tools.cli :only [cli]])
|
||||
(:gen-class))
|
||||
|
||||
|
||||
(defn -main
|
||||
"I don't do a whole lot ... yet."
|
||||
"Parse command line arguments and kick off the process."
|
||||
[& args]
|
||||
(println "Hello, World!"))
|
||||
(let [[arguments _ banner] (cli args ["-f" "--file" "The path name of the file to analyse (string)"]
|
||||
["-l" "--output-length"
|
||||
"The length in tokens of the output to generate (integer)"
|
||||
:parse-fn #(Integer. %)
|
||||
:default 100]
|
||||
["-h" "--help" "Print this text and exit" :flag true]
|
||||
["-o" "--output" "The path name of the file to write to, if any (string)"]
|
||||
["-t" "--tuple-length"
|
||||
"The length of the sequences to analyse it into (integer)"
|
||||
:parse-fn #(Integer. %)
|
||||
:default 2]) file (arguments :file)]
|
||||
(cond
|
||||
(= file nil) (print banner)
|
||||
(arguments :help) (print banner)
|
||||
true (synthesise/write-output
|
||||
(synthesise/compose-nonsense
|
||||
(analyse/analyse-file file (arguments :tuple-length))
|
||||
(arguments :output-length))))
|
||||
(prn "\n")))
|
||||
|
||||
|
||||
;;;; read side - also probably a separate file
|
||||
|
||||
|
||||
;;;; write side
|
||||
|
|
|
@ -4,6 +4,8 @@
|
|||
(:gen-class))
|
||||
|
||||
|
||||
(def end-magic-token "END")
|
||||
|
||||
(defn next-tokens
|
||||
"Given these rules and this path, return a list of valid next tokens to emit.
|
||||
|
||||
|
@ -11,14 +13,87 @@
|
|||
path: a flat sequence of tokens."
|
||||
[rules path]
|
||||
(cond
|
||||
(empty? (rest path)) (shuffle (keys (rules (first path))))
|
||||
(empty? (rest path)) (keys (rules (first path)))
|
||||
(empty? (rules (first path))) nil
|
||||
true (next-tokens (rules (first path)) (rest path))))
|
||||
|
||||
(defn compose-nonsense
|
||||
[window token depth length]
|
||||
(cond
|
||||
(= length 0) nil
|
||||
)
|
||||
(defn compose-prologue
|
||||
"Generate a prologue before entering the rule driven part of the synthesis
|
||||
process. The prologue needs to be as long as the rule-tree depth, to allow
|
||||
it to be used as a glance back window.
|
||||
TODO: Should really generate something which is a gramatically legal start
|
||||
of an English sentence.
|
||||
|
||||
)
|
||||
rules: a rule tree;
|
||||
depth: the depth of that rule tree."
|
||||
[rules depth]
|
||||
(cond
|
||||
(= depth 0) nil
|
||||
true (let [token (first (shuffle (keys rules)))]
|
||||
(cons token (compose-prologue (rules token) (- depth 1))))))
|
||||
|
||||
|
||||
|
||||
(defn compose-nonsense
|
||||
"Try to compose this much nonsense given these rules and this glance-back window.
|
||||
This is a rather messy hack around Clojure's curiously awkward rules on mutually
|
||||
recursive functions. The second and third arities of this function (4 args and
|
||||
5 args respectively) are in effect mutually recursive functions. The 4 args form
|
||||
extends the output, the five args form hunts sideways among the options. The first,
|
||||
two arg arity is simply a convenient entry point.
|
||||
|
||||
rules: a rule tree;
|
||||
window: a glance back at the last tokens emitted;
|
||||
depth: the target size of the glance back window, which should be one less than
|
||||
the depth of the rule tree;
|
||||
length: the number of tokens of nonsense which should be generated;
|
||||
options: candidates to be the next token"
|
||||
([rules length]
|
||||
(compose-nonsense rules nil (- (utils/rule-tree-depth rules) 1) length))
|
||||
([rules window depth length]
|
||||
(let [window-size (count window) options (next-tokens rules window)]
|
||||
(cond
|
||||
;; if no more length, we're done. We need to return something non-null to indicate we haven't failed.
|
||||
(= length 0) (list end-magic-token)
|
||||
;; if we've no window, we need to choose one and continue.
|
||||
(= window nil)
|
||||
(let [prologue (compose-prologue rules depth)]
|
||||
(flatten (list prologue (compose-nonsense rules prologue depth (- length depth)))))
|
||||
;; if we've a window but no options, we're stuffed.
|
||||
(= options nil) nil
|
||||
true (compose-nonsense rules window depth (- length 1) (shuffle options)))))
|
||||
([rules window depth length options]
|
||||
(cond
|
||||
;; if I've run out of options, I've failed.
|
||||
(empty? options) nil
|
||||
;; if I still have some options...
|
||||
true
|
||||
;; try the first of them...
|
||||
(let [nonsense (compose-nonsense rules (utils/slide-window window (first options) depth) depth length)]
|
||||
(cond
|
||||
;; if that fails, try the others...
|
||||
(empty? nonsense) (compose-nonsense rules window depth length (rest options))
|
||||
;; but if it succeeds we're good.
|
||||
true (cons (first options) nonsense))
|
||||
))))
|
||||
|
||||
(defn write-token
|
||||
[token]
|
||||
"Write a single token to the output, doing some basic orthographic tricks.
|
||||
|
||||
token: the token to write."
|
||||
(cond
|
||||
(= token end-magic-token) nil
|
||||
(re-find #"^[.!?]$" token) (do (print token) (cond (= (rand-int 5) 0) (print "\n\n")))
|
||||
(re-find #"^\p{Punct}$" token) (print token)
|
||||
true (print (str " " token))))
|
||||
|
||||
(defn write-output
|
||||
"Write this output, doing little orthographic tricks to make it look superficially
|
||||
like real English text.
|
||||
TODO: does not yet work. Should take an optional second argument,
|
||||
the file to write to if any (default to standard out).
|
||||
|
||||
output: a sequence of tokens to write."
|
||||
[output]
|
||||
(dorun (map write-token output)))
|
||||
|
|
|
@ -3,7 +3,16 @@
|
|||
[clojure.set :as set])
|
||||
(:gen-class))
|
||||
|
||||
;;;; utilities - probably in the fullness of time a separate file
|
||||
(defn rule-tree-depth
|
||||
"Return the depth of this rule tree.
|
||||
|
||||
rules: a rule tree.
|
||||
NOTE: it is possible that there might be some short rules in a rule tree, so this
|
||||
implementation is not entirely safe."
|
||||
[rules]
|
||||
(cond
|
||||
(empty? rules) 0
|
||||
true (+ 1 (apply min (map (fn [key] (rule-tree-depth (rules key))) (keys rules))))))
|
||||
|
||||
|
||||
(defn slide-window
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
(:require [clojure.test :refer :all]
|
||||
[milkwood-clj.core :refer :all]))
|
||||
|
||||
(deftest a-test
|
||||
(testing "Nothing worth testing in core, yet."
|
||||
(is (= 1 1))))
|
||||
;;(deftest a-test
|
||||
;; (testing "Nothing worth testing in core, yet."
|
||||
;; (is (= 1 1))))
|
||||
|
|
Loading…
Reference in a new issue