From a876cb6d1b7cb41dac963c2e920d9b49a94b79f0 Mon Sep 17 00:00:00 2001
From: Simon Brooke <simon@journeyman.cc>
Date: Thu, 31 Oct 2013 11:32:11 +0000
Subject: [PATCH] All working very beautifully.

---
 README.txt                                   |   5 +
 src/cc/journeyman/milkwood/Composer.java     | 168 ++++++++-----------
 src/cc/journeyman/milkwood/Milkwood.java     |  32 ++--
 src/cc/journeyman/milkwood/RuleTreeNode.java |  25 ++-
 src/cc/journeyman/milkwood/Tokeniser.java    |   7 +
 src/cc/journeyman/milkwood/WordStack.java    |  57 +++++++
 src/cc/journeyman/milkwood/Writer.java       |  18 +-
 7 files changed, 195 insertions(+), 117 deletions(-)
 create mode 100644 src/cc/journeyman/milkwood/WordStack.java
diff --git a/README.txt b/README.txt
index 163facc..b2c7bc2 100644
--- a/README.txt
+++ b/README.txt
@@ -76,4 +76,9 @@ Decluttered the TextGenerator class by moving the whole read stage into two new
 
 Right, fully decluttered, All bugs(!) are in new class Composer. I have a little Liszt...
 
+Parsing word tuples for n > 2 working sweetly. That is not the problem!
+
+Major refactoring and cleanup of the compose stage...
+
+ye! Utuvienyes
 
diff --git a/src/cc/journeyman/milkwood/Composer.java b/src/cc/journeyman/milkwood/Composer.java
index 068b5f5..c69f863 100644
--- a/src/cc/journeyman/milkwood/Composer.java
+++ b/src/cc/journeyman/milkwood/Composer.java
@@ -1,7 +1,7 @@
 package cc.journeyman.milkwood;
 
 import java.util.Collection;
-import java.util.Stack;
+import java.util.Collections;
 
 /**
  * Composes text output based on a rule tree.
@@ -27,23 +27,29 @@ public class Composer {
 	/**
 	 * Recursive, backtracking, output generator.
 	 * 
-	 * @param rules
-	 * @param tupleLength
-	 * @param length
-	 * @return
+	 * @param rules the rule set we're working to.
+	 * @param length the number of tokens still to be output.
+	 * @return the preamble followed by the composed body; never null.
 	 */
-	public WordSequence compose(RuleTreeNode rules, int tupleLength, int length) {
-		Stack<String> preamble = composePreamble(rules);
+	public WordSequence compose(RuleTreeNode rules, int length) {
+		WordStack preamble = composePreamble(rules);
 		WordSequence result = new WordSequence();
 
 		// composing the preamble will have ended with *ROOT* on top of the
 		// stack;
 		// get rid of it.
 		preamble.pop();
+		
+		if (debug) {
+			System.err.println( "Preamble: " + preamble);
+		}
 
 		result.addAll(preamble);
-
-		result.addAll(this.compose(preamble, rules, rules, tupleLength, length));
+		
+		WordStack body = this.compose(preamble, rules, length);
+		Collections.reverse(body);
+		result.addAll(body);
+		
 		return result;
 	}
 
@@ -51,103 +57,63 @@ public class Composer {
 	 * Recursively attempt to find sequences in the ruleset to append to what's
 	 * been composed so far.
 	 * 
-	 * @param glanceBack
-	 * @param allRules
-	 * @param currentRules
-	 * @param tupleLength
-	 * @param length
-	 * @return
+	 * @param glanceBack the last few words output.
+	 * @param rules the rule set we're working to.
+	 * @param length the number of tokens still to be output.
+	 * @return if a successful path forward is found, that path, else null.
 	 */
-	private WordSequence compose(Stack<String> glanceBack,
-			RuleTreeNode allRules, RuleTreeNode currentRules, int tupleLength,
+	private WordStack compose(WordStack glanceBack, RuleTreeNode rules,
 			int length) {
-		assert (glanceBack.size() == tupleLength) : "Shouldn't happen: bad tuple size";
-		assert (allRules.getWord() == RuleTreeNode.ROOTMAGICTOKEN) : "Shoudn't happen: bad rule set";
-		WordSequence result;
-
-		try {
-			@SuppressWarnings("unchecked")
-			String here = currentRules.getWord((Stack<String>) glanceBack
-					.clone());
-			System.err.println(String.format("Trying token %s", here));
-
-			result = new WordSequence();
-			result.add(here);
-
-			if (length != 0) {
-				/* we're not done yet */
-				Collection<String> options = allRules.getSuccessors();
-
-				for (String next : options) {
-					@SuppressWarnings("unchecked")
-					WordSequence rest = this
-							.tryOption((Stack<String>) glanceBack.clone(),
-									allRules, currentRules.getRule(next),
-									tupleLength, length - 1);
-
-					if (rest != null) {
-						/* we have a solution */
-						result.addAll(rest);
-						break;
-					}
-				}
-			}
-		} catch (NoSuchPathException ex) {
-			if (debug) {
-				System.err.println(String.format("No path %s: Backtracking...",
-						glanceBack));
-			}
-			result = null;
+		final WordStack result;
+		
+		if ( debug) {
+			System.err.println( String.format( "%d: %s", length, glanceBack));
 		}
 
-		return result;
-	}
-
-	/**
-	 * Try composing with this ruleset
-	 * 
-	 * @param glanceBack
-	 * @param allRules
-	 *            all the rules there are.
-	 * @param currentRules
-	 *            the current node in the rule tree.
-	 * @param tupleLength
-	 *            the size of the glanceback window we're considering.
-	 * @param length
-	 * @return
-	 */
-	private WordSequence tryOption(Stack<String> glanceBack,
-			RuleTreeNode allRules, RuleTreeNode currentRules, int tupleLength,
-			int length) {
-		final Stack<String> restack = this.restack(glanceBack,
-				currentRules.getWord());
-		restack.pop();
-		return this.compose(restack, allRules, currentRules, tupleLength,
-				length);
-	}
-
-	/**
-	 * Return a new stack comprising all the items on the current stack, with
-	 * this new string added at the bottom
-	 * 
-	 * @param stack
-	 *            the stack to restack.
-	 * @param bottom
-	 *            the item to place on the bottom.
-	 * @return the restacked stack.
-	 */
-	private Stack<String> restack(Stack<String> stack, String bottom) {
-		final Stack<String> result;
-		if (stack.isEmpty()) {
-			result = new Stack<String>();
-			result.push(bottom);
+		/* are we there yet? */
+		if (length == 0) {
+			result = new WordStack(); 
 		} else {
-			String top = stack.pop();
-			result = restack(stack, bottom);
-			result.push(top);
+			/*
+			 * are there any rules in this ruleset which match the current
+			 * sliding window? if so, then recurse; if not, then fail.
+			 */
+			Collection<String> words = rules.match(glanceBack.duplicate());
+
+			if (words.isEmpty()) {
+				/* backtrack */
+				result = null;
+			} else {
+				result = tryOptions(words, glanceBack, rules, length);
+			}
 		}
 		return result;
 	}
+	
+	/**
+	 * Try each of these candidates in turn, attempting to recurse.
+	 * @param candidates words which could potentially be added to the output.
+	 * @param glanceBack the last few words output.
+	 * @param allRules the rule set we're working to.
+	 * @param length the number of tokens still to be output.
+	 * @return if a successful path forward is found, that path, else null.
+	 */
+	private WordStack tryOptions(Collection<String> candidates,
+			WordStack glanceBack, RuleTreeNode allRules, int length) {
+		WordStack result = null;
+		
+		for ( String candidate : candidates) {
+			result = compose( new WordStack(glanceBack, candidate), allRules, length - 1);
+			if ( result != null) {
+				/* by Jove, I think she's got it! */
+				result.push(candidate);
+				break;
+			}
+		}
+		
+		return result;
+	}
+
 
 	/**
 	 * Random walk of the rule tree to extract (from the root) a legal sequence
@@ -157,12 +123,12 @@ public class Composer {
 	 *            the rule tree (fragment) to walk.
 	 * @return a sequence of words.
 	 */
-	private Stack<String> composePreamble(RuleTreeNode rules) {
-		final Stack<String> result;
+	private WordStack composePreamble(RuleTreeNode rules) {
+		final WordStack result;
 		final RuleTreeNode successor = rules.getRule();
 
 		if (successor == null) {
-			result = new Stack<String>();
+			result = new WordStack();
 		} else {
 			result = this.composePreamble(successor);
 			result.push(rules.getWord());
diff --git a/src/cc/journeyman/milkwood/Milkwood.java b/src/cc/journeyman/milkwood/Milkwood.java
index d09947f..19011b7 100644
--- a/src/cc/journeyman/milkwood/Milkwood.java
+++ b/src/cc/journeyman/milkwood/Milkwood.java
@@ -31,13 +31,15 @@ public class Milkwood {
 	 * <dl>
 	 * <dt>-d, -debug</dt>
 	 * <dd>Print debugging output to standard error</dd>
-	 * <dt>-i, -input</dt>
+	 * <dt>-i [FILE], -input [FILE]</dt>
 	 * <dd>Input file, expected to be an English (or, frankly, other natural
 	 * language) text. Defaults to standard in.</dd>
-	 * <dt>-n, -tuple-length</dt>
-	 * <dd>The length of tuples into which the file will be analised, default 2.
+	 * <dt>-l [NN], -length [NN]</dt>
+	 * <dd>The length in tokens of the desired output. Defaults to 100.</dd>
+	 * <dt>-n [NN], -tuple-length [NN]</dt>
+	 * <dd>The length of tuples into which the file will be analysed, default 2.
 	 * </dd>
-	 * <dt>-o, -output</dt>
+	 * <dt>-o [FILE], -output [FILE]</dt>
 	 * <dd>Output file, to which generated text will be written. Defaults to
 	 * standard out.</dd>
 	 * </dl>
@@ -55,6 +57,7 @@ public class Milkwood {
 		OutputStream out = System.out;
 		int tupleLength = 2;
 		boolean debug = false;
+		int length = 100;
 
 		for (int cursor = 0; cursor < args.length; cursor++) {
 			String arg = args[cursor];
@@ -71,6 +74,9 @@ public class Milkwood {
 				case 'o': // output
 					out = new FileOutputStream(new File(args[++cursor]));
 					break;
+				case 'l': // length
+					length = Integer.parseInt(args[++cursor]);
+					break;
 				case 'n':
 				case 't': // tuple length
 					tupleLength = Integer.parseInt(args[++cursor]);
@@ -82,7 +88,7 @@ public class Milkwood {
 			}
 		}
 		try {
-			new Milkwood().readAndGenerate(in, out, tupleLength, debug);
+			new Milkwood().readAndGenerate(in, out, tupleLength, length, debug);
 		} finally {
 			out.close();
 		}
@@ -97,6 +103,8 @@ public class Milkwood {
 	 *            the output stream to write to.
 	 * @param tupleLength
 	 *            the length of tuples to be used in generation.
+	 * @param length
+	 *            the length in tokens of the output to be generated.
 	 * @param debug
 	 *            whether to print debugging output.
 	 * @throws IOException
@@ -104,14 +112,19 @@ public class Milkwood {
 	 *             scheme of things, very likely.
 	 */
 	void readAndGenerate(final InputStream in, final OutputStream out,
-			final int tupleLength, boolean debug) throws IOException {
+			final int tupleLength, int length, boolean debug)
+			throws IOException {
 		/* The root of the rule tree I shall build. */
 		RuleTreeNode root = new RuleTreeNode();
-		int length = read(in, tupleLength, debug, root);
+		read(in, tupleLength, debug, root);
 
 		WordSequence tokens = compose(tupleLength, debug, root, length);
 
 		write(out, debug, tokens);
+		
+		if ( debug) {
+			System.err.println( "\n\nCompleted.");
+		}
 	}
 
 	/**
@@ -142,8 +155,7 @@ public class Milkwood {
 
 	private WordSequence compose(final int tupleLength, boolean debug,
 			RuleTreeNode root, int length) {
-		WordSequence tokens = new Composer(debug).compose(root, tupleLength,
-				length);
+		WordSequence tokens = new Composer(debug).compose(root, length);
 
 		if (tokens.contains(PERIOD)) {
 			tokens = tokens.truncateAtLastInstance(PERIOD);
@@ -168,7 +180,7 @@ public class Milkwood {
 			WordSequence tokens) throws IOException {
 		Writer scrivenor = new Writer(out, debug);
 		try {
-			scrivenor.generate(tokens);
+			scrivenor.writeSequence(tokens);
 		} finally {
 			scrivenor.close();
 		}
diff --git a/src/cc/journeyman/milkwood/RuleTreeNode.java b/src/cc/journeyman/milkwood/RuleTreeNode.java
index 144fb2c..413b9b4 100644
--- a/src/cc/journeyman/milkwood/RuleTreeNode.java
+++ b/src/cc/journeyman/milkwood/RuleTreeNode.java
@@ -170,7 +170,7 @@ public class RuleTreeNode {
             final RuleTreeNode successor = this.getRule(path.pop());
             
             if (successor == null) {
-                throw new NoSuchPathException();
+                result = null;
             } else {
                 result = successor.getWord(path);
             }
@@ -178,4 +178,27 @@ public class RuleTreeNode {
         
         return result;
     }
+
+    /**
+     * Find all the terminal strings in the current rule set which would match this path.
+     * @param path the path to match
+     * @return a collection (possibly empty) of potential successors.
+     */
+	public Collection<String> match(WordStack path) {
+		final Collection<String> result;
+		
+        if ( path.isEmpty()) {
+            result = this.getSuccessors();
+        } else {
+            final RuleTreeNode successor = this.getRule(path.pop());
+            
+            if (successor == null) {
+                result = new ArrayList<String>();
+            } else {
+                result = successor.match(path);
+            }
+        }
+		
+		return result;
+	}
 }
diff --git a/src/cc/journeyman/milkwood/Tokeniser.java b/src/cc/journeyman/milkwood/Tokeniser.java
index 86a279c..7ce945b 100644
--- a/src/cc/journeyman/milkwood/Tokeniser.java
+++ b/src/cc/journeyman/milkwood/Tokeniser.java
@@ -36,6 +36,13 @@ public class Tokeniser extends StreamTokenizer {
 		 */
 		this.whitespaceChars((int) '\"', (int) '\"');
 		this.whitespaceChars((int) '\'', (int) '\'');
+		/*
+		 * treat underscore and hyphen as whitespace as well. Again, hyphen with
+		 * either leading or trailing non-whitespace probably ought to be
+		 * treated specially, but...
+		 */
+		this.whitespaceChars((int) '_', (int) '_');
+		this.whitespaceChars((int) '-', (int) '-');
 		this.wordChars((int) '0', (int) '9');
 		this.wordChars((int) 'A', (int) 'Z');
 		this.wordChars((int) 'a', (int) 'z');
diff --git a/src/cc/journeyman/milkwood/WordStack.java b/src/cc/journeyman/milkwood/WordStack.java
new file mode 100644
index 0000000..9760182
--- /dev/null
+++ b/src/cc/journeyman/milkwood/WordStack.java
@@ -0,0 +1,57 @@
+package cc.journeyman.milkwood;
+
+import java.util.Stack;
+
+/**
+ * Sliding window which rules may match.
+ * 
+ * @author simon
+ * 
+ */
+public class WordStack extends Stack<String> {
+
+	private static final long serialVersionUID = 1L;
+
+	/**
+	 * Create a new, empty, wordstack.
+	 */
+	public WordStack() {
+		super();
+	}
+	
+	/**
+	 * create a new window from this window, having this new word as its
+	 * terminal and omitting the current first word. That is, the new window
+	 * should be as long as the old, with each word shuffled up one place.
+	 * 
+	 * @param prototype the window to copy from.
+	 * @param terminal the new terminal word.
+	 */
+	public WordStack(WordStack prototype, String terminal) {
+		this();
+
+		WordStack copy = prototype.duplicate();
+		copy.pop();
+		this.populate( copy, terminal);
+	}
+
+	private void populate(WordStack copy, String terminal) {
+		if ( copy.isEmpty()) {
+			this.push(terminal);
+		} else {
+			String token = copy.pop();
+			this.populate(copy, terminal);
+			this.push( token);
+		}
+	}
+
+	/**
+	 * A wrapper round clone which hides all the ugly casting.
+	 * 
+	 * @return a duplicate copy of myself.
+	 */
+	public WordStack duplicate() {
+		return (WordStack) this.clone();
+	}
+
+}
diff --git a/src/cc/journeyman/milkwood/Writer.java b/src/cc/journeyman/milkwood/Writer.java
index 75467ab..527a342 100644
--- a/src/cc/journeyman/milkwood/Writer.java
+++ b/src/cc/journeyman/milkwood/Writer.java
@@ -14,6 +14,8 @@ import java.util.Locale;
 import java.util.Random;
 
 /**
+ * A special purpose writer to write sequences of tokens, chopping them up into
+ * paragraphs on the fly.
  * 
  * @author Simon Brooke <simon@journeyman.cc>
  */
@@ -59,7 +61,7 @@ class Writer extends BufferedWriter {
 	 * @throws IOException
 	 *             if it is impossible to write (e.g. file system full).
 	 */
-	public void generate(WordSequence tokens) throws IOException {
+	public void writeSequence(WordSequence tokens) throws IOException {
 		boolean capitaliseNext = true;
 
 		try {
@@ -113,7 +115,11 @@ class Writer extends BufferedWriter {
 	private boolean spaceBefore(String token) {
 		final boolean result;
 
-		if (token.length() == 1) {
+		switch (token.length()) {
+		case 0:
+			result = false;
+			break;
+		case 1:
 			switch (token.charAt(0)) {
 			case '.':
 			case ',':
@@ -135,8 +141,9 @@ class Writer extends BufferedWriter {
 				result = true;
 				break;
 			}
-		} else {
-			result = false;
+			break;
+		default:
+			result = true;
 		}
 
 		return result;
@@ -155,7 +162,8 @@ class Writer extends BufferedWriter {
 	 *             if Mr this has run out of ink
 	 */
 	private void maybeParagraph(String token) throws IOException {
-		if (token.endsWith(Milkwood.PERIOD) && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
+		if (token.endsWith(Milkwood.PERIOD)
+				&& RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
 			this.write("\n\n");
 		}
 	}