All working very beautifully.

2013-10-31 11:32:11 +00:00 · 2013-10-31 11:32:11 +00:00 · a876cb6d1b
parent 0012a72e3f
commit a876cb6d1b
7 changed files with 195 additions and 117 deletions
--- a/README.txt
+++ b/README.txt
@ -76,4 +76,9 @@ Decluttered the TextGenerator class by moving the whole read stage into two new
 Right, fully decluttered. All bugs(!) are in new class Composer. I have a little Liszt...
 Parsing word tuples for n > 2 working sweetly. That is not the problem!
 Major refactoring and cleanup of the compose stage...
 ye! Utuvienyes
--- a/src/cc/journeyman/milkwood/Composer.java
+++ b/src/cc/journeyman/milkwood/Composer.java
@ -1,7 +1,7 @@
 package cc.journeyman.milkwood;
 import java.util.Collection;
-import java.util.Stack;
+import java.util.Collections;
 /**
 * Composes text output based on a rule tree.
@ -27,13 +27,12 @@ public class Composer {
 	/**
 	 * Recursive, backtracking, output generator.
 	 * 
-	 * @param rules
+	 * @param rules the rule set we're working to.
-	 * @param tupleLength
+	 * @param length the number of tokens still to be output.
-	 * @param length
+	 * @return if a successful path forward is found, that path, else null.
 	 * @return
 	 */
-	public WordSequence compose(RuleTreeNode rules, int tupleLength, int length) {
+	public WordSequence compose(RuleTreeNode rules, int length) {
-		Stack<String> preamble = composePreamble(rules);
+		WordStack preamble = composePreamble(rules);
 		WordSequence result = new WordSequence();
 		// composing the preamble will have ended with *ROOT* on top of the
@ -41,9 +40,16 @@ public class Composer {
 		// get rid of it.
 		preamble.pop();
 		if (debug) {
 			System.err.println( "Preamble: " + preamble);
 		}
 		result.addAll(preamble);
-		result.addAll(this.compose(preamble, rules, rules, tupleLength, length));
+		WordStack body = this.compose(preamble, rules, length);
 		Collections.reverse(body);
 		result.addAll(body);
 		return result;
 	}
@ -51,103 +57,63 @@ public class Composer {
 	 * Recursively attempt to find sequences in the ruleset to append to what's
 	 * been composed so far.
 	 * 
-	 * @param glanceBack
+	 * @param glanceBack the last few words output.
-	 * @param allRules
+	 * @param rules the rule set we're working to.
-	 * @param currentRules
+	 * @param length the number of tokens still to be output.
-	 * @param tupleLength
+	 * @return if a successful path forward is found, that path, else null.
 	 * @param length
 	 * @return
 	 */
-	private WordSequence compose(Stack<String> glanceBack,
+	private WordStack compose(WordStack glanceBack, RuleTreeNode rules,
 			RuleTreeNode allRules, RuleTreeNode currentRules, int tupleLength,
 			int length) {
-		assert (glanceBack.size() == tupleLength) : "Shouldn't happen: bad tuple size";
+		final WordStack result;
 		assert (allRules.getWord() == RuleTreeNode.ROOTMAGICTOKEN) : "Shoudn't happen: bad rule set";
 		WordSequence result;
-		try {
+		if ( debug) {
-			@SuppressWarnings("unchecked")
+			System.err.println( String.format( "%d: %s", length, glanceBack));
-			String here = currentRules.getWord((Stack<String>) glanceBack
+		}
 					.clone());
 			System.err.println(String.format("Trying token %s", here));
-			result = new WordSequence();
+		/* are we there yet? */
-			result.add(here);
+		if (length == 0) {
 			result = new WordStack(); 
 		} else {
 			/*
 			 * are there any rules in this ruleset which match the current
 			 * sliding window? if so, then recurse; if not, then fail.
 			 */
 			Collection<String> words = rules.match(glanceBack.duplicate());
-			if (length != 0) {
+			if (words.isEmpty()) {
-				/* we're not done yet */
+				/* backtrack */
-				Collection<String> options = allRules.getSuccessors();
+				result = null;
 			} else {
 				result = tryOptions(words, glanceBack, rules, length);
 			}
 		}
 		return result;
 	}
-				for (String next : options) {
+	/**
-					@SuppressWarnings("unchecked")
+	 * Try each of these candidates in turn, attempting to recurse.
-					WordSequence rest = this
+	 * @param candidates words which could potentially be added to the output.
-							.tryOption((Stack<String>) glanceBack.clone(),
+	 * @param glanceBack the last few words output.
-									allRules, currentRules.getRule(next),
+	 * @param allRules the rule set we're working to.
-									tupleLength, length - 1);
+	 * @param length the number of tokens still to be output.
 	 * @return if a successful path forward is found, that path, else null.
 	 */
 	private WordStack tryOptions(Collection<String> candidates,
 			WordStack glanceBack, RuleTreeNode allRules, int length) {
 		WordStack result = null;
-					if (rest != null) {
+		for ( String candidate : candidates) {
-						/* we have a solution */
+			result = compose( new WordStack(glanceBack, candidate), allRules, length - 1);
-						result.addAll(rest);
+			if ( result != null) {
 				/* by Jove, I think she's got it! */
 				result.push(candidate);
 				break;
 			}
 		}
 			}
 		} catch (NoSuchPathException ex) {
 			if (debug) {
 				System.err.println(String.format("No path %s: Backtracking...",
 						glanceBack));
 			}
 			result = null;
 		}
 		return result;
 	}
 	/**
 	 * Attempt composition from one candidate rule node.
 	 * 
 	 * The candidate's word is placed at the bottom of the glanceback stack,
 	 * the word on top of the rebuilt stack is dropped, and composition is then
 	 * retried with the resulting stack.
 	 * 
 	 * @param glanceBack
 	 *            the words to look back over; handed on to restack.
 	 * @param allRules
 	 *            all the rules there are.
 	 * @param currentRules
 	 *            the current node in the rule tree.
 	 * @param tupleLength
 	 *            the size of the glanceback window we're considering.
 	 * @param length
 	 *            passed through unchanged to the recursive compose call.
 	 * @return whatever the recursive compose call returns.
 	 */
 	private WordSequence tryOption(Stack<String> glanceBack,
 			RuleTreeNode allRules, RuleTreeNode currentRules, int tupleLength,
 			int length) {
 		final String word = currentRules.getWord();
 		final Stack<String> window = this.restack(glanceBack, word);
 		/* discard the word now on top of the rebuilt window */
 		window.pop();
 		return this.compose(window, allRules, currentRules, tupleLength, length);
 	}
 	/**
 	 * Build a fresh stack holding the given item at the bottom, with every
 	 * item of the supplied stack above it in its original order. The supplied
 	 * stack is left empty afterwards.
 	 * 
 	 * @param stack
 	 *            the stack whose items are moved; emptied by this call.
 	 * @param bottom
 	 *            the item which ends up at the bottom of the result.
 	 * @return the newly built stack.
 	 */
 	private Stack<String> restack(Stack<String> stack, String bottom) {
 		final Stack<String> rebuilt = new Stack<String>();
 		rebuilt.push(bottom);
 		/* copy bottom-to-top, then drain the source as the original did */
 		rebuilt.addAll(stack);
 		stack.clear();
 		return rebuilt;
 	}
 	/**
 	 * Random walk of the rule tree to extract (from the root) a legal sequence
@ -157,12 +123,12 @@ public class Composer {
 	 *            the rule tree (fragment) to walk.
 	 * @return a sequence of words.
 	 */
-	private Stack<String> composePreamble(RuleTreeNode rules) {
+	private WordStack composePreamble(RuleTreeNode rules) {
-		final Stack<String> result;
+		final WordStack result;
 		final RuleTreeNode successor = rules.getRule();
 		if (successor == null) {
-			result = new Stack<String>();
+			result = new WordStack();
 		} else {
 			result = this.composePreamble(successor);
 			result.push(rules.getWord());
--- a/src/cc/journeyman/milkwood/Milkwood.java
+++ b/src/cc/journeyman/milkwood/Milkwood.java
@ -31,13 +31,15 @@ public class Milkwood {
 	 * <dl>
 	 * <dt>-d, -debug</dt>
 	 * <dd>Print debugging output to standard error</dd>
-	 * <dt>-i, -input</dt>
+	 * <dt>-i [FILE], -input [FILE]</dt>
 	 * <dd>Input file, expected to be an English (or, frankly, other natural
 	 * language) text. Defaults to standard in.</dd>
-	 * <dt>-n, -tuple-length</dt>
+	 * <dt>-l [NN], -length [NN]</dt>
-	 * <dd>The length of tuples into which the file will be analised, default 2.
+	 * <dd>The length in tokens of the desired output. Defaults to 100.</dd>
 	 * <dt>-n [NN], -tuple-length [NN]</dt>
 	 * <dd>The length of tuples into which the file will be analysed, default 2.
 	 * </dd>
-	 * <dt>-o, -output</dt>
+	 * <dt>-o [FILE], -output [FILE]</dt>
 	 * <dd>Output file, to which generated text will be written. Defaults to
 	 * standard out.</dd>
 	 * </dl>
@ -55,6 +57,7 @@ public class Milkwood {
 		OutputStream out = System.out;
 		int tupleLength = 2;
 		boolean debug = false;
 		int length = 100;
 		for (int cursor = 0; cursor < args.length; cursor++) {
 			String arg = args[cursor];
@ -71,6 +74,9 @@ public class Milkwood {
 				case 'o': // output
 					out = new FileOutputStream(new File(args[++cursor]));
 					break;
 				case 'l': // length
 					length = Integer.parseInt(args[++cursor]);
 					break;
 				case 'n':
 				case 't': // tuple length
 					tupleLength = Integer.parseInt(args[++cursor]);
@ -82,7 +88,7 @@ public class Milkwood {
 			}
 		}
 		try {
-			new Milkwood().readAndGenerate(in, out, tupleLength, debug);
+			new Milkwood().readAndGenerate(in, out, tupleLength, length, debug);
 		} finally {
 			out.close();
 		}
@ -97,6 +103,8 @@ public class Milkwood {
 	 *            the output stream to write to.
 	 * @param tupleLength
 	 *            the length of tuples to be used in generation.
 	 * @param length
 	 *            the length in tokens of the output to be generated.
 	 * @param debug
 	 *            whether to print debugging output.
 	 * @throws IOException
@ -104,14 +112,19 @@ public class Milkwood {
 	 *             scheme of things, very likely.
 	 */
 	void readAndGenerate(final InputStream in, final OutputStream out,
-			final int tupleLength, boolean debug) throws IOException {
+			final int tupleLength, int length, boolean debug)
 			throws IOException {
 		/* The root of the rule tree I shall build. */
 		RuleTreeNode root = new RuleTreeNode();
-		int length = read(in, tupleLength, debug, root);
+		read(in, tupleLength, debug, root);
 		WordSequence tokens = compose(tupleLength, debug, root, length);
 		write(out, debug, tokens);
 		if ( debug) {
 			System.err.println( "\n\nCompleted.");
 		}
 	}
 	/**
@ -142,8 +155,7 @@ public class Milkwood {
 	private WordSequence compose(final int tupleLength, boolean debug,
 			RuleTreeNode root, int length) {
-		WordSequence tokens = new Composer(debug).compose(root, tupleLength,
+		WordSequence tokens = new Composer(debug).compose(root, length);
 				length);
 		if (tokens.contains(PERIOD)) {
 			tokens = tokens.truncateAtLastInstance(PERIOD);
@ -168,7 +180,7 @@ public class Milkwood {
 			WordSequence tokens) throws IOException {
 		Writer scrivenor = new Writer(out, debug);
 		try {
-			scrivenor.generate(tokens);
+			scrivenor.writeSequence(tokens);
 		} finally {
 			scrivenor.close();
 		}
--- a/src/cc/journeyman/milkwood/RuleTreeNode.java
+++ b/src/cc/journeyman/milkwood/RuleTreeNode.java
@ -170,7 +170,7 @@ public class RuleTreeNode {
            final RuleTreeNode successor = this.getRule(path.pop());
            if (successor == null) {
-                throw new NoSuchPathException();
+                result = null;
            } else {
                result = successor.getWord(path);
            }
@ -178,4 +178,27 @@ public class RuleTreeNode {
        return result;
    }
    /**
     * Follow the given path down the rule tree and report the candidate
     * successor strings found at its end. The path is consumed (popped) as it
     * is followed.
     * @param path the path to match; emptied as far as it is followed.
     * @return this node's successors when the path is exhausted, or an empty
     *         collection when no rule exists for the next word on the path.
     */
 	public Collection<String> match(WordStack path) {
 		if (path.isEmpty()) {
 			/* nothing left to follow: everything below here is a candidate */
 			return this.getSuccessors();
 		}
 		final RuleTreeNode next = this.getRule(path.pop());
 		if (next == null) {
 			/* dead end: no rule for this word */
 			return new ArrayList<String>();
 		}
 		return next.match(path);
 	}
 }
--- a/src/cc/journeyman/milkwood/Tokeniser.java
+++ b/src/cc/journeyman/milkwood/Tokeniser.java
@ -36,6 +36,13 @@ public class Tokeniser extends StreamTokenizer {
 		 */
 		this.whitespaceChars((int) '\"', (int) '\"');
 		this.whitespaceChars((int) '\'', (int) '\'');
 		/*
 		 * treat underscore and hyphen as whitespace as well. Again, hyphen with
 		 * either leading or trailing non-whitespace probably ought to be
 		 * treated specially, but...
 		 */
 		this.whitespaceChars((int) '_', (int) '_');
 		this.whitespaceChars((int) '-', (int) '-');
 		this.wordChars((int) '0', (int) '9');
 		this.wordChars((int) 'A', (int) 'Z');
 		this.wordChars((int) 'a', (int) 'z');
--- a/src/cc/journeyman/milkwood/WordStack.java
+++ b/src/cc/journeyman/milkwood/WordStack.java
@ -0,0 +1,57 @@
 package cc.journeyman.milkwood;
 import java.util.Stack;
 /**
 * A stack of words used as the sliding window against which rules are
 * matched.
 * 
 * @author simon
 * 
 */
 public class WordStack extends Stack<String> {
 	private static final long serialVersionUID = 1L;
 	/**
 	 * Construct a wordstack holding no words.
 	 */
 	public WordStack() {
 		super();
 	}
 	/**
 	 * Construct a window derived from an existing one. The word on top of
 	 * the prototype is omitted, the new terminal word is placed at the
 	 * bottom, and the prototype's remaining words sit above it in their
 	 * original order, so the new window is as long as the old. The prototype
 	 * itself is not modified.
 	 * 
 	 * @param prototype the window to copy from; must not be empty.
 	 * @param terminal the new terminal word.
 	 * @throws java.util.EmptyStackException if the prototype holds no words.
 	 */
 	public WordStack(WordStack prototype, String terminal) {
 		this();
 		final WordStack remainder = prototype.duplicate();
 		remainder.pop();
 		this.push(terminal);
 		/* iteration runs bottom-to-top, preserving the prototype's order */
 		for (String word : remainder) {
 			this.push(word);
 		}
 	}
 	/**
 	 * Copy this wordstack, hiding the cast that clone would otherwise
 	 * force on callers.
 	 * 
 	 * @return a duplicate copy of myself.
 	 */
 	public WordStack duplicate() {
 		final Object twin = this.clone();
 		return (WordStack) twin;
 	}
 }
--- a/src/cc/journeyman/milkwood/Writer.java
+++ b/src/cc/journeyman/milkwood/Writer.java
@ -14,6 +14,8 @@ import java.util.Locale;
 import java.util.Random;
 /**
 * A special purpose writer to write sequences of tokens, chopping them up into
 * paragraphs on the fly.
 * 
 * @author Simon Brooke <simon@journeyman.cc>
 */
@ -59,7 +61,7 @@ class Writer extends BufferedWriter {
 	 * @throws IOException
 	 *             if it is impossible to write (e.g. file system full).
 	 */
-	public void generate(WordSequence tokens) throws IOException {
+	public void writeSequence(WordSequence tokens) throws IOException {
 		boolean capitaliseNext = true;
 		try {
@ -113,7 +115,11 @@ class Writer extends BufferedWriter {
 	private boolean spaceBefore(String token) {
 		final boolean result;
-		if (token.length() == 1) {
+		switch (token.length()) {
 		case 0:
 			result = false;
 			break;
 		case 1:
 			switch (token.charAt(0)) {
 			case '.':
 			case ',':
@ -135,8 +141,9 @@ class Writer extends BufferedWriter {
 				result = true;
 				break;
 			}
-		} else {
+			break;
-			result = false;
+		default:
 			result = true;
 		}
 		return result;
@ -155,7 +162,8 @@ class Writer extends BufferedWriter {
 	 *             if Mr this has run out of ink
 	 */
 	private void maybeParagraph(String token) throws IOException {
-		if (token.endsWith(Milkwood.PERIOD) && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
+		if (token.endsWith(Milkwood.PERIOD)
 				&& RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
 			this.write("\n\n");
 		}
 	}