Moved a lot of stuff out of TextGenerator mainly to declutter, so I can

think about what isn't working and why not a bit more cleanly.
2013-10-31 08:51:41 +00:00 · 2013-10-31 08:51:41 +00:00 · 61a8b7ad97
commit 61a8b7ad97
parent e59f160f70
5 changed files with 487 additions and 386 deletions
--- a/src/cc/journeyman/milkwood/Digester.java
+++ b/src/cc/journeyman/milkwood/Digester.java
@ -0,0 +1,60 @@
 /*
 * Proprietary unpublished source code property of 
 * Simon Brooke <simon@journeyman.cc>.
 * 
 * Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
 */
 package cc.journeyman.milkwood;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StreamTokenizer;
 import java.util.LinkedList;
 import java.util.Queue;
 /**
 * Reads a stream of text and digests it into generation rules held in a rule
 * tree. Split out of TextGenerator mainly to declutter that class.
 * 
 * @author simon
 * 
 */
 public class Digester {
 	/**
 	 * Tokenise the input stream and feed overlapping word sequences into the
 	 * rule tree below this root. Every token opens a new sequence and is also
 	 * appended to every sequence that is still open; as soon as more than
 	 * <code>tupleLength</code> sequences are open, the oldest one is closed
 	 * and passed to <code>root.addSequence</code>. Sequences still open when
 	 * the input ends are not added to the tree.
 	 * 
 	 * @param in
 	 *            the input stream from which I read.
 	 * @param tupleLength
 	 *            the number of sequences allowed to stay open at once.
 	 * @param root
 	 *            the ruleset to which I shall add.
 	 * @return the number of tokens read.
 	 * @throws IOException
 	 *             if the tokeniser cannot read from the stream.
 	 */
 	protected int read(final InputStream in, final int tupleLength,
 			final RuleTreeNode root) throws IOException {
 		final Tokeniser tokeniser = new Tokeniser(in);
 		final Queue<WordSequence> pending = new LinkedList<WordSequence>();
 		int count = 0;

 		while (tokeniser.nextToken() != StreamTokenizer.TT_EOF) {
 			count++;

 			final WordSequence fresh = new WordSequence();
 			final String word = tokeniser.readBareToken();

 			/* the new sequence joins the queue before the token is shared out,
 			 * so it starts with the current token */
 			pending.add(fresh);
 			for (final WordSequence sequence : pending) {
 				sequence.add(word);
 			}

 			if (pending.size() > tupleLength) {
 				/* the head of the queue is the oldest open sequence */
 				root.addSequence(pending.remove());
 			}
 		}

 		return count;
 	}
 }
--- a/src/cc/journeyman/milkwood/Milkwood.java
+++ b/src/cc/journeyman/milkwood/Milkwood.java
@ -15,60 +15,97 @@ import java.io.OutputStream;
 * Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
 */
 /**
 * 
 * @author Simon Brooke <simon@journeyman.cc>
 */
 public class Milkwood {
-    /**
+	/**
-     * Parse command line arguments and kick off the process. Expected 
+	 * Parse command line arguments and kick off the process. Expected arguments
-     * arguments include:
+	 * include:
-     * <dl>
+	 * <dl>
-     * <dt>-i, -input</dt>
+	 * <dt>-d, -debug</dt>
-     * <dd>Input file, expected to be an English (or, frankly, other natural
+	 * <dd>Print debugging output to standard error</dd>
-     * language) text. Defaults to standard in.</dd>
+	 * <dt>-i, -input</dt>
-     * <dt>-n, -tuple-length</dt>
+	 * <dd>Input file, expected to be an English (or, frankly, other natural
-     * <dd>The length of tuples into which the file will be analised, default 2.</dd>
+	 * language) text. Defaults to standard in.</dd>
-     * <dt>-o, -output</dt>
+	 * <dt>-n, -tuple-length</dt>
-     * <dd>Output file, to which generated text will be written. 
+	 * <dd>The length of tuples into which the file will be analised, default 2.
-     * Defaults to standard out.</dd>
+	 * </dd>
-     * </dl>
+	 * <dt>-o, -output</dt>
-     * 
+	 * <dd>Output file, to which generated text will be written. Defaults to
-     * @param args the command line arguments
+	 * standard out.</dd>
-     * @exception FileNotFoundException if the user specifies a file which 
+	 * </dl>
-     * isn't available.
+	 * 
-     * @excpetion IOException if could not read from input or write to output.
+	 * @param args
-     */
+	 *            the command line arguments
-    public static void main(String[] args) throws FileNotFoundException, IOException {
+	 * @exception FileNotFoundException
-        InputStream in = System.in;
+	 *                if the user specifies a file which isn't available.
-        OutputStream out = System.out;
+	 * @exception IOException if could not read from input or write to output.
-        int tupleLength = 2;
+	 */
 	public static void main(String[] args) throws FileNotFoundException,
 			IOException {
 		InputStream in = System.in;
 		OutputStream out = System.out;
 		int tupleLength = 2;
 		boolean debug = false;
-        for (int cursor = 0; cursor < args.length; cursor++) {
+		for (int cursor = 0; cursor < args.length; cursor++) {
-            String arg = args[cursor];
+			String arg = args[cursor];
-            if (arg.startsWith("-") && arg.length() > 1) {
+			if (arg.startsWith("-") && arg.length() > 1) {
-                switch (arg.charAt(1)) {
+				switch (arg.charAt(1)) {
-                    case 'i':
+				case 'd':
-                        // input
+					debug = true;
-                        in = new FileInputStream(new File(args[++cursor]));
+					break;
-                        break;
+				case 'i':
-                    case 'o': // output
+					// input
-                        out = new FileOutputStream(new File(args[++cursor]));
+					in = new FileInputStream(new File(args[++cursor]));
-                        break;
+					break;
-                    case 'n':
+				case 'o': // output
-                    case 't': // tuple length
+					out = new FileOutputStream(new File(args[++cursor]));
-                        tupleLength = Integer.parseInt(args[++cursor]);
+					break;
-                        break;
+				case 'n':
-                    default:
+				case 't': // tuple length
-                        throw new IllegalArgumentException(
+					tupleLength = Integer.parseInt(args[++cursor]);
-                                String.format("Unrecognised argument '%s'", arg));
+					break;
-                }
+				default:
-            }
+					throw new IllegalArgumentException(String.format(
-        }
+							"Unrecognised argument '%s'", arg));
 				}
 			}
 		}
 		new Milkwood().readAndGenerate(in, out, tupleLength, debug);
 	}
 	/**
 	 * Read tokens from this input and use them to generate text on this output.
 	 * 
 	 * @param in
 	 *            the input stream to read.
 	 * @param out
 	 *            the output stream to write to.
 	 * @param tupleLength
 	 *            the length of tuples to be used in generation.
 	 * @param debug
 	 *            whether to print debugging output.
 	 * @throws IOException
 	 *             if the file system buggers up, which is not, in the cosmic
 	 *             scheme of things, very likely.
 	 */
 	void readAndGenerate(final InputStream in, final OutputStream out,
 			final int tupleLength, boolean debug) throws IOException {
 		/* The root of the rule tree I shall build. */
 		RuleTreeNode root = new RuleTreeNode();
 		int length = new Digester().read(in, tupleLength, root);
 		if (debug) {
 			System.err.println(root.toString());
 		}
 		new TextGenerator().generate(out, tupleLength, root, length);
 	}
        new TextGenerator().readAndGenerate( in, out, tupleLength);
    }
 }
--- a/src/cc/journeyman/milkwood/RuleTreeNode.java
+++ b/src/cc/journeyman/milkwood/RuleTreeNode.java
@ -23,6 +23,10 @@ import java.util.Stack;
 * @author Simon Brooke <simon@journeyman.cc>
 */
 public class RuleTreeNode {
 	/**
 	 * The magic token which identifies the root node of a rule tree.
 	 */
 	public static final String ROOTMAGICTOKEN = "*ROOT*";
    /**
     * The line separator on this platform.
     */
@ -42,6 +46,13 @@ public class RuleTreeNode {
     */
    private Map<String,RuleTreeNode> rules = new HashMap<String,RuleTreeNode>();
    /**
     * If no argument passed, generate a root node.
     */
    public RuleTreeNode() {
    	this( RuleTreeNode.ROOTMAGICTOKEN);
    }
    /**
     * Create me wrapping this word.
     * @param word the word I represent.
--- a/src/cc/journeyman/milkwood/TextGenerator.java
+++ b/src/cc/journeyman/milkwood/TextGenerator.java
@ -6,228 +6,126 @@
 */
 package cc.journeyman.milkwood;
 import java.io.BufferedReader;
 import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
 import java.io.Reader;
 import java.io.StreamTokenizer;
 import java.util.Collection;
 import java.util.LinkedList;
 import java.util.Locale;
 import java.util.Queue;
 import java.util.Random;
 import java.util.Stack;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 /**
 * 
 * @author Simon Brooke <simon@journeyman.cc>
 */
 class TextGenerator {
 	/**
-	 * The magic token which identifies the root node of the
+	 * The magic token which is deemed to end sentences.
 	 * rule tree.
 	 */
-    private static final String ROOTMAGICTOKEN = "*ROOT*";
+	public static final String PERIOD = ".";
 	/**
-     * The special magic token which is deemed to end sentences.
+	 * The average number of sentences in a paragraph.
-     */
+	 */
-    public static final String PERIOD = ".";
+	public static final int AVSENTENCESPERPARA = 5;
 	/**
 	 * A random number generator.
 	 */
 	private static Random RANDOM = new Random();
 	/**
 	 * Dictionary of first-words we know about; each first-word maps onto a
 	 * tuple of tuples of word sequences beginning with that word, so 'I' might
 	 * map onto [[I, CAME, COMMA],[I, SAW, COMMA],[I CONQUERED COMMA]].
 	 */
 	TupleDictionary dictionary = new TupleDictionary();
-    /**
+	public TextGenerator() {
-     * The average number of sentences in a paragraph.
+	}
     */
    public static final int AVSENTENCESPERPARA = 5;
    /**
     * A random number generator.
     */
    private static Random RANDOM = new Random();
    /**
     * Dictionary of first-words we know about; each first-word maps 
     * onto a tuple of tuples of word sequences beginning with that 
     * word, so 'I' might map onto [[I, CAME, COMMA],[I, SAW, COMMA],[I CONQUERED COMMA]].
     */
    TupleDictionary dictionary = new TupleDictionary();
    public TextGenerator() {
    }
-    /**
+	public void generate(OutputStream out, int tupleLength, RuleTreeNode root,
-     * Read tokens from this input and use them to generate text on this output.
+			int length) throws IOException {
-     * @param in the input stream to read.
+		WordSequence tokens = this.compose(root, tupleLength, length);
     * @param out the output stream to write to.
     * @param tupleLength the length of tuples to be used in generation.
     * @throws IOException if the file system buggers up, which is not, in the
     * cosmic scheme of things, very likely.
     */
    void readAndGenerate(InputStream in, OutputStream out, int tupleLength) throws IOException {
    /* The root of the rule tree I shall build. */
    RuleTreeNode root = new RuleTreeNode( ROOTMAGICTOKEN);
        int length = read(in, tupleLength, root);
-        System.err.println( root.toString());
+		if (tokens.contains(PERIOD)) {
-        
+			// TODO: eq = equal?
-        generate( out, tupleLength, root, length);
+			tokens = this.truncateAtLastInstance(tokens, PERIOD);
    }
    /**
     * Read tokens from the input stream, and compile them into a ruleset below root.
     * @param in the input stream from which I read.
     * @param tupleLength the length of the tuples I read.
     * @param root the ruleset to which I shall add.
     * @return the number of tokens read.
     * @throws IOException 
     */
    private int read(InputStream in, int tupleLength, RuleTreeNode root) throws IOException {
        int result = 0;
        Queue<WordSequence> openTuples = new LinkedList<WordSequence>();
        StreamTokenizer tok = prepareTokenizer(in);
        for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok.nextToken()) {
            result ++;
            final WordSequence newTuple = new WordSequence();
            String token = readBareToken(tok, type);
            openTuples.add(newTuple);
            for ( WordSequence tuple : openTuples) {
                tuple.add(token);
            }
            if (openTuples.size() > tupleLength) {
                root.addSequence( openTuples.remove());
            }
        }
        return result;
    }
    /**
     * There surely must be a better way to get just the token out of a 
     * StreamTokenizer...!
     * @param tok the tokenizer.
     * @return just the next token.
     */
 	private String readBareToken(StreamTokenizer tok, int type) {
 		final String token;
 		switch (type) {
 		case StreamTokenizer.TT_EOL:
 			token = "FIXME"; // TODO: fix this!
 			break;
 		case StreamTokenizer.TT_NUMBER:
 			token = new Double(tok.nval).toString();
 			break;
 		case StreamTokenizer.TT_WORD:
 			token = tok.sval.toLowerCase();
 			break;
 		default:
 			StringBuffer buffy = new StringBuffer();
 			buffy.append((char) type);
 			token = buffy.toString();
 			break;
 		}
-		return token;
+
 		this.generate(out, tokens);
 	}
-    /**
+	/**
-     * Prepare a tokeniser on this input stream, set up to handle at least 
+	 * Write this sequence of tokens on this stream, sorting out minor issues of
-     * Western European natural language text.
+	 * orthography.
-     * @param in the stream.
+	 * 
-     * @return a suitable tokeniser.
+	 * @param out
-     */
+	 *            the stream.
-	private StreamTokenizer prepareTokenizer(InputStream in) {
+	 * @param tokens
-		Reader gentle = new BufferedReader(new InputStreamReader(in));
+	 *            the tokens.
-        StreamTokenizer tok = new StreamTokenizer(gentle);
+	 * @throws IOException
 	 *             if it is impossible to write (e.g. file system full).
 	 */
 	private void generate(OutputStream out, WordSequence tokens)
 			throws IOException {
 		BufferedWriter dickens = new BufferedWriter(new OutputStreamWriter(out));
 		boolean capitaliseNext = true;
-        tok.resetSyntax();
+		try {
-        tok.whitespaceChars(8, 15);
+			for (String token : tokens) {
-        tok.whitespaceChars(28, 32);
+				capitaliseNext = writeToken(dickens, capitaliseNext, token);
-        /* treat quotemarks as white space */
+			}
-        tok.whitespaceChars((int) '\"', (int) '\"');
+		} finally {
-        tok.whitespaceChars((int) '\'', (int) '\'');
+			dickens.flush();
-        tok.wordChars((int) '0', (int) '9');
+			dickens.close();
-        tok.wordChars((int) 'A', (int) 'Z');
+		}
        tok.wordChars((int) 'a', (int) 'z');
        tok.parseNumbers();
 		return tok;
 	}
-    private void generate(OutputStream out, int tupleLength, RuleTreeNode root, int length) throws IOException {
+	/**
-        WordSequence tokens = this.compose( root, tupleLength, length);
+	 * Deal with end of paragraph, capital after full stop, and other minor
-        
+	 * orthographic conventions.
-        if ( tokens.contains(PERIOD)) {
+	 * 
-            // TODO: eq = equal?
+	 * @param dickens
-            tokens = this.truncateAtLastInstance( tokens, PERIOD);
+	 *            the scrivenor who writes for us.
-        }
+	 * @param capitalise
-        
+	 *            whether or not the token should be capitalised
-        this.generate( out, tokens);
+	 * @param token
-    }
+	 *            the token to write;
-
+	 * @return true if the next token to be written should be capitalised.
-    /**
+	 * @throws IOException
-     * Write this sequence of tokens on this stream, sorting out minor 
+	 */
     * issues of orthography.
     * @param out the stream.
     * @param tokens the tokens.
     * @throws IOException if it is impossible to write (e.g. file system full).
     */
    private void generate(OutputStream out, WordSequence tokens) throws IOException {
    	BufferedWriter dickens = new BufferedWriter(new OutputStreamWriter(out));
    	boolean capitaliseNext = true;
        try {
            for (String token : tokens) {
                capitaliseNext = writeToken(dickens, capitaliseNext, token);
            }
        } finally {
        	dickens.flush();
        	dickens.close();
        }
    }
    /**
     * Deal with end of paragraph, capital after full stop, and other 
     * minor orthographic conventions.
     * @param dickens the scrivenor who writes for us.
     * @param capitalise whether or not the token should be capitalised
     * @param token the token to write;
     * @returnvtrue if the next token to be written should be capitalised.
     * @throws IOException
     */
 	private boolean writeToken(BufferedWriter dickens, boolean capitalise,
 			String token) throws IOException {
-		if ( this.spaceBefore(token)) {
+		if (this.spaceBefore(token)) {
-		    dickens.write( " ");
+			dickens.write(" ");
 		}
-		if ( capitalise) {
+		if (capitalise) {
-			dickens.write(token.substring(0, 1).toUpperCase(Locale.getDefault()));
+			dickens.write(token.substring(0, 1)
 					.toUpperCase(Locale.getDefault()));
 			dickens.write(token.substring(1));
 		} else {
 			dickens.write(token);
 		}
-		this.maybeParagraph( token, dickens);
+		this.maybeParagraph(token, dickens);
 		return (token.endsWith(PERIOD));
 	}
-    /**
+	/**
-     * Return false if token is punctuation, else true. Wouldn't it be 
+	 * Return false if token is punctuation, else true. Wouldn't it be nice if
-     * nice if Java provided Character.isPunctuation(char)? However, since it 
+	 * Java provided Character.isPunctuation(char)? However, since it doesn't, I
-     * doesn't, I can give this slightly special semantics: return true only if 
+	 * can give this slightly special semantics: return true only if this is
-     * this is punctuation which would not normally be preceded with a space.
+	 * punctuation which would not normally be preceded with a space.
-     * @param ch a character.
+	 * 
-     * @return true if the should be preceded by a space, else false.
+	 * @param ch
-     */
+	 *            a character.
-    private boolean spaceBefore(String token) {
+	 * @return true if the should be preceded by a space, else false.
-        final boolean result;
+	 */
 	private boolean spaceBefore(String token) {
 		final boolean result;
 		if (token.length() == 1) {
 			switch (token.charAt(0)) {
@ -241,9 +139,10 @@ class TextGenerator {
 				 * the apostrophe lost
 				 */
 			case 't':
-					/* similar; probably 'doesn't' or 'shouldn't' or other cases
+				/*
-					 * of 'not' with an elided 'o'.
+				 * similar; probably 'doesn't' or 'shouldn't' or other cases of
-					 */
+				 * 'not' with an elided 'o'.
 				 */
 				result = false;
 				break;
 			default:
@ -254,106 +153,119 @@ class TextGenerator {
 			result = false;
 		}
-        return result;
+		return result;
-    }
+	}
-    /**
+	/**
-     * If this token is an end-of-sentence token, then, on one chance in 
+	 * If this token is an end-of-sentence token, then, on one chance in some,
-     * some, have the writer write two new lines. NOTE: The tokeniser is treating
+	 * have the writer write two new lines. NOTE: The tokeniser is treating
-     * PERIOD ('.') as a word character, even though it has not been told to. 
+	 * PERIOD ('.') as a word character, even though it has not been told to.
-     * Token.endsWith( PERIOD) is a hack to get round this problem.
+	 * Token.endsWith( PERIOD) is a hack to get round this problem. TODO:
-     * TODO: investigate and fix.
+	 * investigate and fix.
-     * 
+	 * 
-     * @param token a token
+	 * @param token
-     * @param dickens our scrivenor
+	 *            a token
-     * @throws IOException if Mr Dickens has run out of ink
+	 * @param dickens
-     */
+	 *            our scrivenor
-    private void maybeParagraph(String token, BufferedWriter dickens) throws IOException {
+	 * @throws IOException
-        if ( token.endsWith(PERIOD) && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
+	 *             if Mr Dickens has run out of ink
-            dickens.write("\n\n");
+	 */
-        }
+	private void maybeParagraph(String token, BufferedWriter dickens)
-    }
+			throws IOException {
 		if (token.endsWith(PERIOD) && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
 			dickens.write("\n\n");
 		}
 	}
-    /**
+	/**
-     * Recursive, backtracking, output generator.
+	 * Recursive, backtracking, output generator.
-     * @param rules
+	 * 
-     * @param tupleLength
+	 * @param rules
-     * @param length
+	 * @param tupleLength
-     * @return 
+	 * @param length
-     */
+	 * @return
-    private WordSequence compose(RuleTreeNode rules, int tupleLength, int length) {
+	 */
-        Stack<String> preamble = composePreamble( rules);
+	private WordSequence compose(RuleTreeNode rules, int tupleLength, int length) {
-        WordSequence result = new WordSequence();
+		Stack<String> preamble = composePreamble(rules);
 		WordSequence result = new WordSequence();
-        // composing the preamble will have ended with *ROOT* on top of the stack;
+		// composing the preamble will have ended with *ROOT* on top of the
-        // get rid of it.
+		// stack;
-        preamble.pop();
+		// get rid of it.
 		preamble.pop();
-        result.addAll(preamble);
+		result.addAll(preamble);
-        result.addAll(this.compose( preamble, rules, rules, tupleLength, length));
+		result.addAll(this.compose(preamble, rules, rules, tupleLength, length));
-        return result;
+		return result;
-    }
+	}
-    /**
+	/**
-     * Recursively attempt to find sequences in the ruleset to append to 
+	 * Recursively attempt to find sequences in the ruleset to append to what's
-     * what's been composed so far.
+	 * been composed so far.
-     * @param glanceBack
+	 * 
-     * @param allRules
+	 * @param glanceBack
-     * @param currentRules
+	 * @param allRules
-     * @param tupleLength
+	 * @param currentRules
-     * @param length
+	 * @param tupleLength
-     * @return 
+	 * @param length
-     */
+	 * @return
 	 */
 	private WordSequence compose(Stack<String> glanceBack,
 			RuleTreeNode allRules, RuleTreeNode currentRules, int tupleLength,
 			int length) {
-        assert (glanceBack.size() == tupleLength) : "Shouldn't happen: bad tuple size";
+		assert (glanceBack.size() == tupleLength) : "Shouldn't happen: bad tuple size";
-        assert (allRules.getWord() == ROOTMAGICTOKEN) : "Shoudn't happen: bad rule set";
+		assert (allRules.getWord() == RuleTreeNode.ROOTMAGICTOKEN) : "Shoudn't happen: bad rule set";
-        WordSequence result;
+		WordSequence result;
-        try {
+		try {
-            @SuppressWarnings("unchecked")
+			@SuppressWarnings("unchecked")
-			String here = currentRules.getWord((Stack<String>) glanceBack.clone());
+			String here = currentRules.getWord((Stack<String>) glanceBack
-            System.err.println( String.format( "Trying token %s", here));
+					.clone());
 			System.err.println(String.format("Trying token %s", here));
-            result = new WordSequence();
+			result = new WordSequence();
-            result.add(here);
+			result.add(here);
-            if (length != 0) {
+			if (length != 0) {
-                /* we're not done yet */
+				/* we're not done yet */
-                Collection<String> options = allRules.getSuccessors();
+				Collection<String> options = allRules.getSuccessors();
-                for (String next : options) {
+				for (String next : options) {
-                    WordSequence rest =
+					@SuppressWarnings("unchecked")
-                            this.tryOption( (Stack<String>) glanceBack.clone(), allRules,
+					WordSequence rest = this
-                            currentRules.getRule(next), tupleLength, length - 1);
+							.tryOption((Stack<String>) glanceBack.clone(),
 									allRules, currentRules.getRule(next),
 									tupleLength, length - 1);
-                    if (rest != null) {
+					if (rest != null) {
-                        /* we have a solution */
+						/* we have a solution */
-                        result.addAll(rest);
+						result.addAll(rest);
-                        break;
+						break;
-                    }
+					}
-                }
+				}
-            }
+			}
-        } catch (NoSuchPathException ex) {
+		} catch (NoSuchPathException ex) {
-            Logger.getLogger(TextGenerator.class.getName()).log(Level.WARNING,
+			System.err.println( String.format("No path %s: Backtracking...", glanceBack));
-                    String.format("No path %s: Backtracking...", glanceBack));
+			result = null;
-            result = null;
+		}
        }
-        return result;
+		return result;
-    }
+	}
-    /**
+	/**
-     * Try composing with this ruleset 
+	 * Try composing with this ruleset
-     * @param glanceBack
+	 * 
-     * @param allRules all the rules there are.
+	 * @param glanceBack
-     * @param currentRules the current node in the rule tree.
+	 * @param allRules
-     * @param tupleLength the size of the glanceback window we're considering.
+	 *            all the rules there are.
-     * @param length
+	 * @param currentRules
-     * @return 
+	 *            the current node in the rule tree.
-     */
+	 * @param tupleLength
 	 *            the size of the glanceback window we're considering.
 	 * @param length
 	 * @return
 	 */
 	private WordSequence tryOption(Stack<String> glanceBack,
 			RuleTreeNode allRules, RuleTreeNode currentRules, int tupleLength,
 			int length) {
@ -364,69 +276,76 @@ class TextGenerator {
 				length);
 	}
-    /**
+	/**
-     * Return a new stack comprising all the items on the current stack, 
+	 * Return a new stack comprising all the items on the current stack, with
-     * with this new string added at the bottom
+	 * this new string added at the bottom
-     *
+	 * 
-     * @param stack the stack to restack.
+	 * @param stack
-     * @param bottom the item to place on the bottom.
+	 *            the stack to restack.
-     * @return the restacked stack.
+	 * @param bottom
-     */
+	 *            the item to place on the bottom.
-    private Stack<String> restack(Stack<String> stack, String bottom) {
+	 * @return the restacked stack.
-        final Stack<String> result;
+	 */
-        if (stack.isEmpty()) {
+	private Stack<String> restack(Stack<String> stack, String bottom) {
-            result = new Stack<String>();
+		final Stack<String> result;
-            result.push(bottom);
+		if (stack.isEmpty()) {
-        } else {
+			result = new Stack<String>();
-            String top = stack.pop();
+			result.push(bottom);
-            result = restack(stack, bottom);
+		} else {
-            result.push(top);
+			String top = stack.pop();
-        }
+			result = restack(stack, bottom);
-        return result;
+			result.push(top);
-    }
+		}
 		return result;
 	}
 	/**
 	 * Random walk of the rule tree to extract (from the root) a legal sequence
 	 * of words the length of our tuple.
 	 * 
 	 * @param rules
 	 *            the rule tree (fragment) to walk.
 	 * @return a sequence of words.
 	 */
 	private Stack<String> composePreamble(RuleTreeNode rules) {
 		final Stack<String> result;
 		final RuleTreeNode successor = rules.getRule();
-    /** 
+		if (successor == null) {
-     * Random walk of the rule tree to extract (from the root) a legal sequence of words the length of our tuple.
+			result = new Stack<String>();
-     * 
+		} else {
-     * @param rules the rule tree (fragment) to walk.
+			result = this.composePreamble(successor);
-     * @return a sequence of words.
+			result.push(rules.getWord());
-     */
+		}
-    private Stack<String> composePreamble(RuleTreeNode rules) {
+		return result;
-        final Stack<String> result;
+	}
        final RuleTreeNode successor = rules.getRule();
-        if (successor == null) {
+	/**
-            result = new Stack<String>();
+	 * 
-        } else {
+	 * @param tokens
-            result = this.composePreamble(successor);
+	 *            a sequence of tokens
-            result.push(rules.getWord());
+	 * @param marker
-        }
+	 *            a marker to terminate after the last occurrance of.
-        return result;
+	 * @return a copy of tokens, truncated at the last occurrance of the marker.
-    }
+	 */
 	private WordSequence truncateAtLastInstance(WordSequence tokens,
 			String marker) {
 		final WordSequence result = new WordSequence();
-    /**
+		if (!tokens.isEmpty()) {
     * 
     * @param tokens a sequence of tokens
     * @param marker a marker to terminate after the last occurrance of.
     * @return a copy of tokens, truncated at the last occurrance of the marker.
     */
    private WordSequence truncateAtLastInstance(WordSequence tokens, 
            String marker) {
        final WordSequence result = new WordSequence();
-        if (!tokens.isEmpty()) {
+			String token = tokens.remove();
 			result.add(token);
 			if (!(marker.equals(token) && !tokens.contains(marker))) {
 				/*
 				 * woah, double negatives. If the token we're looking at is the
 				 * marker, and the remainder of the tokens does not include the
 				 * marker, we're done. Otherwise, we continue. OK?
 				 */
 				result.addAll(this.truncateAtLastInstance(tokens, marker));
 			}
 		}
-            String token = tokens.remove();
+		return result;
-            result.add(token);
+	}
            if (!(marker.equals(token) && !tokens.contains(marker))) {
                /* woah, double negatives. If the token we're looking at is the
                 * marker, and the remainder of the tokens does not include the 
                 * marker, we're done. Otherwise, we continue. OK? */
                result.addAll(this.truncateAtLastInstance(tokens, marker));
            }
        }
        return result;
    }
 }
--- a/src/cc/journeyman/milkwood/Tokeniser.java
+++ b/src/cc/journeyman/milkwood/Tokeniser.java
@ -0,0 +1,74 @@
 /*
 * Proprietary unpublished source code property of 
 * Simon Brooke <simon@journeyman.cc>.
 * 
 * Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
 */
 package cc.journeyman.milkwood;
 import java.io.BufferedReader;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StreamTokenizer;
 /**
 * A tokeniser which reads tokens in a manner which suits me. Although this
 * implementation is based on a StreamTokenizer, the point of separating this
 * out into its own class is that if I had more time I could reimplement.
 * 
 * @author simon
 * 
 */
 public class Tokeniser extends StreamTokenizer {
 	/**
 	 * Create a tokeniser on this reader. The inherited syntax table is reset
 	 * and then set up so that characters 8-15 and 28-32 and both kinds of
 	 * quote mark count as white space, and ASCII digits and letters count as
 	 * word characters. Number parsing is not switched on, so runs of digits
 	 * are delivered as words.
 	 * 
 	 * @param r
 	 *            the reader from which I read.
 	 */
 	public Tokeniser(Reader r) {
 		super(r);
 		this.resetSyntax();
 		this.whitespaceChars(8, 15);
 		this.whitespaceChars(28, 32);
 		/*
 		 * treat quotemarks as white space. Actually it would be better if quote
 		 * marks were white space only if preceded or followed by whitespace, so
 		 * that, e.g., 'don't' and 'can't' appeared as single tokens. But that
 		 * means really reimplementing the parser and I don't have time.
 		 */
 		this.whitespaceChars((int) '\"', (int) '\"');
 		this.whitespaceChars((int) '\'', (int) '\'');
 		this.wordChars((int) '0', (int) '9');
 		this.wordChars((int) 'A', (int) 'Z');
 		this.wordChars((int) 'a', (int) 'z');
 	}

 	/**
 	 * Create a tokeniser on this input stream, wrapping it in a buffered
 	 * reader. The bytes are decoded by an InputStreamReader constructed
 	 * without an explicit character set.
 	 * 
 	 * @param in
 	 *            the input stream from which I read.
 	 */
 	public Tokeniser(InputStream in) {
 		this(new BufferedReader(new InputStreamReader(in)));
 	}

 	/**
 	 * There surely must be a better way to get just the token out of a
 	 * StreamTokenizer...! Renders the token most recently read (as recorded
 	 * in <code>ttype</code>, <code>sval</code> and <code>nval</code>) as a
 	 * plain string; call <code>nextToken()</code> first.
 	 * 
 	 * @return the lower-cased word for a word token; the decimal rendering of
 	 *         the value for a number token; the placeholder "FIXME" for an
 	 *         end-of-line token; otherwise a one-character string holding the
 	 *         token type cast to a character.
 	 */
 	public String readBareToken() {
 		final String token;

 		switch (this.ttype) {
 		case StreamTokenizer.TT_EOL:
 			token = "FIXME"; // TODO: fix this!
 			break;
 		case StreamTokenizer.TT_NUMBER:
 			/* Double.toString gives the same text as the deprecated
 			 * new Double(nval).toString() without boxing */
 			token = Double.toString(this.nval);
 			break;
 		case StreamTokenizer.TT_WORD:
 			token = this.sval.toLowerCase();
 			break;
 		default:
 			/* an ordinary character: its code is the token type */
 			token = String.valueOf((char) this.ttype);
 			break;
 		}

 		return token;
 	}
 }