From 27efd210899f581f2b7cff1dd15ce578e610f6db Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 31 Oct 2013 13:38:04 +0000 Subject: [PATCH] General tidy up, removal of dead code, and bring back to Java 7 level. --- .gitignore | 2 + nbproject/build-impl.xml | 77 ++--- nbproject/genfiles.properties | 6 +- nbproject/private/private.properties | 4 + nbproject/project.properties | 11 +- nbproject/project.xml | 1 - src/cc/journeyman/milkwood/Composer.java | 231 +++++++------ src/cc/journeyman/milkwood/Digester.java | 68 ++-- src/cc/journeyman/milkwood/Milkwood.java | 306 +++++++++--------- .../milkwood/NoSuchPathException.java | 17 - src/cc/journeyman/milkwood/RuleTreeNode.java | 173 +++++----- src/cc/journeyman/milkwood/Tokeniser.java | 111 +++---- .../journeyman/milkwood/TupleDictionary.java | 60 ---- src/cc/journeyman/milkwood/Window.java | 56 ++++ src/cc/journeyman/milkwood/WordSequence.java | 90 +++--- src/cc/journeyman/milkwood/WordStack.java | 57 ---- src/cc/journeyman/milkwood/Writer.java | 261 +++++++-------- 17 files changed, 699 insertions(+), 832 deletions(-) create mode 100644 .gitignore delete mode 100644 src/cc/journeyman/milkwood/NoSuchPathException.java delete mode 100644 src/cc/journeyman/milkwood/TupleDictionary.java create mode 100644 src/cc/journeyman/milkwood/Window.java delete mode 100644 src/cc/journeyman/milkwood/WordStack.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..95832e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/build/ +/dist/ \ No newline at end of file diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml index 86591fe..3be18f2 100644 --- a/nbproject/build-impl.xml +++ b/nbproject/build-impl.xml @@ -54,43 +54,6 @@ is divided into following sections: - - - - - - - - - - - - - - - - - - - - - - - - - - Must set platform.home - Must set platform.bootcp - Must set platform.java - Must set platform.javac - - The J2SE Platform is not correctly set up. - Your active platform is: ${platform.active}, but the corresponding property "platforms.${platform.active}.home" is not found in the project's properties files. - Either open the project in the IDE and setup the Platform with the same name or add it manually. - For example like this: - ant -Duser.properties.file=<path_to_property_file> jar (where you put the property "platforms.${platform.active}.home" in a .properties file) - or ant -Dplatforms.${platform.active}.home=<path_to_JDK_home> jar (where no properties file is used) - @@ -225,6 +188,15 @@ is divided into following sections: + + + + + + + + + @@ -293,7 +265,7 @@ is divided into following sections: - + @@ -332,7 +304,7 @@ is divided into following sections: - + @@ -412,7 +384,7 @@ is divided into following sections: - + @@ -435,7 +407,7 @@ is divided into following sections: - + @@ -474,7 +446,7 @@ is divided into following sections: - + @@ -554,7 +526,7 @@ is divided into following sections: - + @@ -579,7 +551,7 @@ is divided into following sections: - + @@ -759,9 +731,6 @@ is divided into following sections: - - - @@ -777,9 +746,7 @@ is divided into following sections: - - - + @@ -804,7 +771,7 @@ is divided into following sections: - + @@ -831,7 +798,7 @@ is divided into following sections: - + @@ -1018,7 +985,7 @@ is divided into following sections: - ${platform.java} -cp "${run.classpath.with.dist.jar}" ${main.class} + java -cp "${run.classpath.with.dist.jar}" ${main.class} @@ -1045,7 +1012,7 @@ is divided into following sections: To run this application from the command line without Ant, try: - ${platform.java} -jar "${dist.jar.resolved}" + java -jar "${dist.jar.resolved}" @@ -1236,7 +1203,7 @@ is divided into following sections: - + diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties index 8621aae..fad583e 100644 --- a/nbproject/genfiles.properties +++ b/nbproject/genfiles.properties @@ -1,8 +1,8 @@ -build.xml.data.CRC32=d35b316e +build.xml.data.CRC32=31018a52 build.xml.script.CRC32=cd5c02b3 build.xml.stylesheet.CRC32=28e38971@1.56.1.46 # This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. # Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. -nbproject/build-impl.xml.data.CRC32=d35b316e -nbproject/build-impl.xml.script.CRC32=0441a68e +nbproject/build-impl.xml.data.CRC32=31018a52 +nbproject/build-impl.xml.script.CRC32=fe6e4d15 nbproject/build-impl.xml.stylesheet.CRC32=c6d2a60f@1.56.1.46 diff --git a/nbproject/private/private.properties b/nbproject/private/private.properties index f8e12c4..0b27e09 100644 --- a/nbproject/private/private.properties +++ b/nbproject/private/private.properties @@ -1,2 +1,6 @@ compile.on.save=true +do.depend=false +do.jar=true +javac.debug=true +javadoc.preview=true user.properties.file=/home/simon/.jmonkeyplatform/3.0/build.properties diff --git a/nbproject/project.properties b/nbproject/project.properties index 27b980f..5973007 100644 --- a/nbproject/project.properties +++ b/nbproject/project.properties @@ -1,9 +1,10 @@ annotation.processing.enabled=true annotation.processing.enabled.in.editor=false -annotation.processing.processor.options= annotation.processing.processors.list= annotation.processing.run.all.processors=true annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output +application.title=milkwood +application.vendor=simon build.classes.dir=${build.dir}/classes build.classes.excludes=**/*.java,**/*.form # This directory is removed when the project is cleaned: @@ -24,6 +25,7 @@ debug.test.classpath=\ dist.dir=dist dist.jar=${dist.dir}/milkwood.jar dist.javadoc.dir=${dist.dir}/javadoc +endorsed.classpath= excludes= includes=** jar.compress=false @@ -33,8 +35,8 @@ javac.compilerargs= javac.deprecation=false javac.processorpath=\ ${javac.classpath} -javac.source=1.6 -javac.target=1.6 +javac.source=1.7 +javac.target=1.7 javac.test.classpath=\ ${javac.classpath}:\ ${build.classes.dir} @@ -55,7 +57,8 @@ main.class=cc.journeyman.milkwood.Milkwood manifest.file=manifest.mf meta.inf.dir=${src.dir}/META-INF mkdist.disabled=false -platform.active=JDK_1.6 +obfuscate.options=-keep public class * extends com.jme3.app.Application{public *;}\n-keep public class * extends com.jme3.system.JmeSystemDelegate{public *;}\n-keep public class * implements com.jme3.renderer.Renderer{public *;}\n-keep public class * implements com.jme3.asset.AssetLoader{public *;}\n-keep public class * implements com.jme3.asset.AssetLocator{public *;}\n-keep public class * implements de.lessvoid.nifty.screen.ScreenController{public *;}\n-dontwarn\n-dontnote\n +platform.active=default_platform platforms.JDK_1.6.home=/usr/lib/jvm/java-6-openjdk-amd64/ run.classpath=\ ${javac.classpath}:\ diff --git a/nbproject/project.xml b/nbproject/project.xml index a6aa914..c684742 100644 --- a/nbproject/project.xml +++ b/nbproject/project.xml @@ -4,7 +4,6 @@ milkwood - diff --git a/src/cc/journeyman/milkwood/Composer.java b/src/cc/journeyman/milkwood/Composer.java index c69f863..562ea35 100644 --- a/src/cc/journeyman/milkwood/Composer.java +++ b/src/cc/journeyman/milkwood/Composer.java @@ -1,139 +1,134 @@ package cc.journeyman.milkwood; import java.util.Collection; -import java.util.Collections; /** * Composes text output based on a rule tree. - * + * * @author simon - * + * */ public class Composer { - /** - * Whether or not I am in debugging mode. - */ - private final boolean debug; - /** - * - * @param debug - * Whether or not I am in debugging mode. - */ - public Composer(boolean debug) { - this.debug = debug; - } + /** + * Whether or not I am in debugging mode. + */ + private final boolean debug; - /** - * Recursive, backtracking, output generator. - * - * @param rules the rule set we're working to. - * @param length the number of tokens still to be output. - * @return if a successful path forward is found, that path, else null. - */ - public WordSequence compose(RuleTreeNode rules, int length) { - WordStack preamble = composePreamble(rules); - WordSequence result = new WordSequence(); + /** + * + * @param debug Whether or not I am in debugging mode. + */ + public Composer(boolean debug) { + this.debug = debug; + } - // composing the preamble will have ended with *ROOT* on top of the - // stack; - // get rid of it. - preamble.pop(); - - if (debug) { - System.err.println( "Preamble: " + preamble); - } + /** + * Recursive, backtracking, output generator. + * + * @param rules the rule set we're working to. + * @param length the number of tokens still to be output. + * @return if a successful path forward is found, that path, else null. + */ + protected WordSequence compose(RuleTreeNode rules, int length) { + Window preamble = composePreamble(rules); + WordSequence result = new WordSequence(); - result.addAll(preamble); - - WordStack body = this.compose(preamble, rules, length); - Collections.reverse(body); - result.addAll(body); - - return result; - } + // composing the preamble will have ended with *ROOT* on top of the + // stack; + // get rid of it. + preamble.pop(); - /** - * Recursively attempt to find sequences in the ruleset to append to what's - * been composed so far. - * - * @param glanceBack the last few words output. - * @param rules the rule set we're working to. - * @param length the number of tokens still to be output. - * @return if a successful path forward is found, that path, else null. - */ - private WordStack compose(WordStack glanceBack, RuleTreeNode rules, - int length) { - final WordStack result; - - if ( debug) { - System.err.println( String.format( "%d: %s", length, glanceBack)); - } + if (debug) { + System.err.println("Preamble: " + preamble); + } - /* are we there yet? */ - if (length == 0) { - result = new WordStack(); - } else { - /* - * are there any rules in this ruleset which matches the current - * sliding window? if so, then recurse; if not, then fail. - */ - Collection words = rules.match(glanceBack.duplicate()); + result.addAll(preamble); - if (words.isEmpty()) { - /* backtrack */ - result = null; - } else { - result = tryOptions(words, glanceBack, rules, length); - } - } - return result; - } - - /** - * Try each of these candidates in turn, attempting to recurse. - * @param candidates words which could potentially be added to the output. - * @param glanceBack the last few words output. - * @param allRules the rule set we're working to. - * @param length the number of tokens still to be output. - * @return if a successful path forward is found, that path, else null. - */ - private WordStack tryOptions(Collection candidates, - WordStack glanceBack, RuleTreeNode allRules, int length) { - WordStack result = null; - - for ( String candidate : candidates) { - result = compose( new WordStack(glanceBack, candidate), allRules, length - 1); - if ( result != null) { - /* by Jove, I think she's got it! */ - result.push(candidate); - break; - } - } - - return result; - } + result.addAll(this.compose(preamble, rules, length)); + return result; + } - /** - * Random walk of the rule tree to extract (from the root) a legal sequence - * of words the length of our tuple. - * - * @param rules - * the rule tree (fragment) to walk. - * @return a sequence of words. - */ - private WordStack composePreamble(RuleTreeNode rules) { - final WordStack result; - final RuleTreeNode successor = rules.getRule(); + /** + * Recursively attempt to find sequences in the ruleset to append to what's + * been composed so far. + * + * @param glanceBack the last few words output. + * @param rules the rule set we're working to. + * @param length the number of tokens still to be output. + * @return if a successful path forward is found, that path, else null. + */ + private WordSequence compose(Window glanceBack, RuleTreeNode rules, + int length) { + final WordSequence result; - if (successor == null) { - result = new WordStack(); - } else { - result = this.composePreamble(successor); - result.push(rules.getWord()); - } - return result; - } + if (debug) { + System.err.println(String.format("%d: %s", length, glanceBack)); + } + /* are we there yet? */ + if (length == 0) { + result = new WordSequence(); + } else { + /* + * are there any rules in this ruleset which matches the current + * sliding window? if so, then recurse; if not, then fail. + */ + Collection words = rules.match(glanceBack.duplicate()); + + if (words.isEmpty()) { + /* backtrack */ + result = null; + } else { + result = tryOptions(words, glanceBack, rules, length); + } + } + return result; + } + + /** + * Try each of these candidates in turn, attempting to recurse. + * + * @param candidates words which could potentially be added to the output. + * @param glanceBack the last few words output. + * @param allRules the rule set we're working to. + * @param length the number of tokens still to be output. + * @return if a successful path forward is found, that path, else null. + */ + private WordSequence tryOptions(Collection candidates, + Window glanceBack, RuleTreeNode allRules, int length) { + WordSequence result = null; + + for (String candidate : candidates) { + result = compose(new Window(glanceBack, candidate), allRules, length - 1); + if (result != null) { + /* by Jove, I think she's got it! */ + result.push(candidate); + break; + } + } + + return result; + } + + /** + * Random walk of the rule tree to extract (from the root) a legal sequence + * of words the length of our tuple. + * + * @param rules the rule tree (fragment) to walk. + * @return a sequence of words. + */ + private Window composePreamble(RuleTreeNode rules) { + final Window result; + final RuleTreeNode successor = rules.getRule(); + + if (successor == null) { + result = new Window(); + } else { + result = this.composePreamble(successor); + result.push(rules.getWord()); + } + return result; + } } diff --git a/src/cc/journeyman/milkwood/Digester.java b/src/cc/journeyman/milkwood/Digester.java index 8b2facb..973cc32 100644 --- a/src/cc/journeyman/milkwood/Digester.java +++ b/src/cc/journeyman/milkwood/Digester.java @@ -15,46 +15,44 @@ import java.util.Queue; /** * Read an input stream of text and digest it into a set of generation rules. * Separated out of TextGenerator mainly to declutter tht class. - * + * * @author simon - * + * */ public class Digester { - /** - * Read tokens from the input stream, and compile them into the rule tree - * below this root. - * - * @param in - * the input stream from which I read. - * @param tupleLength - * the length of the tuples I read. - * @param root - * the ruleset to which I shall add. - * @return the number of tokens read. - * @throws IOException if can't read from file system. - */ - protected int read(final InputStream in, final int tupleLength, - final RuleTreeNode root) throws IOException { - int result = 0; - final Queue openTuples = new LinkedList(); - final Tokeniser tok = new Tokeniser(in); - for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok - .nextToken()) { - result++; - final WordSequence newTuple = new WordSequence(); - String token = tok.readBareToken(); + /** + * Read tokens from the input stream, and compile them into the rule tree + * below this root. + * + * @param in the input stream from which I read. + * @param tupleLength the length of the tuples I read. + * @param root the ruleset to which I shall add. + * @return the number of tokens read. + * @throws IOException if can't read from file system. + */ + protected int digest(final InputStream in, final int tupleLength, + final RuleTreeNode root) throws IOException { + int result = 0; + final Queue openTuples = new LinkedList<>(); + final Tokeniser tok = new Tokeniser(in); - openTuples.add(newTuple); - for (WordSequence tuple : openTuples) { - tuple.add(token); - } + for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok + .nextToken()) { + result++; + final WordSequence newTuple = new WordSequence(); + String token = tok.readBareToken(); - if (openTuples.size() > tupleLength) { - root.addSequence(openTuples.remove()); - } - } + openTuples.add(newTuple); + for (WordSequence tuple : openTuples) { + tuple.add(token); + } - return result; - } + if (openTuples.size() > tupleLength) { + root.addSequence(openTuples.remove()); + } + } + + return result; + } } diff --git a/src/cc/journeyman/milkwood/Milkwood.java b/src/cc/journeyman/milkwood/Milkwood.java index 19011b7..8b0a71d 100644 --- a/src/cc/journeyman/milkwood/Milkwood.java +++ b/src/cc/journeyman/milkwood/Milkwood.java @@ -1,3 +1,9 @@ +/* + * Proprietary unpublished source code property of + * Simon Brooke . + * + * Copyright (c) 2013 Simon Brooke + */ package cc.journeyman.milkwood; import java.io.File; @@ -8,182 +14,158 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -/* - * Proprietary unpublished source code property of - * Simon Brooke . - * - * Copyright (c) 2013 Simon Brooke - */ - /** - * + * Text mangler based on + * http://codekata.pragprog.com/2007/01/kata_fourteen_t.html + * * @author Simon Brooke */ public class Milkwood { - /** - * The magic token which is deemed to end sentences. - */ - public static final String PERIOD = "."; - /** - * Parse command line arguments and kick off the process. Expected arguments - * include: - *
- *
-d, -debug
- *
Print debugging output to standard error
- *
-i [FILE], -input [FILE]
- *
Input file, expected to be an English (or, frankly, other natural - * language) text. Defaults to standard in.
- *
-l [NN], -length [NN]
- *
The length in tuples of the desired output. Defaults to 100. - *
-n [NN], -tuple-length [NN]
- *
The length of tuples into which the file will be analysed, default 2. - *
- *
-o [FILE], -output [FILE]
- *
Output file, to which generated text will be written. Defaults to - * standard out.
- *
- * - * @param args - * the command line arguments - * @exception FileNotFoundException - * if the user specifies a file which isn't available. - * @excpetion IOException if could not read from input or write to output. - */ - public static void main(String[] args) throws FileNotFoundException, - IOException { - /* defaults */ - InputStream in = System.in; - OutputStream out = System.out; - int tupleLength = 2; - boolean debug = false; - int length = 100; + /** + * The magic token which is deemed to end sentences. + */ + public static final String PERIOD = "."; - for (int cursor = 0; cursor < args.length; cursor++) { - String arg = args[cursor]; + /** + * Parse command line arguments and kick off the process. Expected arguments + * include: + *
+ *
-d, -debug
+ *
Print debugging output to standard error
+ *
-i [FILE], -input [FILE]
+ *
Input file, expected to be an English (or, frankly, other natural + * language) text. Defaults to standard in.
+ *
-l [NN], -length [NN]
+ *
The length in tuples of the desired output. Defaults to 100. + *
-n [NN], -tuple-length [NN]
+ *
The length of tuples into which the file will be analysed, default 2. + *
+ *
-o [FILE], -output [FILE]
+ *
Output file, to which generated text will be written. Defaults to + * standard out.
+ *
+ * + * @param args the command line arguments + * @exception FileNotFoundException if the user specifies a file which isn't + * available. + * @excpetion IOException if could not read from input or write to output. + */ + public static void main(String[] args) throws FileNotFoundException, + IOException { + /* defaults */ + InputStream in = System.in; + OutputStream out = System.out; + int tupleLength = 2; + boolean debug = false; + int length = 100; - if (arg.startsWith("-") && arg.length() > 1) { - switch (arg.charAt(1)) { - case 'd': - debug = true; - break; - case 'i': - // input - in = new FileInputStream(new File(args[++cursor])); - break; - case 'o': // output - out = new FileOutputStream(new File(args[++cursor])); - break; - case 'l': // length - length = Integer.parseInt(args[++cursor]); - break; - case 'n': - case 't': // tuple length - tupleLength = Integer.parseInt(args[++cursor]); - break; - default: - throw new IllegalArgumentException(String.format( - "Unrecognised argument '%s'", arg)); - } - } - } - try { - new Milkwood().readAndGenerate(in, out, tupleLength, length, debug); - } finally { - out.close(); - } - } + for (int cursor = 0; cursor < args.length; cursor++) { + String arg = args[cursor]; - /** - * Read tokens from this input and use them to generate text on this output. - * - * @param in - * the input stream to read. - * @param out - * the output stream to write to. - * @param tupleLength - * the length of tuples to be used in generation. - * @param length - * the length in tokens of the output to be generated. - * @param debug - * whether to print debugging output. - * @throws IOException - * if the file system buggers up, which is not, in the cosmic - * scheme of things, very likely. - */ - void readAndGenerate(final InputStream in, final OutputStream out, - final int tupleLength, int length, boolean debug) - throws IOException { - /* The root of the rule tree I shall build. */ - RuleTreeNode root = new RuleTreeNode(); - read(in, tupleLength, debug, root); + if (arg.startsWith("-") && arg.length() > 1) { + switch (arg.charAt(1)) { + case 'd': + debug = true; + break; + case 'i': + // input + in = new FileInputStream(new File(args[++cursor])); + break; + case 'o': // output + out = new FileOutputStream(new File(args[++cursor])); + break; + case 'l': // length + length = Integer.parseInt(args[++cursor]); + break; + case 'n': + case 't': // tuple length + tupleLength = Integer.parseInt(args[++cursor]); + break; + default: + throw new IllegalArgumentException(String.format( + "Unrecognised argument '%s'", arg)); + } + } + } + try { + new Milkwood().readAndGenerate(in, out, tupleLength, length, debug); + } finally { + out.close(); + } + } - WordSequence tokens = compose(tupleLength, debug, root, length); + /** + * Read tokens from this input and use them to generate text on this output. + * + * @param in the input stream to read. + * @param out the output stream to write to. + * @param tupleLength the length of tuples to be used in generation. + * @param length the length in tokens of the output to be generated. + * @param debug whether to print debugging output. + * @throws IOException if the file system buggers up, which is not, in the + * cosmic scheme of things, very likely. + */ + void readAndGenerate(final InputStream in, final OutputStream out, + final int tupleLength, int length, boolean debug) + throws IOException { + /* The root of the rule tree I shall build. */ + RuleTreeNode root = new RuleTreeNode(); + read(in, tupleLength, debug, root); - write(out, debug, tokens); - - if ( debug) { - System.err.println( "\n\nCompleted."); - } - } + WordSequence tokens = compose(tupleLength, debug, root, length); - /** - * Digest the input into a set of rules. - * - * @param in - * the input stream. - * @param tupleLength - * the length of tuples we shall consider. - * @param debug - * whether or not to print debugging output. - * @param root - * the root of the rule tree. - * @return the number of tokens read. - * @throws IOException - * if the file system buggers up, which is not, in the cosmic - * scheme of things, very likely. - */ - private int read(final InputStream in, final int tupleLength, - boolean debug, RuleTreeNode root) throws IOException { - int length = new Digester().read(in, tupleLength, root); + write(out, debug, tokens); - if (debug) { - System.err.println(root.toString()); - } - return length; - } + if (debug) { + System.err.println("\n\nCompleted."); + } + } - private WordSequence compose(final int tupleLength, boolean debug, - RuleTreeNode root, int length) { - WordSequence tokens = new Composer(debug).compose(root, length); + /** + * Digest the input into a set of rules. + * + * @param in the input stream. + * @param tupleLength the length of tuples we shall consider. + * @param debug whether or not to print debugging output. + * @param root the root of the rule tree. + * @return the number of tokens read. + * @throws IOException if the file system buggers up, which is not, in the + * cosmic scheme of things, very likely. + */ + private int read(final InputStream in, final int tupleLength, + boolean debug, RuleTreeNode root) throws IOException { + int length = new Digester().digest(in, tupleLength, root); - if (tokens.contains(PERIOD)) { - tokens = tokens.truncateAtLastInstance(PERIOD); - } - return tokens; - } + if (debug) { + System.err.println(root.toString()); + } + return length; + } - /** - * Write this sequence of tokens to this output. - * - * @param out - * the stream to which to write. - * @param debug - * whether or not to print debugging output. - * @param tokens - * the sequence of tokens to write. - * @throws IOException - * if the file system buggers up, which is not, in the cosmic - * scheme of things, very likely. - */ - private void write(final OutputStream out, boolean debug, - WordSequence tokens) throws IOException { - Writer scrivenor = new Writer(out, debug); - try { - scrivenor.writeSequence(tokens); - } finally { - scrivenor.close(); - } - } + private WordSequence compose(final int tupleLength, boolean debug, + RuleTreeNode root, int length) { + WordSequence tokens = new Composer(debug).compose(root, length); + if (tokens.contains(PERIOD)) { + tokens = tokens.truncateAtLastInstance(PERIOD); + } + return tokens; + } + + /** + * Write this sequence of tokens to this output. + * + * @param out the stream to which to write. + * @param debug whether or not to print debugging output. + * @param tokens the sequence of tokens to write. + * @throws IOException if the file system buggers up, which is not, in the + * cosmic scheme of things, very likely. + */ + private void write(final OutputStream out, boolean debug, + WordSequence tokens) throws IOException { + try (Writer scrivenor = new Writer(out, debug)) { + scrivenor.writeSequence(tokens); + } + } } diff --git a/src/cc/journeyman/milkwood/NoSuchPathException.java b/src/cc/journeyman/milkwood/NoSuchPathException.java deleted file mode 100644 index 2b49071..0000000 --- a/src/cc/journeyman/milkwood/NoSuchPathException.java +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Proprietary unpublished source code property of - * Simon Brooke . - * - * Copyright (c) 2013 Simon Brooke - */ -package cc.journeyman.milkwood; - -/** - * - * @author Simon Brooke - */ -class NoSuchPathException extends Exception { - - private static final long serialVersionUID = 1L; - -} diff --git a/src/cc/journeyman/milkwood/RuleTreeNode.java b/src/cc/journeyman/milkwood/RuleTreeNode.java index 413b9b4..a0263ba 100644 --- a/src/cc/journeyman/milkwood/RuleTreeNode.java +++ b/src/cc/journeyman/milkwood/RuleTreeNode.java @@ -16,17 +16,18 @@ import java.util.Random; import java.util.Stack; /** - * Mapping a word to its successor words. This is probably highly - * inefficient of store, but for the present purposes my withers are unwrung. - * Not thread safe in this form because of access to the random number generator. - * + * Mapping a word to its successor words. This is probably highly inefficient of + * store, but for the present purposes my withers are unwrung. Not thread safe + * in this form because of access to the random number generator. + * * @author Simon Brooke */ public class RuleTreeNode { - /** - * The magic token which identifies the root node of a rule tree. - */ - public static final String ROOTMAGICTOKEN = "*ROOT*"; + + /** + * The magic token which identifies the root node of a rule tree. + */ + public static final String ROOTMAGICTOKEN = "*ROOT*"; /** * The line separator on this platform. */ @@ -35,64 +36,66 @@ public class RuleTreeNode { * A random number generator. */ private static Random RANDOM = new Random(); - /** * The word at this node. */ private final String word; - /** * Potential successors of this node */ - private Map rules = new HashMap(); - + private Map rules = new HashMap<>(); + /** * If no argument passed, generate a root node. */ public RuleTreeNode() { - this( RuleTreeNode.ROOTMAGICTOKEN); + this(RuleTreeNode.ROOTMAGICTOKEN); } /** * Create me wrapping this word. + * * @param word the word I represent. */ public RuleTreeNode(String word) { this.word = word; } - - + + /** + * Specialisation: neatly format the rule tree. + * + * @return a neatly formatted representation. + */ + @Override public String toString() { - StringBuffer buffy = new StringBuffer(); - - this.printToBuffer( buffy, 0); - - - return buffy.toString(); + StringBuffer buffy = new StringBuffer(); + + this.printToBuffer(buffy, 0); + + + return buffy.toString(); } - - + private void printToBuffer(StringBuffer buffy, int indent) { - for (int i = 0; i < indent; i++) { - buffy.append( '\t'); - } - buffy.append( this.getWord()); - - - if ( this.rules.isEmpty()) { - buffy.append(NEWLINE); - } else { - buffy.append( " ==>").append(NEWLINE); - for ( String successor : this.getSuccessors()) { - rules.get(successor).printToBuffer(buffy, indent + 1); - } - buffy.append(NEWLINE); - } - } + for (int i = 0; i < indent; i++) { + buffy.append('\t'); + } + buffy.append(this.getWord()); - /** - * + if (this.rules.isEmpty()) { + buffy.append(NEWLINE); + } else { + buffy.append(" ==>").append(NEWLINE); + for (String successor : this.getSuccessors()) { + rules.get(successor).printToBuffer(buffy, indent + 1); + } + buffy.append(NEWLINE); + } + } + + /** + * * @return my word. */ public String getWord() { @@ -100,60 +103,60 @@ public class RuleTreeNode { } /** - * + * * @return a shuffled list of the words which could follow this one. */ public Collection getSuccessors() { - ArrayList result = new ArrayList(); + ArrayList result = new ArrayList<>(); result.addAll(rules.keySet()); Collections.shuffle(result, RANDOM); return result; } - - + /** * Compile this sequence of tokens into rule nodes under me. + * * @param sequence the sequence of tokens to compile. */ public void addSequence(Queue sequence) { if (!sequence.isEmpty()) { - String word = sequence.remove(); - RuleTreeNode successor = this.getRule(word); + String token = sequence.remove(); + RuleTreeNode successor = this.getRule(token); if (successor == null) { - successor = new RuleTreeNode(word); - this.rules.put(word, successor); + successor = new RuleTreeNode(token); + this.rules.put(token, successor); } - + successor.addSequence(sequence); } } - - /** + + /** * Choose a successor at random. - * + * * @return the successor chosen, or null if I have none. */ - protected RuleTreeNode getRule() { - RuleTreeNode result = null; + protected RuleTreeNode getRule() { + RuleTreeNode result = null; - if (!rules.isEmpty()) { - int target = RANDOM.nextInt(rules.keySet().size()); + if (!rules.isEmpty()) { + int target = RANDOM.nextInt(rules.keySet().size()); - for (String key : rules.keySet()) { - /* - * NOTE: decrement after test. - */ - if (target-- == 0) { - result = rules.get(key); - } - } - } + for (String key : rules.keySet()) { + /* + * NOTE: decrement after test. + */ + if (target-- == 0) { + result = rules.get(key); + } + } + } - return result; + return result; } - + /** - * + * * @param token a token to seek. * @return the successor among my successors which has this token, if any. */ @@ -161,44 +164,46 @@ public class RuleTreeNode { return rules.get(token); } - protected String getWord(Stack path) throws NoSuchPathException { + protected String getWord(Stack path) { final String result; - - if ( path.isEmpty()) { + + if (path.isEmpty()) { result = this.getWord(); } else { final RuleTreeNode successor = this.getRule(path.pop()); - + if (successor == null) { result = null; } else { result = successor.getWord(path); } } - + return result; } /** - * Find all the terminal strings in the current rule set which would match this path. + * Find all the terminal strings in the current rule set which would match + * this path. + * * @param path the path to match * @return a collection (possibly empty) of potential successors. */ - public Collection match(WordStack path) { - final Collection result; - - if ( path.isEmpty()) { + public Collection match(Window path) { + final Collection result; + + if (path.isEmpty()) { result = this.getSuccessors(); } else { final RuleTreeNode successor = this.getRule(path.pop()); - + if (successor == null) { - result = new ArrayList(); + result = new ArrayList<>(); } else { result = successor.match(path); } } - - return result; - } + + return result; + } } diff --git a/src/cc/journeyman/milkwood/Tokeniser.java b/src/cc/journeyman/milkwood/Tokeniser.java index 7ce945b..b9b1317 100644 --- a/src/cc/journeyman/milkwood/Tokeniser.java +++ b/src/cc/journeyman/milkwood/Tokeniser.java @@ -16,66 +16,69 @@ import java.io.StreamTokenizer; * A tokeniser which reads tokens in a manner which suits me. Although this * implementation is based on a StreamTokenizer, the point of separating this * out into its own class is that if I had more time I could reimplement. - * + * * @author simon - * + * */ public class Tokeniser extends StreamTokenizer { - public Tokeniser(Reader r) { - super(r); + /** + * Initialise me appropriately wrapping this reader. + * @param r the reader to wrap. + */ + public Tokeniser(Reader r) { + super(r); - this.resetSyntax(); - this.whitespaceChars(8, 15); - this.whitespaceChars(28, 32); - /* - * treat quotemarks as white space. Actually it would be better if quote - * marks were white space only if preceded or followed by whitespace, so - * that, e.g., 'don't' and 'can't' appeared as single tokens. But that - * means really reimplementing the parser and I don't have time. - */ - this.whitespaceChars((int) '\"', (int) '\"'); - this.whitespaceChars((int) '\'', (int) '\''); - /* - * treat underscore and hyphen as whitespace as well. Again, hyphen with - * either leading or trailing non-whitespace probably ought to be - * treated specially, but... - */ - this.whitespaceChars((int) '_', (int) '_'); - this.whitespaceChars((int) '-', (int) '-'); - this.wordChars((int) '0', (int) '9'); - this.wordChars((int) 'A', (int) 'Z'); - this.wordChars((int) 'a', (int) 'z'); - } + this.resetSyntax(); + this.whitespaceChars(8, 15); + this.whitespaceChars(28, 32); + /* + * treat quotemarks as white space. Actually it would be better if quote + * marks were white space only if preceded or followed by whitespace, so + * that, e.g., 'don't' and 'can't' appeared as single tokens. But that + * means really reimplementing the parser and I don't have time. + */ + this.whitespaceChars((int) '\"', (int) '\"'); + this.whitespaceChars((int) '\'', (int) '\''); + /* + * treat underscore and hyphen as whitespace as well. Again, hyphen with + * either leading or trailing non-whitespace probably ought to be + * treated specially, but... + */ + this.whitespaceChars((int) '_', (int) '_'); + this.whitespaceChars((int) '-', (int) '-'); + this.wordChars((int) '0', (int) '9'); + this.wordChars((int) 'A', (int) 'Z'); + this.wordChars((int) 'a', (int) 'z'); + } - public Tokeniser(InputStream in) { - this(new BufferedReader(new InputStreamReader(in))); - } + public Tokeniser(InputStream in) { + this(new BufferedReader(new InputStreamReader(in))); + } - /** - * There surely must be a better way to get just the token out of a - * StreamTokenizer...! - */ - public String readBareToken() { - final String token; - - switch (this.ttype) { - case StreamTokenizer.TT_EOL: - token = "FIXME"; // TODO: fix this! - break; - case StreamTokenizer.TT_NUMBER: - token = new Double(this.nval).toString(); - break; - case StreamTokenizer.TT_WORD: - token = this.sval.toLowerCase(); - break; - default: - StringBuffer buffy = new StringBuffer(); - buffy.append((char) this.ttype); - token = buffy.toString(); - break; - } - return token; - } + /** + * There surely must be a better way to get just the token out of a + * StreamTokenizer...! + */ + public String readBareToken() { + final String token; + switch (this.ttype) { + case StreamTokenizer.TT_EOL: + token = "FIXME"; // TODO: fix this! + break; + case StreamTokenizer.TT_NUMBER: + token = new Double(this.nval).toString(); + break; + case StreamTokenizer.TT_WORD: + token = this.sval.toLowerCase(); + break; + default: + StringBuilder bob = new StringBuilder(); + bob.append((char) this.ttype); + token = bob.toString(); + break; + } + return token; + } } diff --git a/src/cc/journeyman/milkwood/TupleDictionary.java b/src/cc/journeyman/milkwood/TupleDictionary.java deleted file mode 100644 index ae192ab..0000000 --- a/src/cc/journeyman/milkwood/TupleDictionary.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Proprietary unpublished source code property of - * Simon Brooke . - * - * Copyright (c) 2013 Simon Brooke - */ -package cc.journeyman.milkwood; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; - -/** - * - * @author Simon Brooke - */ -public class TupleDictionary extends HashMap> { - - private static final long serialVersionUID = 1L; - - /** - * Specialisation: if there isn't an existing entry, create one. - * - * @param token the token to look up - * @return the collection of possible tuples for that token. - */ - public Collection get(String token) { - Collection result = super.get(token); - - if (result == null) { - result = new ArrayList(); - this.put(token, result); - } - - return result; - } - - /** - * Add a new, empty sequence to my entry for this token. - * @param token the token - * @return the new sequence which was added. - */ - protected WordSequence addSequence(String token) { - return this.addSequence(token, new WordSequence()); - } - - /** - * Add this sequence to my entry for this token. - * @param token the token. - * @param sequence the sequence to add. Must not be null! - * @return the sequence which was added. - */ - protected WordSequence addSequence(String token, WordSequence sequence) { - assert (sequence != null) : "invalid sequence argument"; - - this.get(token).add(sequence); - - return sequence; - } -} diff --git a/src/cc/journeyman/milkwood/Window.java b/src/cc/journeyman/milkwood/Window.java new file mode 100644 index 0000000..1b50b23 --- /dev/null +++ b/src/cc/journeyman/milkwood/Window.java @@ -0,0 +1,56 @@ +package cc.journeyman.milkwood; + +import java.util.Stack; + +/** + * Sliding window which rules may match. + * + * @author simon + * + */ +public class Window extends Stack { + + private static final long serialVersionUID = 1L; + + /** + * Create a new, empty, wordstack. + */ + public Window() { + super(); + } + + /** + * create a new window from this window, having this new word as its + * terminal and ommitting the current first word. That is, the new window + * should be as long as the old, with each word shuffled up one place. + * + * @param prototype the window to copy from. + * @param terminal the new terminal word. + */ + public Window(Window prototype, String terminal) { + this(); + + Window copy = prototype.duplicate(); + copy.pop(); + this.populate(copy, terminal); + } + + private void populate(Window copy, String terminal) { + if (copy.isEmpty()) { + this.push(terminal); + } else { + String token = copy.pop(); + this.populate(copy, terminal); + this.push(token); + } + } + + /** + * A wrapper round clone which hides all the ugly casting. + * + * @return a duplicate copy of myself. + */ + public Window duplicate() { + return (Window) this.clone(); + } +} diff --git a/src/cc/journeyman/milkwood/WordSequence.java b/src/cc/journeyman/milkwood/WordSequence.java index 6908848..69d0532 100644 --- a/src/cc/journeyman/milkwood/WordSequence.java +++ b/src/cc/journeyman/milkwood/WordSequence.java @@ -12,56 +12,58 @@ import java.util.Queue; /** * An ordered sequence of words. Of course it implements Queue since it is a * LinkedList and LinkedList implements Queue, but I want to make it explicitly - * clear that this is a queue and can be used as such. - * + * clear that this is a queue and can be used as such. Different from WordStack + * which is a Stack. + * + * @see WordStack + * * @author Simon Brooke */ -class WordSequence extends LinkedList implements Queue { +public class WordSequence extends LinkedList implements Queue { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - /** - * - * @param tokens - * a sequence of tokens - * @param marker - * a marker to terminate after the last occurrance of. - * @return a copy of tokens, truncated at the last occurrance of the marker. - */ - public WordSequence truncateAtLastInstance(String marker) { - final WordSequence result = new WordSequence(); + /** + * + * @param tokens a sequence of tokens + * @param marker a marker to terminate after the last occurrance of. + * @return a copy of tokens, truncated at the last occurrance of the marker. + */ + public WordSequence truncateAtLastInstance(String marker) { + final WordSequence result = new WordSequence(); - for (String token : this) { - if (token.endsWith(marker) && !this.contains(marker)) { - /* - * If the token we're looking at ends with the marker, and the - * remainder of the tokens does not include a token ending with - * the marker, we're done. Otherwise, we continue. OK? - */ - break; - } - result.add(token); - } + for (String token : this) { + result.add(token); + if (token.endsWith(marker) && !this.contains(marker)) { + /* + * If the token we're looking at ends with the marker, and the + * remainder of the tokens does not include a token ending with + * the marker, we're done. Otherwise, we continue. OK? + */ + break; + } + } - return result; - } + return result; + } - /** - * Specialisation: Working around the bug that the tokeniser treats PERIOD as a word character. - */ - @Override - public boolean contains(Object target) { - boolean result = false; - if (target != null) { - String marker = target.toString(); + /** + * Specialisation: Working around the bug that the tokeniser treats PERIOD + * as a word character. + */ + @Override + public boolean contains(Object target) { + boolean result = false; + if (target != null) { + String marker = target.toString(); - for (String token : this) { - if (token.endsWith(marker)) { - result = true; - break; - } - } - } - return result; - } + for (String token : this) { + if (token.endsWith(marker)) { + result = true; + break; + } + } + } + return result; + } } diff --git a/src/cc/journeyman/milkwood/WordStack.java b/src/cc/journeyman/milkwood/WordStack.java deleted file mode 100644 index 9760182..0000000 --- a/src/cc/journeyman/milkwood/WordStack.java +++ /dev/null @@ -1,57 +0,0 @@ -package cc.journeyman.milkwood; - -import java.util.Stack; - -/** - * Sliding window which rules may match. - * - * @author simon - * - */ -public class WordStack extends Stack { - - private static final long serialVersionUID = 1L; - - /** - * Create a new, empty, wordstack. - */ - public WordStack() { - super(); - } - - /** - * create a new window from this window, having this new word as its - * terminal and ommitting the current first word. That is, the new window - * should be as long as the old, with each word shuffled up one place. - * - * @param prototype the window to copy from. - * @param terminal the new terminal word. - */ - public WordStack(WordStack prototype, String terminal) { - this(); - - WordStack copy = prototype.duplicate(); - copy.pop(); - this.populate( copy, terminal); - } - - private void populate(WordStack copy, String terminal) { - if ( copy.isEmpty()) { - this.push(terminal); - } else { - String token = copy.pop(); - this.populate(copy, terminal); - this.push( token); - } - } - - /** - * A wrapper round clone which hides all the ugly casting. - * - * @return a duplicate copy of myself. - */ - public WordStack duplicate() { - return (WordStack) this.clone(); - } - -} diff --git a/src/cc/journeyman/milkwood/Writer.java b/src/cc/journeyman/milkwood/Writer.java index 527a342..e70e6f8 100644 --- a/src/cc/journeyman/milkwood/Writer.java +++ b/src/cc/journeyman/milkwood/Writer.java @@ -16,156 +16,141 @@ import java.util.Random; /** * A special purpose writer to write sequences of tokens, chopping them up into * paragraphs on the fly.. - * + * * @author Simon Brooke */ class Writer extends BufferedWriter { - /** - * The average number of sentences in a paragraph. - */ - public static final int AVSENTENCESPERPARA = 5; - /** - * A random number generator. - */ - private static Random RANDOM = new Random(); - /** - * Dictionary of first-words we know about; each first-word maps onto a - * tuple of tuples of word sequences beginning with that word, so 'I' might - * map onto [[I, CAME, COMMA],[I, SAW, COMMA],[I CONQUERED COMMA]]. - */ - TupleDictionary dictionary = new TupleDictionary(); - /** - * Whether or not I am in debugging mode. - */ - @SuppressWarnings("unused") - private final boolean debug; + /** + * Line separator on this platform. + */ + public static final String NEWLINE = System.getProperty("line.separator"); + /** + * The average number of sentences in a paragraph. + */ + public static final int AVSENTENCESPERPARA = 5; + /** + * A random number generator. + */ + private static Random RANDOM = new Random(); + /** + * Whether or not I am in debugging mode. + */ + @SuppressWarnings("unused") + private final boolean debug; - /** - * @param out - * the output stream to which I shall write. - * @param debug - * Whether or not I am in debugging mode. - */ - public Writer(OutputStream out, final boolean debug) { - super(new OutputStreamWriter(out)); - this.debug = debug; - } + /** + * @param out the output stream to which I shall write. + * @param debug Whether or not I am in debugging mode. + */ + public Writer(OutputStream out, final boolean debug) { + super(new OutputStreamWriter(out)); + this.debug = debug; + } - /** - * Write this sequence of tokens on this stream, sorting out minor issues of - * orthography. - * - * @param tokens - * the tokens. - * @throws IOException - * if it is impossible to write (e.g. file system full). - */ - public void writeSequence(WordSequence tokens) throws IOException { - boolean capitaliseNext = true; + /** + * Write this sequence of tokens on this stream, sorting out minor issues of + * orthography. + * + * @param tokens the tokens. + * @throws IOException if it is impossible to write (e.g. file system full). + */ + public void writeSequence(WordSequence tokens) throws IOException { + boolean capitaliseNext = true; - try { - for (String token : tokens) { - capitaliseNext = writeToken(capitaliseNext, token); - } - } finally { - this.flush(); - this.close(); - } - } + for (String token : tokens) { + capitaliseNext = writeToken(capitaliseNext, token); + } + this.write(NEWLINE); + } - /** - * Deal with end of paragraph, capital after full stop, and other minor - * orthographic conventions. - * - * @param capitalise - * whether or not the token should be capitalised - * @param token - * the token to write; - * @returnvtrue if the next token to be written should be capitalised. - * @throws IOException - */ - private boolean writeToken(boolean capitalise, String token) - throws IOException { - if (this.spaceBefore(token)) { - this.write(" "); - } - if (capitalise) { - this.write(token.substring(0, 1).toUpperCase(Locale.getDefault())); - this.write(token.substring(1)); - } else { - this.write(token); - } + /** + * Deal with end of paragraph, capital after full stop, and other minor + * orthographic conventions. + * + * @param capitalise whether or not the token should be capitalised + * @param token the token to write; + * @returnvtrue if the next token to be written should be capitalised. + * @throws IOException + */ + private boolean writeToken(boolean capitalise, String token) + throws IOException { + if (this.spaceBefore(token)) { + this.write(" "); + } + if (capitalise) { + this.write(token.substring(0, 1).toUpperCase(Locale.getDefault())); + this.write(token.substring(1)); + } else { + this.write(token); + } - this.maybeParagraph(token); + this.maybeParagraph(token); - return (token.endsWith(Milkwood.PERIOD)); - } + return (token.endsWith(Milkwood.PERIOD)); + } - /** - * Return false if token is punctuation, else true. Wouldn't it be nice if - * Java provided Character.isPunctuation(char)? However, since it doesn't, I - * can give this slightly special semantics: return true only if this is - * punctuation which would not normally be preceded with a space. - * - * @param ch - * a character. - * @return true if the should be preceded by a space, else false. - */ - private boolean spaceBefore(String token) { - final boolean result; + /** + * Return false if token is punctuation, else true. Wouldn't it be nice if + * Java provided Character.isPunctuation(char)? However, since it doesn't, I + * can give this slightly special semantics: return true only if this is + * punctuation which would not normally be preceded with a space. + * + * @param ch a character. + * @return true if the should be preceded by a space, else false. + */ + private boolean spaceBefore(String token) { + final boolean result; - switch (token.length()) { - case 0: - result = false; - break; - case 1: - switch (token.charAt(0)) { - case '.': - case ',': - case ':': - case ';': - case 's': - /* - * an 's' on its own is probably evidence of a possessive with - * the apostrophe lost - */ - case 't': - /* - * similar; probably 'doesn't' or 'shouldn't' or other cases of - * 'not' with an elided 'o'. - */ - result = false; - break; - default: - result = true; - break; - } - break; - default: - result = true; - } + switch (token.length()) { + case 0: + result = false; + break; + case 1: + switch (token.charAt(0)) { + case '.': + case ',': + case ':': + case ';': + case 's': + /* + * an 's' on its own is probably evidence of a possessive with + * the apostrophe lost + */ + case 't': + /* + * similar; probably 'doesn't' or 'shouldn't' or other cases of + * 'not' with an elided 'o'. + */ + result = false; + break; + default: + result = true; + break; + } + break; + default: + result = true; + } - return result; - } - - /** - * If this token is an end-of-sentence token, then, on one chance in some, - * have the writer write two new lines. NOTE: The tokeniser is treating - * PERIOD ('.') as a word character, even though it has not been told to. - * Token.endsWith( PERIOD) is a hack to get round this problem. TODO: - * investigate and fix. - * - * @param token - * a token - * @throws IOException - * if Mr this has run out of ink - */ - private void maybeParagraph(String token) throws IOException { - if (token.endsWith(Milkwood.PERIOD) - && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) { - this.write("\n\n"); - } - } + return result; + } + /** + * If this token is an end-of-sentence token, then, on one chance in some, + * have the writer write two new lines. NOTE: The tokeniser is treating + * PERIOD ('.') as a word character, even though it has not been told to. + * Token.endsWith( PERIOD) is a hack to get round this problem. TODO: + * investigate and fix. + * + * @param token a token + * @throws IOException if Mr this has run out of ink + */ + private void maybeParagraph(String token) throws IOException { + if (token.endsWith(Milkwood.PERIOD) + && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) { + this.write(NEWLINE); + this.write(NEWLINE); + } + } }