diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..95832e8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/build/
+/dist/
\ No newline at end of file
diff --git a/README.txt b/README.txt
index b2c7bc2..e40dc12 100644
--- a/README.txt
+++ b/README.txt
@@ -1,5 +1,7 @@
Trigrams process
+http://codekata.pragprog.com/2007/01/kata_fourteen_t.html
+
Started at: 20131030:12:48 GMT
OK, it's a tokeniser, with a map. The map maps token tuples onto tokens.
diff --git a/nbproject/build-impl.xml b/nbproject/build-impl.xml
index 86591fe..3be18f2 100644
--- a/nbproject/build-impl.xml
+++ b/nbproject/build-impl.xml
@@ -54,43 +54,6 @@ is divided into following sections:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Must set platform.home
- Must set platform.bootcp
- Must set platform.java
- Must set platform.javac
-
- The J2SE Platform is not correctly set up.
- Your active platform is: ${platform.active}, but the corresponding property "platforms.${platform.active}.home" is not found in the project's properties files.
- Either open the project in the IDE and setup the Platform with the same name or add it manually.
- For example like this:
- ant -Duser.properties.file=<path_to_property_file> jar (where you put the property "platforms.${platform.active}.home" in a .properties file)
- or ant -Dplatforms.${platform.active}.home=<path_to_JDK_home> jar (where no properties file is used)
-
@@ -225,6 +188,15 @@ is divided into following sections:
+
+
+
+
+
+
+
+
+
@@ -293,7 +265,7 @@ is divided into following sections:
-
+
@@ -332,7 +304,7 @@ is divided into following sections:
-
+
@@ -412,7 +384,7 @@ is divided into following sections:
-
+
@@ -435,7 +407,7 @@ is divided into following sections:
-
+
@@ -474,7 +446,7 @@ is divided into following sections:
-
+
@@ -554,7 +526,7 @@ is divided into following sections:
-
+
@@ -579,7 +551,7 @@ is divided into following sections:
-
+
@@ -759,9 +731,6 @@ is divided into following sections:
-
-
-
@@ -777,9 +746,7 @@ is divided into following sections:
-
-
-
+
@@ -804,7 +771,7 @@ is divided into following sections:
-
+
@@ -831,7 +798,7 @@ is divided into following sections:
-
+
@@ -1018,7 +985,7 @@ is divided into following sections:
- ${platform.java} -cp "${run.classpath.with.dist.jar}" ${main.class}
+ java -cp "${run.classpath.with.dist.jar}" ${main.class}
@@ -1045,7 +1012,7 @@ is divided into following sections:
To run this application from the command line without Ant, try:
- ${platform.java} -jar "${dist.jar.resolved}"
+ java -jar "${dist.jar.resolved}"
@@ -1236,7 +1203,7 @@ is divided into following sections:
-
+
diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties
index 8621aae..fad583e 100644
--- a/nbproject/genfiles.properties
+++ b/nbproject/genfiles.properties
@@ -1,8 +1,8 @@
-build.xml.data.CRC32=d35b316e
+build.xml.data.CRC32=31018a52
build.xml.script.CRC32=cd5c02b3
build.xml.stylesheet.CRC32=28e38971@1.56.1.46
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
-nbproject/build-impl.xml.data.CRC32=d35b316e
-nbproject/build-impl.xml.script.CRC32=0441a68e
+nbproject/build-impl.xml.data.CRC32=31018a52
+nbproject/build-impl.xml.script.CRC32=fe6e4d15
nbproject/build-impl.xml.stylesheet.CRC32=c6d2a60f@1.56.1.46
diff --git a/nbproject/private/private.properties b/nbproject/private/private.properties
index f8e12c4..0b27e09 100644
--- a/nbproject/private/private.properties
+++ b/nbproject/private/private.properties
@@ -1,2 +1,6 @@
compile.on.save=true
+do.depend=false
+do.jar=true
+javac.debug=true
+javadoc.preview=true
user.properties.file=/home/simon/.jmonkeyplatform/3.0/build.properties
diff --git a/nbproject/project.properties b/nbproject/project.properties
index 27b980f..5973007 100644
--- a/nbproject/project.properties
+++ b/nbproject/project.properties
@@ -1,9 +1,10 @@
annotation.processing.enabled=true
annotation.processing.enabled.in.editor=false
-annotation.processing.processor.options=
annotation.processing.processors.list=
annotation.processing.run.all.processors=true
annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
+application.title=milkwood
+application.vendor=simon
build.classes.dir=${build.dir}/classes
build.classes.excludes=**/*.java,**/*.form
# This directory is removed when the project is cleaned:
@@ -24,6 +25,7 @@ debug.test.classpath=\
dist.dir=dist
dist.jar=${dist.dir}/milkwood.jar
dist.javadoc.dir=${dist.dir}/javadoc
+endorsed.classpath=
excludes=
includes=**
jar.compress=false
@@ -33,8 +35,8 @@ javac.compilerargs=
javac.deprecation=false
javac.processorpath=\
${javac.classpath}
-javac.source=1.6
-javac.target=1.6
+javac.source=1.7
+javac.target=1.7
javac.test.classpath=\
${javac.classpath}:\
${build.classes.dir}
@@ -55,7 +57,8 @@ main.class=cc.journeyman.milkwood.Milkwood
manifest.file=manifest.mf
meta.inf.dir=${src.dir}/META-INF
mkdist.disabled=false
-platform.active=JDK_1.6
+obfuscate.options=-keep public class * extends com.jme3.app.Application{public *;}\n-keep public class * extends com.jme3.system.JmeSystemDelegate{public *;}\n-keep public class * implements com.jme3.renderer.Renderer{public *;}\n-keep public class * implements com.jme3.asset.AssetLoader{public *;}\n-keep public class * implements com.jme3.asset.AssetLocator{public *;}\n-keep public class * implements de.lessvoid.nifty.screen.ScreenController{public *;}\n-dontwarn\n-dontnote\n
+platform.active=default_platform
platforms.JDK_1.6.home=/usr/lib/jvm/java-6-openjdk-amd64/
run.classpath=\
${javac.classpath}:\
diff --git a/nbproject/project.xml b/nbproject/project.xml
index a6aa914..c684742 100644
--- a/nbproject/project.xml
+++ b/nbproject/project.xml
@@ -4,7 +4,6 @@
milkwood
-
diff --git a/src/cc/journeyman/milkwood/Composer.java b/src/cc/journeyman/milkwood/Composer.java
index c69f863..562ea35 100644
--- a/src/cc/journeyman/milkwood/Composer.java
+++ b/src/cc/journeyman/milkwood/Composer.java
@@ -1,139 +1,134 @@
package cc.journeyman.milkwood;
import java.util.Collection;
-import java.util.Collections;
/**
* Composes text output based on a rule tree.
- *
+ *
* @author simon
- *
+ *
*/
public class Composer {
- /**
- * Whether or not I am in debugging mode.
- */
- private final boolean debug;
- /**
- *
- * @param debug
- * Whether or not I am in debugging mode.
- */
- public Composer(boolean debug) {
- this.debug = debug;
- }
+ /**
+ * Whether or not I am in debugging mode.
+ */
+ private final boolean debug;
- /**
- * Recursive, backtracking, output generator.
- *
- * @param rules the rule set we're working to.
- * @param length the number of tokens still to be output.
- * @return if a successful path forward is found, that path, else null.
- */
- public WordSequence compose(RuleTreeNode rules, int length) {
- WordStack preamble = composePreamble(rules);
- WordSequence result = new WordSequence();
+ /**
+ *
+ * @param debug Whether or not I am in debugging mode.
+ */
+ public Composer(boolean debug) {
+ this.debug = debug;
+ }
- // composing the preamble will have ended with *ROOT* on top of the
- // stack;
- // get rid of it.
- preamble.pop();
-
- if (debug) {
- System.err.println( "Preamble: " + preamble);
- }
+ /**
+ * Recursive, backtracking, output generator.
+ *
+ * @param rules the rule set we're working to.
+ * @param length the number of tokens still to be output.
+ * @return if a successful path forward is found, that path, else null.
+ */
+ protected WordSequence compose(RuleTreeNode rules, int length) {
+ Window preamble = composePreamble(rules);
+ WordSequence result = new WordSequence();
- result.addAll(preamble);
-
- WordStack body = this.compose(preamble, rules, length);
- Collections.reverse(body);
- result.addAll(body);
-
- return result;
- }
+ // composing the preamble will have ended with *ROOT* on top of the
+ // stack;
+ // get rid of it.
+ preamble.pop();
- /**
- * Recursively attempt to find sequences in the ruleset to append to what's
- * been composed so far.
- *
- * @param glanceBack the last few words output.
- * @param rules the rule set we're working to.
- * @param length the number of tokens still to be output.
- * @return if a successful path forward is found, that path, else null.
- */
- private WordStack compose(WordStack glanceBack, RuleTreeNode rules,
- int length) {
- final WordStack result;
-
- if ( debug) {
- System.err.println( String.format( "%d: %s", length, glanceBack));
- }
+ if (debug) {
+ System.err.println("Preamble: " + preamble);
+ }
- /* are we there yet? */
- if (length == 0) {
- result = new WordStack();
- } else {
- /*
- * are there any rules in this ruleset which matches the current
- * sliding window? if so, then recurse; if not, then fail.
- */
- Collection words = rules.match(glanceBack.duplicate());
+ result.addAll(preamble);
- if (words.isEmpty()) {
- /* backtrack */
- result = null;
- } else {
- result = tryOptions(words, glanceBack, rules, length);
- }
- }
- return result;
- }
-
- /**
- * Try each of these candidates in turn, attempting to recurse.
- * @param candidates words which could potentially be added to the output.
- * @param glanceBack the last few words output.
- * @param allRules the rule set we're working to.
- * @param length the number of tokens still to be output.
- * @return if a successful path forward is found, that path, else null.
- */
- private WordStack tryOptions(Collection candidates,
- WordStack glanceBack, RuleTreeNode allRules, int length) {
- WordStack result = null;
-
- for ( String candidate : candidates) {
- result = compose( new WordStack(glanceBack, candidate), allRules, length - 1);
- if ( result != null) {
- /* by Jove, I think she's got it! */
- result.push(candidate);
- break;
- }
- }
-
- return result;
- }
+ result.addAll(this.compose(preamble, rules, length));
+ return result;
+ }
- /**
- * Random walk of the rule tree to extract (from the root) a legal sequence
- * of words the length of our tuple.
- *
- * @param rules
- * the rule tree (fragment) to walk.
- * @return a sequence of words.
- */
- private WordStack composePreamble(RuleTreeNode rules) {
- final WordStack result;
- final RuleTreeNode successor = rules.getRule();
+ /**
+ * Recursively attempt to find sequences in the ruleset to append to what's
+ * been composed so far.
+ *
+ * @param glanceBack the last few words output.
+ * @param rules the rule set we're working to.
+ * @param length the number of tokens still to be output.
+ * @return if a successful path forward is found, that path, else null.
+ */
+ private WordSequence compose(Window glanceBack, RuleTreeNode rules,
+ int length) {
+ final WordSequence result;
- if (successor == null) {
- result = new WordStack();
- } else {
- result = this.composePreamble(successor);
- result.push(rules.getWord());
- }
- return result;
- }
+ if (debug) {
+ System.err.println(String.format("%d: %s", length, glanceBack));
+ }
+ /* are we there yet? */
+ if (length == 0) {
+ result = new WordSequence();
+ } else {
+ /*
+ * are there any rules in this ruleset which match the current
+ * sliding window? if so, then recurse; if not, then fail.
+ */
+ Collection words = rules.match(glanceBack.duplicate());
+
+ if (words.isEmpty()) {
+ /* backtrack */
+ result = null;
+ } else {
+ result = tryOptions(words, glanceBack, rules, length);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Try each of these candidates in turn, attempting to recurse.
+ *
+ * @param candidates words which could potentially be added to the output.
+ * @param glanceBack the last few words output.
+ * @param allRules the rule set we're working to.
+ * @param length the number of tokens still to be output.
+ * @return if a successful path forward is found, that path, else null.
+ */
+ private WordSequence tryOptions(Collection candidates,
+ Window glanceBack, RuleTreeNode allRules, int length) {
+ WordSequence result = null;
+
+ for (String candidate : candidates) {
+ result = compose(new Window(glanceBack, candidate), allRules, length - 1);
+ if (result != null) {
+ /* by Jove, I think she's got it! */
+ result.push(candidate);
+ break;
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * Random walk of the rule tree to extract (from the root) a legal sequence
+ * of words the length of our tuple.
+ *
+ * @param rules the rule tree (fragment) to walk.
+ * @return a sequence of words.
+ */
+ private Window composePreamble(RuleTreeNode rules) {
+ final Window result;
+ final RuleTreeNode successor = rules.getRule();
+
+ if (successor == null) {
+ result = new Window();
+ } else {
+ result = this.composePreamble(successor);
+ result.push(rules.getWord());
+ }
+ return result;
+ }
}
diff --git a/src/cc/journeyman/milkwood/Digester.java b/src/cc/journeyman/milkwood/Digester.java
index 8b2facb..973cc32 100644
--- a/src/cc/journeyman/milkwood/Digester.java
+++ b/src/cc/journeyman/milkwood/Digester.java
@@ -15,46 +15,44 @@ import java.util.Queue;
/**
* Read an input stream of text and digest it into a set of generation rules.
* Separated out of TextGenerator mainly to declutter tht class.
- *
+ *
* @author simon
- *
+ *
*/
public class Digester {
- /**
- * Read tokens from the input stream, and compile them into the rule tree
- * below this root.
- *
- * @param in
- * the input stream from which I read.
- * @param tupleLength
- * the length of the tuples I read.
- * @param root
- * the ruleset to which I shall add.
- * @return the number of tokens read.
- * @throws IOException if can't read from file system.
- */
- protected int read(final InputStream in, final int tupleLength,
- final RuleTreeNode root) throws IOException {
- int result = 0;
- final Queue openTuples = new LinkedList();
- final Tokeniser tok = new Tokeniser(in);
- for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok
- .nextToken()) {
- result++;
- final WordSequence newTuple = new WordSequence();
- String token = tok.readBareToken();
+ /**
+ * Read tokens from the input stream, and compile them into the rule tree
+ * below this root.
+ *
+ * @param in the input stream from which I read.
+ * @param tupleLength the length of the tuples I read.
+ * @param root the ruleset to which I shall add.
+ * @return the number of tokens read.
+ * @throws IOException if can't read from file system.
+ */
+ protected int digest(final InputStream in, final int tupleLength,
+ final RuleTreeNode root) throws IOException {
+ int result = 0;
+ final Queue openTuples = new LinkedList<>();
+ final Tokeniser tok = new Tokeniser(in);
- openTuples.add(newTuple);
- for (WordSequence tuple : openTuples) {
- tuple.add(token);
- }
+ for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok
+ .nextToken()) {
+ result++;
+ final WordSequence newTuple = new WordSequence();
+ String token = tok.readBareToken();
- if (openTuples.size() > tupleLength) {
- root.addSequence(openTuples.remove());
- }
- }
+ openTuples.add(newTuple);
+ for (WordSequence tuple : openTuples) {
+ tuple.add(token);
+ }
- return result;
- }
+ if (openTuples.size() > tupleLength) {
+ root.addSequence(openTuples.remove());
+ }
+ }
+
+ return result;
+ }
}
diff --git a/src/cc/journeyman/milkwood/Milkwood.java b/src/cc/journeyman/milkwood/Milkwood.java
index 19011b7..8b0a71d 100644
--- a/src/cc/journeyman/milkwood/Milkwood.java
+++ b/src/cc/journeyman/milkwood/Milkwood.java
@@ -1,3 +1,9 @@
+/*
+ * Proprietary unpublished source code property of
+ * Simon Brooke .
+ *
+ * Copyright (c) 2013 Simon Brooke
+ */
package cc.journeyman.milkwood;
import java.io.File;
@@ -8,182 +14,158 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-/*
- * Proprietary unpublished source code property of
- * Simon Brooke .
- *
- * Copyright (c) 2013 Simon Brooke
- */
-
/**
- *
+ * Text mangler based on
+ * http://codekata.pragprog.com/2007/01/kata_fourteen_t.html
+ *
* @author Simon Brooke
*/
public class Milkwood {
- /**
- * The magic token which is deemed to end sentences.
- */
- public static final String PERIOD = ".";
- /**
- * Parse command line arguments and kick off the process. Expected arguments
- * include:
- *
- * - -d, -debug
- * - Print debugging output to standard error
- * - -i [FILE], -input [FILE]
- * - Input file, expected to be an English (or, frankly, other natural
- * language) text. Defaults to standard in.
- * - -l [NN], -length [NN]
- * - The length in tuples of the desired output. Defaults to 100.
- *
- -n [NN], -tuple-length [NN]
- * - The length of tuples into which the file will be analysed, default 2.
- *
- * - -o [FILE], -output [FILE]
- * - Output file, to which generated text will be written. Defaults to
- * standard out.
- *
- *
- * @param args
- * the command line arguments
- * @exception FileNotFoundException
- * if the user specifies a file which isn't available.
- * @excpetion IOException if could not read from input or write to output.
- */
- public static void main(String[] args) throws FileNotFoundException,
- IOException {
- /* defaults */
- InputStream in = System.in;
- OutputStream out = System.out;
- int tupleLength = 2;
- boolean debug = false;
- int length = 100;
+ /**
+ * The magic token which is deemed to end sentences.
+ */
+ public static final String PERIOD = ".";
- for (int cursor = 0; cursor < args.length; cursor++) {
- String arg = args[cursor];
+ /**
+ * Parse command line arguments and kick off the process. Expected arguments
+ * include:
+ *
+ * - -d, -debug
+ * - Print debugging output to standard error
+ * - -i [FILE], -input [FILE]
+ * - Input file, expected to be an English (or, frankly, other natural
+ * language) text. Defaults to standard in.
+ * - -l [NN], -length [NN]
+ * - The length in tokens of the desired output. Defaults to 100.
+ *
+ * - -n [NN], -tuple-length [NN]
+ * - The length of tuples into which the file will be analysed, default 2.
+ *
+ * - -o [FILE], -output [FILE]
+ * - Output file, to which generated text will be written. Defaults to
+ * standard out.
+ *
+ *
+ * @param args the command line arguments
+ * @exception FileNotFoundException if the user specifies a file which isn't
+ * available.
+ * @exception IOException if could not read from input or write to output.
+ */
+ public static void main(String[] args) throws FileNotFoundException,
+ IOException {
+ /* defaults */
+ InputStream in = System.in;
+ OutputStream out = System.out;
+ int tupleLength = 2;
+ boolean debug = false;
+ int length = 100;
- if (arg.startsWith("-") && arg.length() > 1) {
- switch (arg.charAt(1)) {
- case 'd':
- debug = true;
- break;
- case 'i':
- // input
- in = new FileInputStream(new File(args[++cursor]));
- break;
- case 'o': // output
- out = new FileOutputStream(new File(args[++cursor]));
- break;
- case 'l': // length
- length = Integer.parseInt(args[++cursor]);
- break;
- case 'n':
- case 't': // tuple length
- tupleLength = Integer.parseInt(args[++cursor]);
- break;
- default:
- throw new IllegalArgumentException(String.format(
- "Unrecognised argument '%s'", arg));
- }
- }
- }
- try {
- new Milkwood().readAndGenerate(in, out, tupleLength, length, debug);
- } finally {
- out.close();
- }
- }
+ for (int cursor = 0; cursor < args.length; cursor++) {
+ String arg = args[cursor];
- /**
- * Read tokens from this input and use them to generate text on this output.
- *
- * @param in
- * the input stream to read.
- * @param out
- * the output stream to write to.
- * @param tupleLength
- * the length of tuples to be used in generation.
- * @param length
- * the length in tokens of the output to be generated.
- * @param debug
- * whether to print debugging output.
- * @throws IOException
- * if the file system buggers up, which is not, in the cosmic
- * scheme of things, very likely.
- */
- void readAndGenerate(final InputStream in, final OutputStream out,
- final int tupleLength, int length, boolean debug)
- throws IOException {
- /* The root of the rule tree I shall build. */
- RuleTreeNode root = new RuleTreeNode();
- read(in, tupleLength, debug, root);
+ if (arg.startsWith("-") && arg.length() > 1) {
+ switch (arg.charAt(1)) {
+ case 'd':
+ debug = true;
+ break;
+ case 'i':
+ // input
+ in = new FileInputStream(new File(args[++cursor]));
+ break;
+ case 'o': // output
+ out = new FileOutputStream(new File(args[++cursor]));
+ break;
+ case 'l': // length
+ length = Integer.parseInt(args[++cursor]);
+ break;
+ case 'n':
+ case 't': // tuple length
+ tupleLength = Integer.parseInt(args[++cursor]);
+ break;
+ default:
+ throw new IllegalArgumentException(String.format(
+ "Unrecognised argument '%s'", arg));
+ }
+ }
+ }
+ try {
+ new Milkwood().readAndGenerate(in, out, tupleLength, length, debug);
+ } finally {
+ out.close();
+ }
+ }
- WordSequence tokens = compose(tupleLength, debug, root, length);
+ /**
+ * Read tokens from this input and use them to generate text on this output.
+ *
+ * @param in the input stream to read.
+ * @param out the output stream to write to.
+ * @param tupleLength the length of tuples to be used in generation.
+ * @param length the length in tokens of the output to be generated.
+ * @param debug whether to print debugging output.
+ * @throws IOException if the file system buggers up, which is not, in the
+ * cosmic scheme of things, very likely.
+ */
+ void readAndGenerate(final InputStream in, final OutputStream out,
+ final int tupleLength, int length, boolean debug)
+ throws IOException {
+ /* The root of the rule tree I shall build. */
+ RuleTreeNode root = new RuleTreeNode();
+ read(in, tupleLength, debug, root);
- write(out, debug, tokens);
-
- if ( debug) {
- System.err.println( "\n\nCompleted.");
- }
- }
+ WordSequence tokens = compose(tupleLength, debug, root, length);
- /**
- * Digest the input into a set of rules.
- *
- * @param in
- * the input stream.
- * @param tupleLength
- * the length of tuples we shall consider.
- * @param debug
- * whether or not to print debugging output.
- * @param root
- * the root of the rule tree.
- * @return the number of tokens read.
- * @throws IOException
- * if the file system buggers up, which is not, in the cosmic
- * scheme of things, very likely.
- */
- private int read(final InputStream in, final int tupleLength,
- boolean debug, RuleTreeNode root) throws IOException {
- int length = new Digester().read(in, tupleLength, root);
+ write(out, debug, tokens);
- if (debug) {
- System.err.println(root.toString());
- }
- return length;
- }
+ if (debug) {
+ System.err.println("\n\nCompleted.");
+ }
+ }
- private WordSequence compose(final int tupleLength, boolean debug,
- RuleTreeNode root, int length) {
- WordSequence tokens = new Composer(debug).compose(root, length);
+ /**
+ * Digest the input into a set of rules.
+ *
+ * @param in the input stream.
+ * @param tupleLength the length of tuples we shall consider.
+ * @param debug whether or not to print debugging output.
+ * @param root the root of the rule tree.
+ * @return the number of tokens read.
+ * @throws IOException if the file system buggers up, which is not, in the
+ * cosmic scheme of things, very likely.
+ */
+ private int read(final InputStream in, final int tupleLength,
+ boolean debug, RuleTreeNode root) throws IOException {
+ int length = new Digester().digest(in, tupleLength, root);
- if (tokens.contains(PERIOD)) {
- tokens = tokens.truncateAtLastInstance(PERIOD);
- }
- return tokens;
- }
+ if (debug) {
+ System.err.println(root.toString());
+ }
+ return length;
+ }
- /**
- * Write this sequence of tokens to this output.
- *
- * @param out
- * the stream to which to write.
- * @param debug
- * whether or not to print debugging output.
- * @param tokens
- * the sequence of tokens to write.
- * @throws IOException
- * if the file system buggers up, which is not, in the cosmic
- * scheme of things, very likely.
- */
- private void write(final OutputStream out, boolean debug,
- WordSequence tokens) throws IOException {
- Writer scrivenor = new Writer(out, debug);
- try {
- scrivenor.writeSequence(tokens);
- } finally {
- scrivenor.close();
- }
- }
+ private WordSequence compose(final int tupleLength, boolean debug,
+ RuleTreeNode root, int length) {
+ WordSequence tokens = new Composer(debug).compose(root, length);
+ if (tokens.contains(PERIOD)) {
+ tokens = tokens.truncateAtLastInstance(PERIOD);
+ }
+ return tokens;
+ }
+
+ /**
+ * Write this sequence of tokens to this output.
+ *
+ * @param out the stream to which to write.
+ * @param debug whether or not to print debugging output.
+ * @param tokens the sequence of tokens to write.
+ * @throws IOException if the file system buggers up, which is not, in the
+ * cosmic scheme of things, very likely.
+ */
+ private void write(final OutputStream out, boolean debug,
+ WordSequence tokens) throws IOException {
+ try (Writer scrivenor = new Writer(out, debug)) {
+ scrivenor.writeSequence(tokens);
+ }
+ }
}
diff --git a/src/cc/journeyman/milkwood/NoSuchPathException.java b/src/cc/journeyman/milkwood/NoSuchPathException.java
deleted file mode 100644
index 2b49071..0000000
--- a/src/cc/journeyman/milkwood/NoSuchPathException.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Proprietary unpublished source code property of
- * Simon Brooke .
- *
- * Copyright (c) 2013 Simon Brooke
- */
-package cc.journeyman.milkwood;
-
-/**
- *
- * @author Simon Brooke
- */
-class NoSuchPathException extends Exception {
-
- private static final long serialVersionUID = 1L;
-
-}
diff --git a/src/cc/journeyman/milkwood/RuleTreeNode.java b/src/cc/journeyman/milkwood/RuleTreeNode.java
index 413b9b4..a0263ba 100644
--- a/src/cc/journeyman/milkwood/RuleTreeNode.java
+++ b/src/cc/journeyman/milkwood/RuleTreeNode.java
@@ -16,17 +16,18 @@ import java.util.Random;
import java.util.Stack;
/**
- * Mapping a word to its successor words. This is probably highly
- * inefficient of store, but for the present purposes my withers are unwrung.
- * Not thread safe in this form because of access to the random number generator.
- *
+ * Mapping a word to its successor words. This is probably highly inefficient of
+ * store, but for the present purposes my withers are unwrung. Not thread safe
+ * in this form because of access to the random number generator.
+ *
* @author Simon Brooke
*/
public class RuleTreeNode {
- /**
- * The magic token which identifies the root node of a rule tree.
- */
- public static final String ROOTMAGICTOKEN = "*ROOT*";
+
+ /**
+ * The magic token which identifies the root node of a rule tree.
+ */
+ public static final String ROOTMAGICTOKEN = "*ROOT*";
/**
* The line separator on this platform.
*/
@@ -35,64 +36,66 @@ public class RuleTreeNode {
* A random number generator.
*/
private static Random RANDOM = new Random();
-
/**
* The word at this node.
*/
private final String word;
-
/**
* Potential successors of this node
*/
- private Map rules = new HashMap();
-
+ private Map rules = new HashMap<>();
+
/**
* If no argument passed, generate a root node.
*/
public RuleTreeNode() {
- this( RuleTreeNode.ROOTMAGICTOKEN);
+ this(RuleTreeNode.ROOTMAGICTOKEN);
}
/**
* Create me wrapping this word.
+ *
* @param word the word I represent.
*/
public RuleTreeNode(String word) {
this.word = word;
}
-
-
+
+ /**
+ * Specialisation: neatly format the rule tree.
+ *
+ * @return a neatly formatted representation.
+ */
+ @Override
public String toString() {
- StringBuffer buffy = new StringBuffer();
-
- this.printToBuffer( buffy, 0);
-
-
- return buffy.toString();
+ StringBuffer buffy = new StringBuffer();
+
+ this.printToBuffer(buffy, 0);
+
+
+ return buffy.toString();
}
-
-
+
private void printToBuffer(StringBuffer buffy, int indent) {
- for (int i = 0; i < indent; i++) {
- buffy.append( '\t');
- }
- buffy.append( this.getWord());
-
-
- if ( this.rules.isEmpty()) {
- buffy.append(NEWLINE);
- } else {
- buffy.append( " ==>").append(NEWLINE);
- for ( String successor : this.getSuccessors()) {
- rules.get(successor).printToBuffer(buffy, indent + 1);
- }
- buffy.append(NEWLINE);
- }
- }
+ for (int i = 0; i < indent; i++) {
+ buffy.append('\t');
+ }
+ buffy.append(this.getWord());
- /**
- *
+ if (this.rules.isEmpty()) {
+ buffy.append(NEWLINE);
+ } else {
+ buffy.append(" ==>").append(NEWLINE);
+ for (String successor : this.getSuccessors()) {
+ rules.get(successor).printToBuffer(buffy, indent + 1);
+ }
+ buffy.append(NEWLINE);
+ }
+ }
+
+ /**
+ *
* @return my word.
*/
public String getWord() {
@@ -100,60 +103,60 @@ public class RuleTreeNode {
}
/**
- *
+ *
* @return a shuffled list of the words which could follow this one.
*/
public Collection getSuccessors() {
- ArrayList result = new ArrayList();
+ ArrayList result = new ArrayList<>();
result.addAll(rules.keySet());
Collections.shuffle(result, RANDOM);
return result;
}
-
-
+
/**
* Compile this sequence of tokens into rule nodes under me.
+ *
* @param sequence the sequence of tokens to compile.
*/
public void addSequence(Queue sequence) {
if (!sequence.isEmpty()) {
- String word = sequence.remove();
- RuleTreeNode successor = this.getRule(word);
+ String token = sequence.remove();
+ RuleTreeNode successor = this.getRule(token);
if (successor == null) {
- successor = new RuleTreeNode(word);
- this.rules.put(word, successor);
+ successor = new RuleTreeNode(token);
+ this.rules.put(token, successor);
}
-
+
successor.addSequence(sequence);
}
}
-
- /**
+
+ /**
* Choose a successor at random.
- *
+ *
* @return the successor chosen, or null if I have none.
*/
- protected RuleTreeNode getRule() {
- RuleTreeNode result = null;
+ protected RuleTreeNode getRule() {
+ RuleTreeNode result = null;
- if (!rules.isEmpty()) {
- int target = RANDOM.nextInt(rules.keySet().size());
+ if (!rules.isEmpty()) {
+ int target = RANDOM.nextInt(rules.keySet().size());
- for (String key : rules.keySet()) {
- /*
- * NOTE: decrement after test.
- */
- if (target-- == 0) {
- result = rules.get(key);
- }
- }
- }
+ for (String key : rules.keySet()) {
+ /*
+ * NOTE: decrement after test.
+ */
+ if (target-- == 0) {
+ result = rules.get(key);
+ }
+ }
+ }
- return result;
+ return result;
}
-
+
/**
- *
+ *
* @param token a token to seek.
* @return the successor among my successors which has this token, if any.
*/
@@ -161,44 +164,46 @@ public class RuleTreeNode {
return rules.get(token);
}
- protected String getWord(Stack path) throws NoSuchPathException {
+ protected String getWord(Stack path) {
final String result;
-
- if ( path.isEmpty()) {
+
+ if (path.isEmpty()) {
result = this.getWord();
} else {
final RuleTreeNode successor = this.getRule(path.pop());
-
+
if (successor == null) {
result = null;
} else {
result = successor.getWord(path);
}
}
-
+
return result;
}
/**
- * Find all the terminal strings in the current rule set which would match this path.
+ * Find all the terminal strings in the current rule set which would match
+ * this path.
+ *
* @param path the path to match
* @return a collection (possibly empty) of potential successors.
*/
- public Collection match(WordStack path) {
- final Collection result;
-
- if ( path.isEmpty()) {
+ public Collection match(Window path) {
+ final Collection result;
+
+ if (path.isEmpty()) {
result = this.getSuccessors();
} else {
final RuleTreeNode successor = this.getRule(path.pop());
-
+
if (successor == null) {
- result = new ArrayList();
+ result = new ArrayList<>();
} else {
result = successor.match(path);
}
}
-
- return result;
- }
+
+ return result;
+ }
}
diff --git a/src/cc/journeyman/milkwood/Tokeniser.java b/src/cc/journeyman/milkwood/Tokeniser.java
index 7ce945b..b9b1317 100644
--- a/src/cc/journeyman/milkwood/Tokeniser.java
+++ b/src/cc/journeyman/milkwood/Tokeniser.java
@@ -16,66 +16,69 @@ import java.io.StreamTokenizer;
* A tokeniser which reads tokens in a manner which suits me. Although this
* implementation is based on a StreamTokenizer, the point of separating this
* out into its own class is that if I had more time I could reimplement.
- *
+ *
* @author simon
- *
+ *
*/
public class Tokeniser extends StreamTokenizer {
- public Tokeniser(Reader r) {
- super(r);
+ /**
+ * Initialise me appropriately wrapping this reader.
+ * @param r the reader to wrap.
+ */
+ public Tokeniser(Reader r) {
+ super(r);
- this.resetSyntax();
- this.whitespaceChars(8, 15);
- this.whitespaceChars(28, 32);
- /*
- * treat quotemarks as white space. Actually it would be better if quote
- * marks were white space only if preceded or followed by whitespace, so
- * that, e.g., 'don't' and 'can't' appeared as single tokens. But that
- * means really reimplementing the parser and I don't have time.
- */
- this.whitespaceChars((int) '\"', (int) '\"');
- this.whitespaceChars((int) '\'', (int) '\'');
- /*
- * treat underscore and hyphen as whitespace as well. Again, hyphen with
- * either leading or trailing non-whitespace probably ought to be
- * treated specially, but...
- */
- this.whitespaceChars((int) '_', (int) '_');
- this.whitespaceChars((int) '-', (int) '-');
- this.wordChars((int) '0', (int) '9');
- this.wordChars((int) 'A', (int) 'Z');
- this.wordChars((int) 'a', (int) 'z');
- }
+ this.resetSyntax();
+ this.whitespaceChars(8, 15);
+ this.whitespaceChars(28, 32);
+ /*
+ * treat quotemarks as white space. Actually it would be better if quote
+ * marks were white space only if preceded or followed by whitespace, so
+ * that, e.g., 'don't' and 'can't' appeared as single tokens. But that
+ * means really reimplementing the parser and I don't have time.
+ */
+ this.whitespaceChars((int) '\"', (int) '\"');
+ this.whitespaceChars((int) '\'', (int) '\'');
+ /*
+ * treat underscore and hyphen as whitespace as well. Again, hyphen with
+ * either leading or trailing non-whitespace probably ought to be
+ * treated specially, but...
+ */
+ this.whitespaceChars((int) '_', (int) '_');
+ this.whitespaceChars((int) '-', (int) '-');
+ this.wordChars((int) '0', (int) '9');
+ this.wordChars((int) 'A', (int) 'Z');
+ this.wordChars((int) 'a', (int) 'z');
+ }
- public Tokeniser(InputStream in) {
- this(new BufferedReader(new InputStreamReader(in)));
- }
+ public Tokeniser(InputStream in) {
+ this(new BufferedReader(new InputStreamReader(in)));
+ }
- /**
- * There surely must be a better way to get just the token out of a
- * StreamTokenizer...!
- */
- public String readBareToken() {
- final String token;
-
- switch (this.ttype) {
- case StreamTokenizer.TT_EOL:
- token = "FIXME"; // TODO: fix this!
- break;
- case StreamTokenizer.TT_NUMBER:
- token = new Double(this.nval).toString();
- break;
- case StreamTokenizer.TT_WORD:
- token = this.sval.toLowerCase();
- break;
- default:
- StringBuffer buffy = new StringBuffer();
- buffy.append((char) this.ttype);
- token = buffy.toString();
- break;
- }
- return token;
- }
+ /**
+ * There surely must be a better way to get just the token out of a
+ * StreamTokenizer...!
+ */
+ public String readBareToken() {
+ final String token;
+ switch (this.ttype) {
+ case StreamTokenizer.TT_EOL:
+ token = "FIXME"; // TODO: fix this!
+ break;
+ case StreamTokenizer.TT_NUMBER:
+ token = new Double(this.nval).toString();
+ break;
+ case StreamTokenizer.TT_WORD:
+ token = this.sval.toLowerCase();
+ break;
+ default:
+ StringBuilder bob = new StringBuilder();
+ bob.append((char) this.ttype);
+ token = bob.toString();
+ break;
+ }
+ return token;
+ }
}
diff --git a/src/cc/journeyman/milkwood/TupleDictionary.java b/src/cc/journeyman/milkwood/TupleDictionary.java
deleted file mode 100644
index ae192ab..0000000
--- a/src/cc/journeyman/milkwood/TupleDictionary.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Proprietary unpublished source code property of
- * Simon Brooke .
- *
- * Copyright (c) 2013 Simon Brooke
- */
-package cc.journeyman.milkwood;
-
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-
-/**
- *
- * @author Simon Brooke
- */
-public class TupleDictionary extends HashMap> {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * Specialisation: if there isn't an existing entry, create one.
- *
- * @param token the token to look up
- * @return the collection of possible tuples for that token.
- */
- public Collection get(String token) {
- Collection result = super.get(token);
-
- if (result == null) {
- result = new ArrayList();
- this.put(token, result);
- }
-
- return result;
- }
-
- /**
- * Add a new, empty sequence to my entry for this token.
- * @param token the token
- * @return the new sequence which was added.
- */
- protected WordSequence addSequence(String token) {
- return this.addSequence(token, new WordSequence());
- }
-
- /**
- * Add this sequence to my entry for this token.
- * @param token the token.
- * @param sequence the sequence to add. Must not be null!
- * @return the sequence which was added.
- */
- protected WordSequence addSequence(String token, WordSequence sequence) {
- assert (sequence != null) : "invalid sequence argument";
-
- this.get(token).add(sequence);
-
- return sequence;
- }
-}
diff --git a/src/cc/journeyman/milkwood/Window.java b/src/cc/journeyman/milkwood/Window.java
new file mode 100644
index 0000000..1b50b23
--- /dev/null
+++ b/src/cc/journeyman/milkwood/Window.java
@@ -0,0 +1,56 @@
+package cc.journeyman.milkwood;
+
+import java.util.Stack;
+
+/**
+ * Sliding window which rules may match.
+ *
+ * @author simon
+ *
+ */
+public class Window extends Stack {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Create a new, empty window.
+ */
+ public Window() {
+ super();
+ }
+
+ /**
+ * Create a new window from this window, having this new word as its
+ * terminal and omitting the current first word. That is, the new window
+ * should be as long as the old, with each word shuffled up one place.
+ *
+ * @param prototype the window to copy from.
+ * @param terminal the new terminal word.
+ */
+ public Window(Window prototype, String terminal) {
+ this();
+
+ Window copy = prototype.duplicate();
+ copy.pop();
+ this.populate(copy, terminal);
+ }
+
+ private void populate(Window copy, String terminal) {
+ if (copy.isEmpty()) {
+ this.push(terminal);
+ } else {
+ String token = copy.pop();
+ this.populate(copy, terminal);
+ this.push(token);
+ }
+ }
+
+ /**
+ * A wrapper round clone which hides all the ugly casting.
+ *
+ * @return a duplicate copy of myself.
+ */
+ public Window duplicate() {
+ return (Window) this.clone();
+ }
+}
diff --git a/src/cc/journeyman/milkwood/WordSequence.java b/src/cc/journeyman/milkwood/WordSequence.java
index 6908848..69d0532 100644
--- a/src/cc/journeyman/milkwood/WordSequence.java
+++ b/src/cc/journeyman/milkwood/WordSequence.java
@@ -12,56 +12,58 @@ import java.util.Queue;
/**
* An ordered sequence of words. Of course it implements Queue since it is a
* LinkedList and LinkedList implements Queue, but I want to make it explicitly
- * clear that this is a queue and can be used as such.
- *
+ * clear that this is a queue and can be used as such. Different from Window
+ * which is a Stack.
+ *
+ * @see Window
+ *
* @author Simon Brooke
*/
-class WordSequence extends LinkedList implements Queue {
+public class WordSequence extends LinkedList implements Queue {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- /**
- *
- * @param tokens
- * a sequence of tokens
- * @param marker
- * a marker to terminate after the last occurrance of.
- * @return a copy of tokens, truncated at the last occurrance of the marker.
- */
- public WordSequence truncateAtLastInstance(String marker) {
- final WordSequence result = new WordSequence();
+ /**
+ *
+ * @param tokens a sequence of tokens
+ * @param marker a marker to terminate after the last occurrence of.
+ * @return a copy of tokens, truncated at the last occurrence of the marker.
+ */
+ public WordSequence truncateAtLastInstance(String marker) {
+ final WordSequence result = new WordSequence();
- for (String token : this) {
- if (token.endsWith(marker) && !this.contains(marker)) {
- /*
- * If the token we're looking at ends with the marker, and the
- * remainder of the tokens does not include a token ending with
- * the marker, we're done. Otherwise, we continue. OK?
- */
- break;
- }
- result.add(token);
- }
+ for (String token : this) {
+ result.add(token);
+ if (token.endsWith(marker) && !this.contains(marker)) {
+ /*
+ * If the token we're looking at ends with the marker, and the
+ * remainder of the tokens does not include a token ending with
+ * the marker, we're done. Otherwise, we continue. OK?
+ */
+ break;
+ }
+ }
- return result;
- }
+ return result;
+ }
- /**
- * Specialisation: Working around the bug that the tokeniser treats PERIOD as a word character.
- */
- @Override
- public boolean contains(Object target) {
- boolean result = false;
- if (target != null) {
- String marker = target.toString();
+ /**
+ * Specialisation: Working around the bug that the tokeniser treats PERIOD
+ * as a word character.
+ */
+ @Override
+ public boolean contains(Object target) {
+ boolean result = false;
+ if (target != null) {
+ String marker = target.toString();
- for (String token : this) {
- if (token.endsWith(marker)) {
- result = true;
- break;
- }
- }
- }
- return result;
- }
+ for (String token : this) {
+ if (token.endsWith(marker)) {
+ result = true;
+ break;
+ }
+ }
+ }
+ return result;
+ }
}
diff --git a/src/cc/journeyman/milkwood/WordStack.java b/src/cc/journeyman/milkwood/WordStack.java
deleted file mode 100644
index 9760182..0000000
--- a/src/cc/journeyman/milkwood/WordStack.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package cc.journeyman.milkwood;
-
-import java.util.Stack;
-
-/**
- * Sliding window which rules may match.
- *
- * @author simon
- *
- */
-public class WordStack extends Stack {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * Create a new, empty, wordstack.
- */
- public WordStack() {
- super();
- }
-
- /**
- * create a new window from this window, having this new word as its
- * terminal and ommitting the current first word. That is, the new window
- * should be as long as the old, with each word shuffled up one place.
- *
- * @param prototype the window to copy from.
- * @param terminal the new terminal word.
- */
- public WordStack(WordStack prototype, String terminal) {
- this();
-
- WordStack copy = prototype.duplicate();
- copy.pop();
- this.populate( copy, terminal);
- }
-
- private void populate(WordStack copy, String terminal) {
- if ( copy.isEmpty()) {
- this.push(terminal);
- } else {
- String token = copy.pop();
- this.populate(copy, terminal);
- this.push( token);
- }
- }
-
- /**
- * A wrapper round clone which hides all the ugly casting.
- *
- * @return a duplicate copy of myself.
- */
- public WordStack duplicate() {
- return (WordStack) this.clone();
- }
-
-}
diff --git a/src/cc/journeyman/milkwood/Writer.java b/src/cc/journeyman/milkwood/Writer.java
index 527a342..e70e6f8 100644
--- a/src/cc/journeyman/milkwood/Writer.java
+++ b/src/cc/journeyman/milkwood/Writer.java
@@ -16,156 +16,141 @@ import java.util.Random;
/**
* A special purpose writer to write sequences of tokens, chopping them up into
* paragraphs on the fly..
- *
+ *
* @author Simon Brooke
*/
class Writer extends BufferedWriter {
- /**
- * The average number of sentences in a paragraph.
- */
- public static final int AVSENTENCESPERPARA = 5;
- /**
- * A random number generator.
- */
- private static Random RANDOM = new Random();
- /**
- * Dictionary of first-words we know about; each first-word maps onto a
- * tuple of tuples of word sequences beginning with that word, so 'I' might
- * map onto [[I, CAME, COMMA],[I, SAW, COMMA],[I CONQUERED COMMA]].
- */
- TupleDictionary dictionary = new TupleDictionary();
- /**
- * Whether or not I am in debugging mode.
- */
- @SuppressWarnings("unused")
- private final boolean debug;
+ /**
+ * Line separator on this platform.
+ */
+ public static final String NEWLINE = System.getProperty("line.separator");
+ /**
+ * The average number of sentences in a paragraph.
+ */
+ public static final int AVSENTENCESPERPARA = 5;
+ /**
+ * A random number generator.
+ */
+ private static Random RANDOM = new Random();
+ /**
+ * Whether or not I am in debugging mode.
+ */
+ @SuppressWarnings("unused")
+ private final boolean debug;
- /**
- * @param out
- * the output stream to which I shall write.
- * @param debug
- * Whether or not I am in debugging mode.
- */
- public Writer(OutputStream out, final boolean debug) {
- super(new OutputStreamWriter(out));
- this.debug = debug;
- }
+ /**
+ * @param out the output stream to which I shall write.
+ * @param debug Whether or not I am in debugging mode.
+ */
+ public Writer(OutputStream out, final boolean debug) {
+ super(new OutputStreamWriter(out));
+ this.debug = debug;
+ }
- /**
- * Write this sequence of tokens on this stream, sorting out minor issues of
- * orthography.
- *
- * @param tokens
- * the tokens.
- * @throws IOException
- * if it is impossible to write (e.g. file system full).
- */
- public void writeSequence(WordSequence tokens) throws IOException {
- boolean capitaliseNext = true;
+ /**
+ * Write this sequence of tokens on this stream, sorting out minor issues of
+ * orthography.
+ *
+ * @param tokens the tokens.
+ * @throws IOException if it is impossible to write (e.g. file system full).
+ */
+ public void writeSequence(WordSequence tokens) throws IOException {
+ boolean capitaliseNext = true;
- try {
- for (String token : tokens) {
- capitaliseNext = writeToken(capitaliseNext, token);
- }
- } finally {
- this.flush();
- this.close();
- }
- }
+ for (String token : tokens) {
+ capitaliseNext = writeToken(capitaliseNext, token);
+ }
+ this.write(NEWLINE);
+ }
- /**
- * Deal with end of paragraph, capital after full stop, and other minor
- * orthographic conventions.
- *
- * @param capitalise
- * whether or not the token should be capitalised
- * @param token
- * the token to write;
- * @returnvtrue if the next token to be written should be capitalised.
- * @throws IOException
- */
- private boolean writeToken(boolean capitalise, String token)
- throws IOException {
- if (this.spaceBefore(token)) {
- this.write(" ");
- }
- if (capitalise) {
- this.write(token.substring(0, 1).toUpperCase(Locale.getDefault()));
- this.write(token.substring(1));
- } else {
- this.write(token);
- }
+ /**
+ * Deal with end of paragraph, capital after full stop, and other minor
+ * orthographic conventions.
+ *
+ * @param capitalise whether or not the token should be capitalised
+ * @param token the token to write;
+ * @return true if the next token to be written should be capitalised.
+ * @throws IOException
+ */
+ private boolean writeToken(boolean capitalise, String token)
+ throws IOException {
+ if (this.spaceBefore(token)) {
+ this.write(" ");
+ }
+ if (capitalise) {
+ this.write(token.substring(0, 1).toUpperCase(Locale.getDefault()));
+ this.write(token.substring(1));
+ } else {
+ this.write(token);
+ }
- this.maybeParagraph(token);
+ this.maybeParagraph(token);
- return (token.endsWith(Milkwood.PERIOD));
- }
+ return (token.endsWith(Milkwood.PERIOD));
+ }
- /**
- * Return false if token is punctuation, else true. Wouldn't it be nice if
- * Java provided Character.isPunctuation(char)? However, since it doesn't, I
- * can give this slightly special semantics: return true only if this is
- * punctuation which would not normally be preceded with a space.
- *
- * @param ch
- * a character.
- * @return true if the should be preceded by a space, else false.
- */
- private boolean spaceBefore(String token) {
- final boolean result;
+ /**
+ * Return false if token is punctuation, else true. Wouldn't it be nice if
+ * Java provided Character.isPunctuation(char)? However, since it doesn't, I
+ * can give this slightly special semantics: return false for a token (such
+ * as punctuation) which would not normally be preceded by a space.
+ *
+ * @param token a token.
+ * @return true if the token should be preceded by a space, else false.
+ */
+ private boolean spaceBefore(String token) {
+ final boolean result;
- switch (token.length()) {
- case 0:
- result = false;
- break;
- case 1:
- switch (token.charAt(0)) {
- case '.':
- case ',':
- case ':':
- case ';':
- case 's':
- /*
- * an 's' on its own is probably evidence of a possessive with
- * the apostrophe lost
- */
- case 't':
- /*
- * similar; probably 'doesn't' or 'shouldn't' or other cases of
- * 'not' with an elided 'o'.
- */
- result = false;
- break;
- default:
- result = true;
- break;
- }
- break;
- default:
- result = true;
- }
+ switch (token.length()) {
+ case 0:
+ result = false;
+ break;
+ case 1:
+ switch (token.charAt(0)) {
+ case '.':
+ case ',':
+ case ':':
+ case ';':
+ case 's':
+ /*
+ * an 's' on its own is probably evidence of a possessive with
+ * the apostrophe lost
+ */
+ case 't':
+ /*
+ * similar; probably 'doesn't' or 'shouldn't' or other cases of
+ * 'not' with an elided 'o'.
+ */
+ result = false;
+ break;
+ default:
+ result = true;
+ break;
+ }
+ break;
+ default:
+ result = true;
+ }
- return result;
- }
-
- /**
- * If this token is an end-of-sentence token, then, on one chance in some,
- * have the writer write two new lines. NOTE: The tokeniser is treating
- * PERIOD ('.') as a word character, even though it has not been told to.
- * Token.endsWith( PERIOD) is a hack to get round this problem. TODO:
- * investigate and fix.
- *
- * @param token
- * a token
- * @throws IOException
- * if Mr this has run out of ink
- */
- private void maybeParagraph(String token) throws IOException {
- if (token.endsWith(Milkwood.PERIOD)
- && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
- this.write("\n\n");
- }
- }
+ return result;
+ }
+ /**
+ * If this token is an end-of-sentence token, then, on one chance in some,
+ * have the writer write two new lines. NOTE: The tokeniser is treating
+ * PERIOD ('.') as a word character, even though it has not been told to.
+ * Token.endsWith( PERIOD) is a hack to get round this problem. TODO:
+ * investigate and fix.
+ *
+ * @param token a token
+ * @throws IOException if Mr this has run out of ink
+ */
+ private void maybeParagraph(String token) throws IOException {
+ if (token.endsWith(Milkwood.PERIOD)
+ && RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
+ this.write(NEWLINE);
+ this.write(NEWLINE);
+ }
+ }
}