Merge branch 'master' of ssh://goldsmith.journeyman.cc/srv/git/milkwood

This commit is contained in:
Simon Brooke 2013-10-31 13:57:19 +00:00
commit d91fdcab16
18 changed files with 701 additions and 832 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/build/
/dist/

View file

@ -1,5 +1,7 @@
Trigrams process
http://codekata.pragprog.com/2007/01/kata_fourteen_t.html
Started at: 20131030:12:48 GMT
OK, it's a tokeniser, with a map. The map maps token tuples onto tokens.

View file

@ -54,43 +54,6 @@ is divided into following sections:
<property file="nbproject/project.properties"/>
</target>
<target depends="-pre-init,-init-private,-init-user,-init-project,-init-macrodef-property" name="-do-init">
<j2seproject1:property name="platform.home" value="platforms.${platform.active}.home"/>
<j2seproject1:property name="platform.bootcp" value="platforms.${platform.active}.bootclasspath"/>
<j2seproject1:property name="platform.compiler" value="platforms.${platform.active}.compile"/>
<j2seproject1:property name="platform.javac.tmp" value="platforms.${platform.active}.javac"/>
<condition property="platform.javac" value="${platform.home}/bin/javac">
<equals arg1="${platform.javac.tmp}" arg2="$${platforms.${platform.active}.javac}"/>
</condition>
<property name="platform.javac" value="${platform.javac.tmp}"/>
<j2seproject1:property name="platform.java.tmp" value="platforms.${platform.active}.java"/>
<condition property="platform.java" value="${platform.home}/bin/java">
<equals arg1="${platform.java.tmp}" arg2="$${platforms.${platform.active}.java}"/>
</condition>
<property name="platform.java" value="${platform.java.tmp}"/>
<j2seproject1:property name="platform.javadoc.tmp" value="platforms.${platform.active}.javadoc"/>
<condition property="platform.javadoc" value="${platform.home}/bin/javadoc">
<equals arg1="${platform.javadoc.tmp}" arg2="$${platforms.${platform.active}.javadoc}"/>
</condition>
<property name="platform.javadoc" value="${platform.javadoc.tmp}"/>
<condition property="platform.invalid" value="true">
<or>
<contains string="${platform.javac}" substring="$${platforms."/>
<contains string="${platform.java}" substring="$${platforms."/>
<contains string="${platform.javadoc}" substring="$${platforms."/>
</or>
</condition>
<fail unless="platform.home">Must set platform.home</fail>
<fail unless="platform.bootcp">Must set platform.bootcp</fail>
<fail unless="platform.java">Must set platform.java</fail>
<fail unless="platform.javac">Must set platform.javac</fail>
<fail if="platform.invalid">
The J2SE Platform is not correctly set up.
Your active platform is: ${platform.active}, but the corresponding property "platforms.${platform.active}.home" is not found in the project's properties files.
Either open the project in the IDE and setup the Platform with the same name or add it manually.
For example like this:
ant -Duser.properties.file=&lt;path_to_property_file&gt; jar (where you put the property "platforms.${platform.active}.home" in a .properties file)
or ant -Dplatforms.${platform.active}.home=&lt;path_to_JDK_home&gt; jar (where no properties file is used)
</fail>
<available file="${manifest.file}" property="manifest.available"/>
<condition property="splashscreen.available">
<and>
@ -225,6 +188,15 @@ is divided into following sections:
<condition else="" property="endorsed.classpath.cmd.line.arg" value="-Xbootclasspath/p:'${toString:endorsed.classpath.path}'">
<length length="0" string="${endorsed.classpath}" when="greater"/>
</condition>
<condition else="false" property="jdkBug6558476">
<and>
<matches pattern="1\.[56]" string="${java.specification.version}"/>
<not>
<os family="unix"/>
</not>
</and>
</condition>
<property name="javac.fork" value="${jdkBug6558476}"/>
<property name="jar.index" value="false"/>
<property name="jar.index.metainf" value="${jar.index}"/>
<property name="copylibs.rebase" value="true"/>
@ -293,7 +265,7 @@ is divided into following sections:
<property location="${build.dir}/empty" name="empty.dir"/>
<mkdir dir="${empty.dir}"/>
<mkdir dir="@{apgeneratedsrcdir}"/>
<javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" executable="${platform.javac}" fork="yes" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}">
<javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" fork="${javac.fork}" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}">
<src>
<dirset dir="@{gensrcdir}" erroronmissingdir="false">
<include name="*"/>
@ -332,7 +304,7 @@ is divided into following sections:
<sequential>
<property location="${build.dir}/empty" name="empty.dir"/>
<mkdir dir="${empty.dir}"/>
<javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" executable="${platform.javac}" fork="yes" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}">
<javac debug="@{debug}" deprecation="${javac.deprecation}" destdir="@{destdir}" encoding="${source.encoding}" excludes="@{excludes}" fork="${javac.fork}" includeantruntime="false" includes="@{includes}" source="${javac.source}" sourcepath="@{sourcepath}" srcdir="@{srcdir}" target="${javac.target}" tempdir="${java.io.tmpdir}">
<src>
<dirset dir="@{gensrcdir}" erroronmissingdir="false">
<include name="*"/>
@ -412,7 +384,7 @@ is divided into following sections:
<element name="customize" optional="true"/>
<sequential>
<property name="junit.forkmode" value="perTest"/>
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" jvm="${platform.java}" showoutput="true" tempdir="${build.dir}">
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
<test methods="@{testmethods}" name="@{testincludes}" todir="${build.test.results.dir}"/>
<syspropertyset>
<propertyref prefix="test-sys-prop."/>
@ -435,7 +407,7 @@ is divided into following sections:
<element name="customize" optional="true"/>
<sequential>
<property name="junit.forkmode" value="perTest"/>
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" jvm="${platform.java}" showoutput="true" tempdir="${build.dir}">
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
<batchtest todir="${build.test.results.dir}">
<fileset dir="${test.src.dir}" excludes="@{excludes},${excludes}" includes="@{includes}">
<filename name="@{testincludes}"/>
@ -474,7 +446,7 @@ is divided into following sections:
</fileset>
</union>
<taskdef classname="org.testng.TestNGAntTask" classpath="${run.test.classpath}" name="testng"/>
<testng classfilesetref="test.set" failureProperty="tests.failed" jvm="${platform.java}" methods="${testng.methods.arg}" mode="${testng.mode}" outputdir="${build.test.results.dir}" suitename="milkwood" testname="TestNG tests" workingDir="${work.dir}">
<testng classfilesetref="test.set" failureProperty="tests.failed" methods="${testng.methods.arg}" mode="${testng.mode}" outputdir="${build.test.results.dir}" suitename="milkwood" testname="TestNG tests" workingDir="${work.dir}">
<xmlfileset dir="${build.test.classes.dir}" includes="@{testincludes}"/>
<propertyset>
<propertyref prefix="test-sys-prop."/>
@ -554,7 +526,7 @@ is divided into following sections:
<element name="customize" optional="true"/>
<sequential>
<property name="junit.forkmode" value="perTest"/>
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" jvm="${platform.java}" showoutput="true" tempdir="${build.dir}">
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
<test methods="@{testmethods}" name="@{testincludes}" todir="${build.test.results.dir}"/>
<syspropertyset>
<propertyref prefix="test-sys-prop."/>
@ -579,7 +551,7 @@ is divided into following sections:
<element name="customize" optional="true"/>
<sequential>
<property name="junit.forkmode" value="perTest"/>
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" jvm="${platform.java}" showoutput="true" tempdir="${build.dir}">
<junit dir="${work.dir}" errorproperty="tests.failed" failureproperty="tests.failed" fork="true" forkmode="${junit.forkmode}" showoutput="true" tempdir="${build.dir}">
<batchtest todir="${build.test.results.dir}">
<fileset dir="${test.src.dir}" excludes="@{excludes},${excludes}" includes="@{includes}">
<filename name="@{testincludes}"/>
@ -759,9 +731,6 @@ is divided into following sections:
<classpath>
<path path="@{classpath}"/>
</classpath>
<bootclasspath>
<path path="${platform.bootcp}"/>
</bootclasspath>
</nbjpdastart>
</sequential>
</macrodef>
@ -777,9 +746,7 @@ is divided into following sections:
</macrodef>
</target>
<target name="-init-debug-args">
<exec executable="${platform.java}" outputproperty="version-output">
<arg value="-version"/>
</exec>
<property name="version-output" value="java version &quot;${ant.java.version}"/>
<condition property="have-jdk-older-than-1.4">
<or>
<contains string="${version-output}" substring="java version &quot;1.0"/>
@ -804,7 +771,7 @@ is divided into following sections:
<attribute default="${debug.classpath}" name="classpath"/>
<element name="customize" optional="true"/>
<sequential>
<java classname="@{classname}" dir="${work.dir}" fork="true" jvm="${platform.java}">
<java classname="@{classname}" dir="${work.dir}" fork="true">
<jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
<jvmarg line="${debug-args-line}"/>
<jvmarg value="-Xrunjdwp:transport=${debug-transport},address=${jpda.address}"/>
@ -831,7 +798,7 @@ is divided into following sections:
<attribute default="jvm" name="jvm"/>
<element name="customize" optional="true"/>
<sequential>
<java classname="@{classname}" dir="${work.dir}" fork="true" jvm="${platform.java}">
<java classname="@{classname}" dir="${work.dir}" fork="true">
<jvmarg line="${endorsed.classpath.cmd.line.arg}"/>
<jvmarg value="-Dfile.encoding=${runtime.encoding}"/>
<redirector errorencoding="${runtime.encoding}" inputencoding="${runtime.encoding}" outputencoding="${runtime.encoding}"/>
@ -1018,7 +985,7 @@ is divided into following sections:
<path path="${run.classpath}"/>
<map from="${build.classes.dir.resolved}" to="${dist.jar.resolved}"/>
</pathconvert>
<echo level="info">${platform.java} -cp "${run.classpath.with.dist.jar}" ${main.class}</echo>
<echo level="info">java -cp "${run.classpath.with.dist.jar}" ${main.class}</echo>
</target>
<target depends="init" if="do.archive" name="-do-jar-with-libraries-create-manifest" unless="manifest.available">
<tempfile deleteonexit="true" destdir="${build.dir}" property="tmp.manifest.file"/>
@ -1045,7 +1012,7 @@ is divided into following sections:
<j2seproject3:copylibs manifest="${tmp.manifest.file}"/>
<echo level="info">To run this application from the command line without Ant, try:</echo>
<property location="${dist.jar}" name="dist.jar.resolved"/>
<echo level="info">${platform.java} -jar "${dist.jar.resolved}"</echo>
<echo level="info">java -jar "${dist.jar.resolved}"</echo>
</target>
<target depends="-do-jar-with-libraries-pack" if="do.archive" name="-do-jar-with-libraries-delete-manifest">
<delete>
@ -1236,7 +1203,7 @@ is divided into following sections:
</not>
</and>
</condition>
<javadoc additionalparam="${javadoc.additionalparam}" author="${javadoc.author}" charset="UTF-8" destdir="${dist.javadoc.dir}" docencoding="UTF-8" encoding="${javadoc.encoding.used}" executable="${platform.javadoc}" failonerror="true" noindex="${javadoc.noindex}" nonavbar="${javadoc.nonavbar}" notree="${javadoc.notree}" private="${javadoc.private}" source="${javac.source}" splitindex="${javadoc.splitindex}" use="${javadoc.use}" useexternalfile="true" version="${javadoc.version}" windowtitle="${javadoc.windowtitle}">
<javadoc additionalparam="${javadoc.additionalparam}" author="${javadoc.author}" charset="UTF-8" destdir="${dist.javadoc.dir}" docencoding="UTF-8" encoding="${javadoc.encoding.used}" failonerror="true" noindex="${javadoc.noindex}" nonavbar="${javadoc.nonavbar}" notree="${javadoc.notree}" private="${javadoc.private}" source="${javac.source}" splitindex="${javadoc.splitindex}" use="${javadoc.use}" useexternalfile="true" version="${javadoc.version}" windowtitle="${javadoc.windowtitle}">
<classpath>
<path path="${javac.classpath}"/>
</classpath>

View file

@ -1,8 +1,8 @@
build.xml.data.CRC32=d35b316e
build.xml.data.CRC32=31018a52
build.xml.script.CRC32=cd5c02b3
build.xml.stylesheet.CRC32=28e38971@1.56.1.46
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
nbproject/build-impl.xml.data.CRC32=d35b316e
nbproject/build-impl.xml.script.CRC32=0441a68e
nbproject/build-impl.xml.data.CRC32=31018a52
nbproject/build-impl.xml.script.CRC32=fe6e4d15
nbproject/build-impl.xml.stylesheet.CRC32=c6d2a60f@1.56.1.46

View file

@ -1,2 +1,6 @@
compile.on.save=true
do.depend=false
do.jar=true
javac.debug=true
javadoc.preview=true
user.properties.file=/home/simon/.jmonkeyplatform/3.0/build.properties

View file

@ -1,9 +1,10 @@
annotation.processing.enabled=true
annotation.processing.enabled.in.editor=false
annotation.processing.processor.options=
annotation.processing.processors.list=
annotation.processing.run.all.processors=true
annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
application.title=milkwood
application.vendor=simon
build.classes.dir=${build.dir}/classes
build.classes.excludes=**/*.java,**/*.form
# This directory is removed when the project is cleaned:
@ -24,6 +25,7 @@ debug.test.classpath=\
dist.dir=dist
dist.jar=${dist.dir}/milkwood.jar
dist.javadoc.dir=${dist.dir}/javadoc
endorsed.classpath=
excludes=
includes=**
jar.compress=false
@ -33,8 +35,8 @@ javac.compilerargs=
javac.deprecation=false
javac.processorpath=\
${javac.classpath}
javac.source=1.6
javac.target=1.6
javac.source=1.7
javac.target=1.7
javac.test.classpath=\
${javac.classpath}:\
${build.classes.dir}
@ -55,7 +57,8 @@ main.class=cc.journeyman.milkwood.Milkwood
manifest.file=manifest.mf
meta.inf.dir=${src.dir}/META-INF
mkdist.disabled=false
platform.active=JDK_1.6
obfuscate.options=-keep public class * extends com.jme3.app.Application{public *;}\n-keep public class * extends com.jme3.system.JmeSystemDelegate{public *;}\n-keep public class * implements com.jme3.renderer.Renderer{public *;}\n-keep public class * implements com.jme3.asset.AssetLoader{public *;}\n-keep public class * implements com.jme3.asset.AssetLocator{public *;}\n-keep public class * implements de.lessvoid.nifty.screen.ScreenController{public *;}\n-dontwarn\n-dontnote\n
platform.active=default_platform
platforms.JDK_1.6.home=/usr/lib/jvm/java-6-openjdk-amd64/
run.classpath=\
${javac.classpath}:\

View file

@ -4,7 +4,6 @@
<configuration>
<data xmlns="http://www.netbeans.org/ns/j2se-project/3">
<name>milkwood</name>
<explicit-platform explicit-source-supported="true"/>
<source-roots>
<root id="src.dir"/>
</source-roots>

View file

@ -1,139 +1,134 @@
package cc.journeyman.milkwood;
import java.util.Collection;
import java.util.Collections;
/**
* Composes text output based on a rule tree.
*
*
* @author simon
*
*
*/
public class Composer {
/**
* Whether or not I am in debugging mode.
*/
private final boolean debug;
/**
* Create a composer, storing the supplied flag in my debug field.
*
* @param debug
* Whether or not I am in debugging mode.
*/
public Composer(boolean debug) {
this.debug = debug;
}
/**
* Whether or not I am in debugging mode.
*/
private final boolean debug;
/**
* Recursive, backtracking, output generator.
*
* @param rules the rule set we're working to.
* @param length the number of tokens still to be output.
* @return if a successful path forward is found, that path, else null.
*/
public WordSequence compose(RuleTreeNode rules, int length) {
WordStack preamble = composePreamble(rules);
WordSequence result = new WordSequence();
/**
* Create a composer, storing the supplied flag in my debug field.
*
* @param debug Whether or not I am in debugging mode.
*/
public Composer(boolean debug) {
this.debug = debug;
}
// composing the preamble will have ended with *ROOT* on top of the
// stack;
// get rid of it.
preamble.pop();
if (debug) {
System.err.println( "Preamble: " + preamble);
}
/**
* Recursive, backtracking, output generator.
*
* @param rules the rule set we're working to.
* @param length the number of tokens still to be output.
* @return if a successful path forward is found, that path, else null.
*/
protected WordSequence compose(RuleTreeNode rules, int length) {
Window preamble = composePreamble(rules);
WordSequence result = new WordSequence();
result.addAll(preamble);
WordStack body = this.compose(preamble, rules, length);
Collections.reverse(body);
result.addAll(body);
return result;
}
// composing the preamble will have ended with *ROOT* on top of the
// stack;
// get rid of it.
preamble.pop();
/**
* Recursively attempt to find sequences in the ruleset to append to what's
* been composed so far.
*
* @param glanceBack the last few words output.
* @param rules the rule set we're working to.
* @param length the number of tokens still to be output.
* @return if a successful path forward is found, that path, else null.
*/
private WordStack compose(WordStack glanceBack, RuleTreeNode rules,
int length) {
final WordStack result;
if ( debug) {
System.err.println( String.format( "%d: %s", length, glanceBack));
}
if (debug) {
System.err.println("Preamble: " + preamble);
}
/* are we there yet? */
if (length == 0) {
result = new WordStack();
} else {
/*
* are there any rules in this ruleset which matches the current
* sliding window? if so, then recurse; if not, then fail.
*/
Collection<String> words = rules.match(glanceBack.duplicate());
result.addAll(preamble);
if (words.isEmpty()) {
/* backtrack */
result = null;
} else {
result = tryOptions(words, glanceBack, rules, length);
}
}
return result;
}
/**
 * Work through the candidate words in iteration order, recursing on each
 * one until a candidate is found which leads to a complete path.
 *
 * @param candidates words which might be appended to the output next.
 * @param glanceBack the most recently output words.
 * @param allRules the complete rule set we're working to.
 * @param length the number of tokens still to be output, counting the
 * candidate itself.
 * @return the path found through the first candidate whose recursion
 * succeeds, with that candidate pushed onto it; null if no candidate
 * succeeds.
 */
private WordStack tryOptions(Collection<String> candidates,
        WordStack glanceBack, RuleTreeNode allRules, int length) {
    for (String option : candidates) {
        final WordStack onward = compose(new WordStack(glanceBack, option),
                allRules, length - 1);
        if (onward != null) {
            /* this option worked: record it and stop looking. */
            onward.push(option);
            return onward;
        }
    }
    /* nothing led anywhere: the caller will have to backtrack. */
    return null;
}
result.addAll(this.compose(preamble, rules, length));
return result;
}
/**
* Random walk of the rule tree to extract (from the root) a legal sequence
* of words the length of our tuple.
*
* @param rules
* the rule tree (fragment) to walk.
* @return a sequence of words.
*/
private WordStack composePreamble(RuleTreeNode rules) {
final WordStack result;
final RuleTreeNode successor = rules.getRule();
/**
* Recursively attempt to find sequences in the ruleset to append to what's
* been composed so far.
*
* @param glanceBack the last few words output.
* @param rules the rule set we're working to.
* @param length the number of tokens still to be output.
* @return if a successful path forward is found, that path, else null.
*/
private WordSequence compose(Window glanceBack, RuleTreeNode rules,
int length) {
final WordSequence result;
if (successor == null) {
result = new WordStack();
} else {
result = this.composePreamble(successor);
result.push(rules.getWord());
}
return result;
}
if (debug) {
System.err.println(String.format("%d: %s", length, glanceBack));
}
/* are we there yet? */
if (length == 0) {
result = new WordSequence();
} else {
/*
* are there any rules in this ruleset which matches the current
* sliding window? if so, then recurse; if not, then fail.
*/
Collection<String> words = rules.match(glanceBack.duplicate());
if (words.isEmpty()) {
/* backtrack */
result = null;
} else {
result = tryOptions(words, glanceBack, rules, length);
}
}
return result;
}
/**
 * Work through the candidate words in iteration order, recursing on each
 * one until a candidate is found which leads to a complete path.
 *
 * @param candidates words which might be appended to the output next.
 * @param glanceBack the most recently output words.
 * @param allRules the complete rule set we're working to.
 * @param length the number of tokens still to be output, counting the
 * candidate itself.
 * @return the path found through the first candidate whose recursion
 * succeeds, with that candidate pushed onto it; null if no candidate
 * succeeds.
 */
private WordSequence tryOptions(Collection<String> candidates,
        Window glanceBack, RuleTreeNode allRules, int length) {
    for (String option : candidates) {
        final WordSequence onward = compose(new Window(glanceBack, option),
                allRules, length - 1);
        if (onward != null) {
            /* this option worked: record it and stop looking. */
            onward.push(option);
            return onward;
        }
    }
    /* nothing led anywhere: the caller will have to backtrack. */
    return null;
}
/**
 * Walk the rule tree downwards from this node, following getRule() until
 * it yields no successor, and collect the words met on the way. The word
 * of the final node (the one with no successor) is not collected; words
 * are pushed as the recursion unwinds, so the word of the node passed in
 * is pushed last.
 *
 * @param rules the rule tree (fragment) to walk.
 * @return a window holding the words collected.
 */
private Window composePreamble(RuleTreeNode rules) {
    final RuleTreeNode next = rules.getRule();

    if (next == null) {
        /* bottom of the walk: start with an empty window. */
        return new Window();
    }

    final Window preamble = this.composePreamble(next);
    preamble.push(rules.getWord());
    return preamble;
}
}

View file

@ -15,46 +15,44 @@ import java.util.Queue;
/**
* Read an input stream of text and digest it into a set of generation rules.
* Separated out of TextGenerator mainly to declutter that class.
*
*
* @author simon
*
*
*/
public class Digester {
/**
* Read tokens from the input stream, and compile them into the rule tree
* below this root.
*
* @param in
* the input stream from which I read.
* @param tupleLength
* the length of the tuples I read.
* @param root
* the ruleset to which I shall add.
* @return the number of tokens read.
* @throws IOException if can't read from file system.
*/
protected int read(final InputStream in, final int tupleLength,
final RuleTreeNode root) throws IOException {
int result = 0;
final Queue<WordSequence> openTuples = new LinkedList<WordSequence>();
final Tokeniser tok = new Tokeniser(in);
for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok
.nextToken()) {
result++;
final WordSequence newTuple = new WordSequence();
String token = tok.readBareToken();
/**
* Read tokens from the input stream, and compile them into the rule tree
* below this root.
*
* @param in the input stream from which I read.
* @param tupleLength the length of the tuples I read.
* @param root the ruleset to which I shall add.
* @return the number of tokens read.
* @throws IOException if can't read from file system.
*/
protected int digest(final InputStream in, final int tupleLength,
final RuleTreeNode root) throws IOException {
int result = 0;
final Queue<WordSequence> openTuples = new LinkedList<>();
final Tokeniser tok = new Tokeniser(in);
openTuples.add(newTuple);
for (WordSequence tuple : openTuples) {
tuple.add(token);
}
for (int type = tok.nextToken(); type != StreamTokenizer.TT_EOF; type = tok
.nextToken()) {
result++;
final WordSequence newTuple = new WordSequence();
String token = tok.readBareToken();
if (openTuples.size() > tupleLength) {
root.addSequence(openTuples.remove());
}
}
openTuples.add(newTuple);
for (WordSequence tuple : openTuples) {
tuple.add(token);
}
return result;
}
if (openTuples.size() > tupleLength) {
root.addSequence(openTuples.remove());
}
}
return result;
}
}

View file

@ -1,3 +1,9 @@
/*
* Proprietary unpublished source code property of
* Simon Brooke <simon@journeyman.cc>.
*
* Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
*/
package cc.journeyman.milkwood;
import java.io.File;
@ -8,182 +14,158 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/*
* Proprietary unpublished source code property of
* Simon Brooke <simon@journeyman.cc>.
*
* Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
*/
/**
*
* Text mangler based on
* http://codekata.pragprog.com/2007/01/kata_fourteen_t.html
*
* @author Simon Brooke <simon@journeyman.cc>
*/
public class Milkwood {
/**
* The magic token which is deemed to end sentences.
*/
public static final String PERIOD = ".";
/**
* Parse command line arguments and kick off the process. Expected arguments
* include:
* <dl>
* <dt>-d, -debug</dt>
* <dd>Print debugging output to standard error</dd>
* <dt>-i [FILE], -input [FILE]</dt>
* <dd>Input file, expected to be an English (or, frankly, other natural
* language) text. Defaults to standard in.</dd>
* <dt>-l [NN], -length [NN]</dt>
* <dd>The length in tokens of the desired output. Defaults to 100.</dd>
* <dt>-n [NN], -tuple-length [NN]</dt>
* <dd>The length of tuples into which the file will be analysed, default 2.
* </dd>
* <dt>-o [FILE], -output [FILE]</dt>
* <dd>Output file, to which generated text will be written. Defaults to
* standard out.</dd>
* </dl>
*
* @param args
* the command line arguments
* @exception FileNotFoundException
* if the user specifies a file which isn't available.
* @exception IOException if I could not read from input or write to output.
*/
public static void main(String[] args) throws FileNotFoundException,
IOException {
/* defaults */
InputStream in = System.in;
OutputStream out = System.out;
int tupleLength = 2;
boolean debug = false;
int length = 100;
/**
* The magic token which is deemed to end sentences.
*/
public static final String PERIOD = ".";
for (int cursor = 0; cursor < args.length; cursor++) {
String arg = args[cursor];
/**
* Parse command line arguments and kick off the process. Expected arguments
* include:
* <dl>
* <dt>-d, -debug</dt>
* <dd>Print debugging output to standard error</dd>
* <dt>-i [FILE], -input [FILE]</dt>
* <dd>Input file, expected to be an English (or, frankly, other natural
* language) text. Defaults to standard in.</dd>
* <dt>-l [NN], -length [NN]</dt>
* <dd>The length in tokens of the desired output. Defaults to 100.</dd>
* <dt>-n [NN], -tuple-length [NN]</dt>
* <dd>The length of tuples into which the file will be analysed, default 2.
* </dd>
* <dt>-o [FILE], -output [FILE]</dt>
* <dd>Output file, to which generated text will be written. Defaults to
* standard out.</dd>
* </dl>
*
* @param args the command line arguments
* @exception FileNotFoundException if the user specifies a file which isn't
* available.
* @exception IOException if I could not read from input or write to output.
*/
public static void main(String[] args) throws FileNotFoundException,
IOException {
/* defaults */
InputStream in = System.in;
OutputStream out = System.out;
int tupleLength = 2;
boolean debug = false;
int length = 100;
if (arg.startsWith("-") && arg.length() > 1) {
switch (arg.charAt(1)) {
case 'd':
debug = true;
break;
case 'i':
// input
in = new FileInputStream(new File(args[++cursor]));
break;
case 'o': // output
out = new FileOutputStream(new File(args[++cursor]));
break;
case 'l': // length
length = Integer.parseInt(args[++cursor]);
break;
case 'n':
case 't': // tuple length
tupleLength = Integer.parseInt(args[++cursor]);
break;
default:
throw new IllegalArgumentException(String.format(
"Unrecognised argument '%s'", arg));
}
}
}
try {
new Milkwood().readAndGenerate(in, out, tupleLength, length, debug);
} finally {
out.close();
}
}
for (int cursor = 0; cursor < args.length; cursor++) {
String arg = args[cursor];
/**
* Read tokens from this input and use them to generate text on this output.
*
* @param in
* the input stream to read.
* @param out
* the output stream to write to.
* @param tupleLength
* the length of tuples to be used in generation.
* @param length
* the length in tokens of the output to be generated.
* @param debug
* whether to print debugging output.
* @throws IOException
* if the file system buggers up, which is not, in the cosmic
* scheme of things, very likely.
*/
void readAndGenerate(final InputStream in, final OutputStream out,
final int tupleLength, int length, boolean debug)
throws IOException {
/* The root of the rule tree I shall build. */
RuleTreeNode root = new RuleTreeNode();
read(in, tupleLength, debug, root);
if (arg.startsWith("-") && arg.length() > 1) {
switch (arg.charAt(1)) {
case 'd':
debug = true;
break;
case 'i':
// input
in = new FileInputStream(new File(args[++cursor]));
break;
case 'o': // output
out = new FileOutputStream(new File(args[++cursor]));
break;
case 'l': // length
length = Integer.parseInt(args[++cursor]);
break;
case 'n':
case 't': // tuple length
tupleLength = Integer.parseInt(args[++cursor]);
break;
default:
throw new IllegalArgumentException(String.format(
"Unrecognised argument '%s'", arg));
}
}
}
try {
new Milkwood().readAndGenerate(in, out, tupleLength, length, debug);
} finally {
out.close();
}
}
WordSequence tokens = compose(tupleLength, debug, root, length);
/**
* Read tokens from this input and use them to generate text on this output.
*
* @param in the input stream to read.
* @param out the output stream to write to.
* @param tupleLength the length of tuples to be used in generation.
* @param length the length in tokens of the output to be generated.
* @param debug whether to print debugging output.
* @throws IOException if the file system buggers up, which is not, in the
* cosmic scheme of things, very likely.
*/
void readAndGenerate(final InputStream in, final OutputStream out,
final int tupleLength, int length, boolean debug)
throws IOException {
/* The root of the rule tree I shall build. */
RuleTreeNode root = new RuleTreeNode();
read(in, tupleLength, debug, root);
write(out, debug, tokens);
if ( debug) {
System.err.println( "\n\nCompleted.");
}
}
WordSequence tokens = compose(tupleLength, debug, root, length);
/**
* Digest the input into a set of rules.
*
* @param in
* the input stream.
* @param tupleLength
* the length of tuples we shall consider.
* @param debug
* whether or not to print debugging output.
* @param root
* the root of the rule tree.
* @return the number of tokens read.
* @throws IOException
* if the file system buggers up, which is not, in the cosmic
* scheme of things, very likely.
*/
private int read(final InputStream in, final int tupleLength,
boolean debug, RuleTreeNode root) throws IOException {
int length = new Digester().read(in, tupleLength, root);
write(out, debug, tokens);
if (debug) {
System.err.println(root.toString());
}
return length;
}
if (debug) {
System.err.println("\n\nCompleted.");
}
}
private WordSequence compose(final int tupleLength, boolean debug,
RuleTreeNode root, int length) {
WordSequence tokens = new Composer(debug).compose(root, length);
/**
* Digest the input into a set of rules.
*
* @param in the input stream.
* @param tupleLength the length of tuples we shall consider.
* @param debug whether or not to print debugging output.
* @param root the root of the rule tree.
* @return the number of tokens read.
* @throws IOException if the file system buggers up, which is not, in the
* cosmic scheme of things, very likely.
*/
private int read(final InputStream in, final int tupleLength,
boolean debug, RuleTreeNode root) throws IOException {
int length = new Digester().digest(in, tupleLength, root);
if (tokens.contains(PERIOD)) {
tokens = tokens.truncateAtLastInstance(PERIOD);
}
return tokens;
}
if (debug) {
System.err.println(root.toString());
}
return length;
}
/**
 * Emit a sequence of tokens on an output stream, by way of a Writer created
 * for the purpose. The Writer is always closed afterwards, whether or not
 * writing succeeded.
 *
 * @param out the stream to which to write.
 * @param debug whether or not to print debugging output; handed on to the
 * Writer.
 * @param tokens the sequence of tokens to write.
 * @throws IOException if writing or closing fails.
 */
private void write(final OutputStream out, boolean debug,
        WordSequence tokens) throws IOException {
    final Writer pen = new Writer(out, debug);

    try {
        pen.writeSequence(tokens);
    } finally {
        /* release the writer even if writeSequence threw */
        pen.close();
    }
}
/**
 * Have a Composer generate a token sequence from the rule tree and, if any
 * token in it ends with PERIOD, truncate it at the last such token.
 *
 * @param tupleLength not used by this method.
 * @param debug whether or not to print debugging output; handed on to the
 * Composer.
 * @param root the root of the rule tree.
 * @param length the length argument handed on to the Composer.
 * @return the generated (and possibly truncated) sequence.
 */
private WordSequence compose(final int tupleLength, boolean debug,
        RuleTreeNode root, int length) {
    final WordSequence generated = new Composer(debug).compose(root, length);

    return generated.contains(PERIOD)
            ? generated.truncateAtLastInstance(PERIOD)
            : generated;
}
/**
 * Emit a sequence of tokens on an output stream. A Writer is opened on the
 * stream for the duration of the call and closed automatically on the way
 * out.
 *
 * @param out the stream to which to write.
 * @param debug whether or not to print debugging output; handed on to the
 * Writer.
 * @param tokens the sequence of tokens to write.
 * @throws IOException if writing or closing fails.
 */
private void write(final OutputStream out, boolean debug,
        WordSequence tokens) throws IOException {
    try (Writer quill = new Writer(out, debug)) {
        quill.writeSequence(tokens);
    }
}
}

View file

@ -1,17 +0,0 @@
/*
* Proprietary unpublished source code property of
* Simon Brooke <simon@journeyman.cc>.
*
* Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
*/
package cc.journeyman.milkwood;
/**
 * A checked exception type which declares nothing beyond its serial version
 * identifier.
 * NOTE(review): presumably meant to signal that a rule tree holds no path
 * matching a given token sequence - confirm against whatever throws it.
 *
 * @author Simon Brooke <simon@journeyman.cc>
 */
class NoSuchPathException extends Exception {
private static final long serialVersionUID = 1L;
}

View file

@ -16,17 +16,18 @@ import java.util.Random;
import java.util.Stack;
/**
* Mapping a word to its successor words. This is probably highly
* inefficient of store, but for the present purposes my withers are unwrung.
* Not thread safe in this form because of access to the random number generator.
*
* Mapping a word to its successor words. This is probably highly inefficient of
* store, but for the present purposes my withers are unwrung. Not thread safe
* in this form because of access to the random number generator.
*
* @author Simon Brooke <simon@journeyman.cc>
*/
public class RuleTreeNode {
/**
 * The magic token which identifies the root node of a rule tree.
 */
public static final String ROOTMAGICTOKEN = "*ROOT*";
/**
* The line separator on this platform.
*/
@ -35,64 +36,66 @@ public class RuleTreeNode {
* A random number generator.
*/
private static Random RANDOM = new Random();
/**
 * The word at this node.
 */
private final String word;
/**
 * Potential successors of this node, each keyed by its own word.
 */
private Map<String, RuleTreeNode> rules = new HashMap<>();
/**
 * If no argument passed, generate a root node: that is, a node whose word
 * is ROOTMAGICTOKEN.
 */
public RuleTreeNode() {
    /* an explicit constructor call may appear only once, as first statement */
    this(RuleTreeNode.ROOTMAGICTOKEN);
}
/**
 * Create me wrapping this word.
 *
 * @param word the word I represent.
 */
public RuleTreeNode(String word) {
    this.word = word;
}
/**
 * Specialisation: neatly format the rule tree rooted at this node, one node
 * per line, indented by depth.
 *
 * @return a neatly formatted representation.
 */
@Override
public String toString() {
    StringBuffer buffy = new StringBuffer();
    /* I am the top of what is printed, so I am at indent zero */
    this.printToBuffer(buffy, 0);
    return buffy.toString();
}
private void printToBuffer(StringBuffer buffy, int indent) {
for (int i = 0; i < indent; i++) {
buffy.append( '\t');
}
buffy.append( this.getWord());
if ( this.rules.isEmpty()) {
buffy.append(NEWLINE);
} else {
buffy.append( " ==>").append(NEWLINE);
for ( String successor : this.getSuccessors()) {
rules.get(successor).printToBuffer(buffy, indent + 1);
}
buffy.append(NEWLINE);
}
}
for (int i = 0; i < indent; i++) {
buffy.append('\t');
}
buffy.append(this.getWord());
/**
*
if (this.rules.isEmpty()) {
buffy.append(NEWLINE);
} else {
buffy.append(" ==>").append(NEWLINE);
for (String successor : this.getSuccessors()) {
rules.get(successor).printToBuffer(buffy, indent + 1);
}
buffy.append(NEWLINE);
}
}
/**
*
* @return my word.
*/
public String getWord() {
@ -100,60 +103,60 @@ public class RuleTreeNode {
}
/**
 * List the words which could follow this one. The list is a fresh copy of
 * the keys of my rules, so changing it does not change me.
 *
 * @return a shuffled list of the words which could follow this one.
 */
public Collection<String> getSuccessors() {
    ArrayList<String> result = new ArrayList<>(rules.keySet());
    Collections.shuffle(result, RANDOM);
    return result;
}
/**
 * Compile this sequence of tokens into rule nodes under me. The sequence is
 * consumed: each level of the recursion removes exactly one token from its
 * head, so the queue is empty on return.
 *
 * @param sequence the sequence of tokens to compile.
 */
public void addSequence(Queue<String> sequence) {
    if (!sequence.isEmpty()) {
        /* take one token - and only one - for this level of the tree */
        String token = sequence.remove();
        RuleTreeNode successor = this.getRule(token);

        if (successor == null) {
            successor = new RuleTreeNode(token);
            this.rules.put(token, successor);
        }

        successor.addSequence(sequence);
    }
}
/**
/**
* Choose a successor at random.
*
*
* @return the successor chosen, or null if I have none.
*/
protected RuleTreeNode getRule() {
RuleTreeNode result = null;
protected RuleTreeNode getRule() {
RuleTreeNode result = null;
if (!rules.isEmpty()) {
int target = RANDOM.nextInt(rules.keySet().size());
if (!rules.isEmpty()) {
int target = RANDOM.nextInt(rules.keySet().size());
for (String key : rules.keySet()) {
/*
* NOTE: decrement after test.
*/
if (target-- == 0) {
result = rules.get(key);
}
}
}
for (String key : rules.keySet()) {
/*
* NOTE: decrement after test.
*/
if (target-- == 0) {
result = rules.get(key);
}
}
}
return result;
return result;
}
/**
*
*
* @param token a token to seek.
* @return the successor among my successors which has this token, if any.
*/
@ -161,44 +164,46 @@ public class RuleTreeNode {
return rules.get(token);
}
protected String getWord(Stack<String> path) throws NoSuchPathException {
protected String getWord(Stack<String> path) {
final String result;
if ( path.isEmpty()) {
if (path.isEmpty()) {
result = this.getWord();
} else {
final RuleTreeNode successor = this.getRule(path.pop());
if (successor == null) {
result = null;
} else {
result = successor.getWord(path);
}
}
return result;
}
/**
* Find all the terminal strings in the current rule set which would match this path.
* Find all the terminal strings in the current rule set which would match
* this path.
*
* @param path the path to match
* @return a collection (possibly empty) of potential successors.
*/
public Collection<String> match(WordStack path) {
final Collection<String> result;
if ( path.isEmpty()) {
public Collection<String> match(Window path) {
final Collection<String> result;
if (path.isEmpty()) {
result = this.getSuccessors();
} else {
final RuleTreeNode successor = this.getRule(path.pop());
if (successor == null) {
result = new ArrayList<String>();
result = new ArrayList<>();
} else {
result = successor.match(path);
}
}
return result;
}
return result;
}
}

View file

@ -16,66 +16,69 @@ import java.io.StreamTokenizer;
* A tokeniser which reads tokens in a manner which suits me. Although this
* implementation is based on a StreamTokenizer, the point of separating this
* out into its own class is that if I had more time I could reimplement.
*
*
* @author simon
*
*
*/
public class Tokeniser extends StreamTokenizer {

    /**
     * Initialise me appropriately wrapping this reader: the syntax table is
     * reset, then control/space characters, quote marks, underscore and
     * hyphen are made whitespace, and ASCII letters and digits are made word
     * characters. Every other character is left ordinary, so it is returned
     * as a token on its own.
     *
     * @param r the reader to wrap.
     */
    public Tokeniser(Reader r) {
        super(r);

        this.resetSyntax();
        this.whitespaceChars(8, 15);
        this.whitespaceChars(28, 32);
        /*
         * treat quotemarks as white space. Actually it would be better if quote
         * marks were white space only if preceded or followed by whitespace, so
         * that, e.g., 'don't' and 'can't' appeared as single tokens. But that
         * means really reimplementing the parser and I don't have time.
         */
        this.whitespaceChars('\"', '\"');
        this.whitespaceChars('\'', '\'');
        /*
         * treat underscore and hyphen as whitespace as well. Again, hyphen with
         * either leading or trailing non-whitespace probably ought to be
         * treated specially, but...
         */
        this.whitespaceChars('_', '_');
        this.whitespaceChars('-', '-');
        /*
         * digits are word characters and number parsing is never switched
         * on, so numerals arrive as words
         */
        this.wordChars('0', '9');
        this.wordChars('A', 'Z');
        this.wordChars('a', 'z');
    }

    /**
     * Initialise me wrapping a buffered reader on this input stream. The
     * stream is decoded using the platform default character set.
     *
     * @param in the stream to read tokens from.
     */
    public Tokeniser(InputStream in) {
        this(new BufferedReader(new InputStreamReader(in)));
    }

    /**
     * There surely must be a better way to get just the token out of a
     * StreamTokenizer...!
     *
     * @return the most recently read token as a string: a word is returned
     * in lower case, a number in the form Double.toString gives, and any
     * other token as the one-character string of its type code.
     */
    public String readBareToken() {
        final String token;

        switch (this.ttype) {
        case StreamTokenizer.TT_EOL:
            token = "FIXME"; // TODO: fix this!
            break;
        case StreamTokenizer.TT_NUMBER:
            token = Double.toString(this.nval);
            break;
        case StreamTokenizer.TT_WORD:
            token = this.sval.toLowerCase();
            break;
        default:
            /* an ordinary character: its type code is the character itself */
            token = String.valueOf((char) this.ttype);
            break;
        }

        return token;
    }
}

View file

@ -1,60 +0,0 @@
/*
* Proprietary unpublished source code property of
* Simon Brooke <simon@journeyman.cc>.
*
* Copyright (c) 2013 Simon Brooke <simon@journeyman.cc>
*/
package cc.journeyman.milkwood;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
/**
 * A map from a token to the collection of word sequences recorded against
 * that token, which creates entries on demand.
 *
 * @author Simon Brooke <simon@journeyman.cc>
 */
public class TupleDictionary extends HashMap<String, Collection<WordSequence>> {
    private static final long serialVersionUID = 1L;

    /**
     * Specialisation: look up this token and, should it have no entry yet,
     * give it a new empty one first. Being declared on String this is an
     * overload of, not an override of, the inherited lookup by Object.
     *
     * @param token the token to look up
     * @return the collection of possible tuples for that token; never null.
     */
    public Collection<WordSequence> get(String token) {
        final Collection<WordSequence> existing = super.get(token);

        if (existing != null) {
            return existing;
        }

        final Collection<WordSequence> created = new ArrayList<WordSequence>();
        this.put(token, created);
        return created;
    }

    /**
     * Record a new, empty sequence against this token.
     *
     * @param token the token
     * @return the new sequence which was added.
     */
    protected WordSequence addSequence(String token) {
        final WordSequence fresh = new WordSequence();
        return this.addSequence(token, fresh);
    }

    /**
     * Record this sequence against this token.
     *
     * @param token the token.
     * @param sequence the sequence to add. Must not be null!
     * @return the sequence which was added.
     */
    protected WordSequence addSequence(String token, WordSequence sequence) {
        assert (sequence != null) : "invalid sequence argument";

        final Collection<WordSequence> entry = this.get(token);
        entry.add(sequence);

        return sequence;
    }
}

View file

@ -0,0 +1,56 @@
package cc.journeyman.milkwood;
import java.util.Stack;
/**
 * Sliding window which rules may match, held as a stack of words.
 *
 * @author simon
 *
 */
public class Window extends Stack<String> {
    private static final long serialVersionUID = 1L;

    /**
     * Create a new, empty, window.
     */
    public Window() {
        super();
    }

    /**
     * Create a new window by sliding an existing one along by one word: the
     * word on top of the prototype is left out, the prototype's other words
     * keep their relative order, and the new word goes in underneath them all
     * at the bottom. The new window is therefore as long as the old. The
     * prototype itself is not altered; it must not be empty.
     *
     * @param prototype the window to copy from.
     * @param terminal the new terminal word.
     */
    public Window(Window prototype, String terminal) {
        super();

        final Window remainder = prototype.duplicate();
        remainder.pop();

        /* the new word goes to the bottom, the survivors go back on top of it */
        this.push(terminal);
        for (String survivor : remainder) {
            this.push(survivor);
        }
    }

    /**
     * A wrapper round clone which hides all the ugly casting.
     *
     * @return a duplicate copy of myself.
     */
    public Window duplicate() {
        final Object twin = this.clone();
        return (Window) twin;
    }
}

View file

@ -12,56 +12,58 @@ import java.util.Queue;
/**
 * An ordered sequence of words. Of course it implements Queue since it is a
 * LinkedList and LinkedList implements Queue, but I want to make it explicitly
 * clear that this is a queue and can be used as such. Different from Window,
 * which is a Stack.
 *
 * @see Window
 *
 * @author Simon Brooke <simon@journeyman.cc>
 */
public class WordSequence extends LinkedList<String> implements Queue<String> {

    private static final long serialVersionUID = 1L;

    /**
     * Copy myself up to and including the last token which ends with this
     * marker. I am not altered.
     *
     * @param marker a marker to terminate after the last occurrence of.
     * @return a copy of my tokens, truncated after the last token ending with
     * the marker; a complete copy if no token ends with it.
     */
    public WordSequence truncateAtLastInstance(String marker) {
        /*
         * First pass: find where the last marked token is. Asking
         * contains(marker) of the whole sequence while standing on a marked
         * token is always true, so it cannot tell us whether more follow.
         */
        int lastMarked = -1;
        int index = 0;
        for (String token : this) {
            if (token.endsWith(marker)) {
                lastMarked = index;
            }
            index++;
        }

        /* with no marked token there is nothing to cut: keep everything */
        final int keep = (lastMarked < 0) ? index : lastMarked + 1;

        /* Second pass: copy that many tokens. */
        final WordSequence result = new WordSequence();
        for (String token : this) {
            if (result.size() == keep) {
                break;
            }
            result.add(token);
        }

        return result;
    }

    /**
     * Specialisation: Working around the bug that the tokeniser treats PERIOD
     * as a word character. A token matches if it ends with the string form of
     * the target, rather than only if it equals the target.
     *
     * @param target the marker to look for; null matches nothing.
     * @return true if any of my tokens ends with the target's string form.
     */
    @Override
    public boolean contains(Object target) {
        boolean result = false;

        if (target != null) {
            String marker = target.toString();

            for (String token : this) {
                if (token.endsWith(marker)) {
                    result = true;
                    break;
                }
            }
        }

        return result;
    }
}

View file

@ -1,57 +0,0 @@
package cc.journeyman.milkwood;
import java.util.Stack;
/**
 * Sliding window which rules may match, held as a stack of words.
 *
 * @author simon
 *
 */
public class WordStack extends Stack<String> {
    private static final long serialVersionUID = 1L;

    /**
     * Create a new, empty, wordstack.
     */
    public WordStack() {
        super();
    }

    /**
     * Create a new window from an existing one, shuffled along by one word.
     * The word on top of the prototype is dropped, the new word is placed at
     * the bottom, and the prototype's remaining words sit above it in their
     * original order; the result is as long as the prototype. The prototype
     * is left untouched, and must not be empty.
     *
     * @param prototype the window to copy from.
     * @param terminal the new terminal word.
     */
    public WordStack(WordStack prototype, String terminal) {
        super();

        final WordStack rest = prototype.duplicate();
        rest.pop();

        /* bottom first, then everything which survived, bottom to top */
        this.push(terminal);
        this.addAll(rest);
    }

    /**
     * A wrapper round clone which hides all the ugly casting.
     *
     * @return a duplicate copy of myself.
     */
    public WordStack duplicate() {
        final Object twin = this.clone();
        return (WordStack) twin;
    }
}

View file

@ -16,156 +16,141 @@ import java.util.Random;
/**
* A special purpose writer to write sequences of tokens, chopping them up into
* paragraphs on the fly..
*
*
* @author Simon Brooke <simon@journeyman.cc>
*/
class Writer extends BufferedWriter {
/**
* The average number of sentences in a paragraph.
*/
public static final int AVSENTENCESPERPARA = 5;
/**
* A random number generator.
*/
private static Random RANDOM = new Random();
/**
* Dictionary of first-words we know about; each first-word maps onto a
* tuple of tuples of word sequences beginning with that word, so 'I' might
* map onto [[I, CAME, COMMA],[I, SAW, COMMA],[I CONQUERED COMMA]].
*/
TupleDictionary dictionary = new TupleDictionary();
/**
* Whether or not I am in debugging mode.
*/
@SuppressWarnings("unused")
private final boolean debug;
/**
* Line separator on this platform.
*/
public static final String NEWLINE = System.getProperty("line.separator");
/**
* The average number of sentences in a paragraph.
*/
public static final int AVSENTENCESPERPARA = 5;
/**
* A random number generator.
*/
private static Random RANDOM = new Random();
/**
* Whether or not I am in debugging mode.
*/
@SuppressWarnings("unused")
private final boolean debug;
/**
* @param out
* the output stream to which I shall write.
* @param debug
* Whether or not I am in debugging mode.
*/
public Writer(OutputStream out, final boolean debug) {
super(new OutputStreamWriter(out));
this.debug = debug;
}
/**
* @param out the output stream to which I shall write.
* @param debug Whether or not I am in debugging mode.
*/
public Writer(OutputStream out, final boolean debug) {
super(new OutputStreamWriter(out));
this.debug = debug;
}
/**
* Write this sequence of tokens on this stream, sorting out minor issues of
* orthography.
*
* @param tokens
* the tokens.
* @throws IOException
* if it is impossible to write (e.g. file system full).
*/
public void writeSequence(WordSequence tokens) throws IOException {
boolean capitaliseNext = true;
/**
* Write this sequence of tokens on this stream, sorting out minor issues of
* orthography.
*
* @param tokens the tokens.
* @throws IOException if it is impossible to write (e.g. file system full).
*/
public void writeSequence(WordSequence tokens) throws IOException {
boolean capitaliseNext = true;
try {
for (String token : tokens) {
capitaliseNext = writeToken(capitaliseNext, token);
}
} finally {
this.flush();
this.close();
}
}
for (String token : tokens) {
capitaliseNext = writeToken(capitaliseNext, token);
}
this.write(NEWLINE);
}
/**
* Deal with end of paragraph, capital after full stop, and other minor
* orthographic conventions.
*
* @param capitalise
* whether or not the token should be capitalised
* @param token
* the token to write;
* @returnvtrue if the next token to be written should be capitalised.
* @throws IOException
*/
private boolean writeToken(boolean capitalise, String token)
throws IOException {
if (this.spaceBefore(token)) {
this.write(" ");
}
if (capitalise) {
this.write(token.substring(0, 1).toUpperCase(Locale.getDefault()));
this.write(token.substring(1));
} else {
this.write(token);
}
/**
* Deal with end of paragraph, capital after full stop, and other minor
* orthographic conventions.
*
* @param capitalise whether or not the token should be capitalised
* @param token the token to write;
* @returnvtrue if the next token to be written should be capitalised.
* @throws IOException
*/
private boolean writeToken(boolean capitalise, String token)
throws IOException {
if (this.spaceBefore(token)) {
this.write(" ");
}
if (capitalise) {
this.write(token.substring(0, 1).toUpperCase(Locale.getDefault()));
this.write(token.substring(1));
} else {
this.write(token);
}
this.maybeParagraph(token);
this.maybeParagraph(token);
return (token.endsWith(Milkwood.PERIOD));
}
return (token.endsWith(Milkwood.PERIOD));
}
/**
* Return false if token is punctuation, else true. Wouldn't it be nice if
* Java provided Character.isPunctuation(char)? However, since it doesn't, I
* can give this slightly special semantics: return true only if this is
* punctuation which would not normally be preceded with a space.
*
* @param ch
* a character.
* @return true if the should be preceded by a space, else false.
*/
private boolean spaceBefore(String token) {
final boolean result;
/**
* Return false if token is punctuation, else true. Wouldn't it be nice if
* Java provided Character.isPunctuation(char)? However, since it doesn't, I
* can give this slightly special semantics: return true only if this is
* punctuation which would not normally be preceded with a space.
*
* @param ch a character.
* @return true if the should be preceded by a space, else false.
*/
private boolean spaceBefore(String token) {
final boolean result;
switch (token.length()) {
case 0:
result = false;
break;
case 1:
switch (token.charAt(0)) {
case '.':
case ',':
case ':':
case ';':
case 's':
/*
* an 's' on its own is probably evidence of a possessive with
* the apostrophe lost
*/
case 't':
/*
* similar; probably 'doesn't' or 'shouldn't' or other cases of
* 'not' with an elided 'o'.
*/
result = false;
break;
default:
result = true;
break;
}
break;
default:
result = true;
}
switch (token.length()) {
case 0:
result = false;
break;
case 1:
switch (token.charAt(0)) {
case '.':
case ',':
case ':':
case ';':
case 's':
/*
* an 's' on its own is probably evidence of a possessive with
* the apostrophe lost
*/
case 't':
/*
* similar; probably 'doesn't' or 'shouldn't' or other cases of
* 'not' with an elided 'o'.
*/
result = false;
break;
default:
result = true;
break;
}
break;
default:
result = true;
}
return result;
}
/**
* If this token is an end-of-sentence token, then, on one chance in some,
* have the writer write two new lines. NOTE: The tokeniser is treating
* PERIOD ('.') as a word character, even though it has not been told to.
* Token.endsWith( PERIOD) is a hack to get round this problem. TODO:
* investigate and fix.
*
* @param token
* a token
* @throws IOException
* if Mr this has run out of ink
*/
private void maybeParagraph(String token) throws IOException {
if (token.endsWith(Milkwood.PERIOD)
&& RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
this.write("\n\n");
}
}
return result;
}
/**
* If this token is an end-of-sentence token, then, on one chance in some,
* have the writer write two new lines. NOTE: The tokeniser is treating
* PERIOD ('.') as a word character, even though it has not been told to.
* Token.endsWith( PERIOD) is a hack to get round this problem. TODO:
* investigate and fix.
*
* @param token a token
* @throws IOException if Mr this has run out of ink
*/
private void maybeParagraph(String token) throws IOException {
if (token.endsWith(Milkwood.PERIOD)
&& RANDOM.nextInt(AVSENTENCESPERPARA) == 0) {
this.write(NEWLINE);
this.write(NEWLINE);
}
}
}