64 lines
2.3 KiB
Clojure
64 lines
2.3 KiB
Clojure
(ns milkwood-clj.analyse
  "Build rule trees (recursively nested maps of token => rule tree) from
  sequences of tokens."
  ;; ns reference clauses must be keywords: a bare `(require ...)` is rejected
  ;; by the ns spec in Clojure 1.9 and later.
  (:require
    [milkwood-clj.utils :as utils]
    [clojure.set :as set])
  (:gen-class))
|
|
|
|
(defn compose-rule
  "Build a rule tree holding exactly one rule: the given path.

  path: a flat sequence of tokens.

  Returns a chain of single-entry maps, one level per token, with nil at the
  innermost position; returns nil when the path is empty."
  [path]
  ;; Fold from the last token outwards, so that each step wraps the tree built
  ;; so far under the token that precedes it.
  (reduce (fn [subtree token] (hash-map token subtree))
          nil
          (reverse path)))
|
|
|
|
|
|
(defn merge-rules
  "Merge two rule trees into one.

  these, those: the rule trees to combine.

  Delegates to utils/deep-merge-with, handing it set/union as the combining
  function. NOTE(review): presumably set/union is applied only where the two
  trees hold non-map values at the same position; confirm against
  utils/deep-merge-with."
  [these those]
  (utils/deep-merge-with set/union
                         these
                         those))
|
|
|
|
(defn add-rule
  "Add the rule defined by this path to these rules.

  rules: a rule tree (i.e. a recursively nested map token => rule-tree), or
         nil for an empty tree;
  path: a flat sequence of tokens.

  Returns a rule tree containing everything in `rules` plus the branch
  described by `path`. Existing branches are preserved; an empty path leaves
  `rules` unchanged."
  [rules path]
  (if (empty? path)
    ;; no more path: nothing to add, so keep whatever is already here.
    rules
    (let [token (first path)]
      ;; Replace the rule for the first token of the path with the rule built
      ;; from the rest of the path and the old rule for that token. The result
      ;; must be stored under `token`; merging it in at this level would drop
      ;; the token and graft the tail of the path onto the wrong node.
      ;; `assoc` on nil yields a fresh map, so an absent subtree needs no
      ;; special case.
      (assoc rules token (add-rule (get rules token) (rest path))))))
|
|
|
|
(defn analyse-tokens
  "Read this sequence of tokens and process it into rules.

  rules: a rule tree, which is to say a map which maps words onto rule trees
         (yes, it's recursive);
  anger: a lookback window, holding the last n tokens read, where n = depth;
  tokens: the sequence of tokens we're reading;
  depth: the depth of rules/length of window we're considering.

  Returns the rule tree once every token has been consumed. Iterates with
  `recur`, so stack use does not grow with the number of tokens."
  [rules anger tokens depth]
  (if (empty? tokens)
    rules
    ;; take the next token to consider off the front of the tokens and add it
    ;; to the end of the sliding window
    (let [token (first tokens)
          rage (utils/slide-window anger token depth)
          ;; only a window that is deep enough contributes a rule.
          updated (if (= (count rage) depth)
                    (add-rule rules rage)
                    rules)]
      ;; `recur` rather than a named self-call: Clojure does not eliminate
      ;; tail calls, so calling analyse-tokens by name would use one stack
      ;; frame per token and overflow on long inputs.
      (recur updated rage (rest tokens) depth))))
|
|
|
|
|
|
(defn analyse-file
  "Read this file and process it into rules.

  file: the path name of a file to read;
  depth: the depth of rules/length of window we're considering.

  The whole file is read into memory and split into tokens: a word with a
  trailing 's, a plain word, or a single punctuation character. Each token is
  lower-cased before being handed to analyse-tokens, which starts from no
  rules and an empty window."
  [file depth]
  (let [text (slurp file)
        words (re-seq #"\w+\'s|\w+|\p{Punct}" text)
        tokens (map #(.toLowerCase %) words)]
    (analyse-tokens nil nil tokens depth)))
|
|
|