Compare commits

..

No commits in common. "86665db3b8006d98970cb0dfb4c0b6f854be1805" and "73549a5c90c42337d27d7d222546e997a69974e8" have entirely different histories.

5 changed files with 5 additions and 43 deletions

4
.gitignore vendored
View file

@ -13,6 +13,4 @@ pom.xml.asc
.lein-failures .lein-failures
.nrepl-port .nrepl-port
.cpcache/ .cpcache/
.lsp/
.clj-kondo/
.portal/

View file

@ -3,8 +3,7 @@
:url "http://example.com/FIXME" :url "http://example.com/FIXME"
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0" :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
:url "https://www.eclipse.org/legal/epl-2.0/"} :url "https://www.eclipse.org/legal/epl-2.0/"}
:dependencies [[dev.weavejester/medley "1.9.0"] :dependencies [[org.clojure/clojure "1.11.1"]
[org.clojure/clojure "1.11.1"]
[peco "0.1.6"]] [peco "0.1.6"]]
:repl-options {:init-ns cc.journeyman.elboob.core} :repl-options {:init-ns cc.journeyman.elboob.core}
:source-paths ["src/clj"]) :source-paths ["src/clj"])

View file

@ -1 +0,0 @@
ignorable-words.en.edn

View file

@ -1,43 +1,9 @@
(ns cc.journeyman.elboob.core (ns cc.journeyman.elboob.core)
(:require [clojure.java.io :refer [as-relative-path file resource]]
[clojure.string :refer [ends-with?]]
[medley.core :refer [deep-merge]]
[peco.core :refer [tokenizer]]))
;; get-ignorable-words -- left-hand column of this side-by-side diff. The
;; trailing `(defn compile` on the first line below belongs to the right-hand
;; column, not to this function.
;;
;; Reads the JVM default locale and derives three file-name suffixes from it:
;; ".<language-tag>", ".<language>" and "" (no suffix). For each suffix it
;; prints the candidate path `resources/ignorable-words<suffix>.edn`, slurps
;; that file and parses the text with `read-string`. Any exception is caught,
;; its message is printed, and nil is used as that candidate's result. The
;; function returns the first element of the mapped results.
;;
;; NOTE(review): `first` returns the result for the most specific suffix even
;; when that result is nil (e.g. the file is missing), so the less specific
;; fallbacks are never returned. `(some identity ...)` or
;; `(first (remove nil? ...))` looks like the intent -- confirm.
;; NOTE(review): this is clojure.core/read-string, which honours reader-eval
;; forms unless *read-eval* is bound to false; clojure.edn/read-string is the
;; usual choice for .edn data -- confirm the resource files are trusted.
;; NOTE(review): the path is relative to the process working directory, not
;; resolved through the classpath -- confirm that is intended.
(defn get-ignorable-words [] (defn compile
(let [locale (java.util.Locale/getDefault)
tag (.toLanguageTag locale)
language (.getLanguage locale)]
(first
(map #(try (println (format "resources/ignorable-words%s.edn" %))
(read-string
(slurp
(file (format "resources/ignorable-words%s.edn" %))))
(catch Exception e (println (.getMessage e)) nil))
[(str "." tag) (str "." language) ""]))))
(defn compile-file
  "Build the index fragment for the single file `f`.

  The file's text is split into tokens by the function `tokenise`; every token
  for which the predicate `ignorable?` returns truthy is dropped. Returns a map
  from each remaining token to a one-entry map of
  `{relative-path-of-f occurrence-count}`.

  The path key comes from `as-relative-path`, which throws an
  IllegalArgumentException when `f` denotes an absolute path."
  [f tokenise ignorable?]
  (let [source   (file f)
        ;; Resolve the key before reading, so an absolute path fails fast.
        path-key (as-relative-path source)
        counts   (->> (slurp source)
                      tokenise
                      (remove ignorable?)
                      frequencies)]
    (into {}
          (map (fn [[token n]] [token {path-key n}]))
          counts)))
;; compile-index -- left-hand column of this side-by-side diff. On the lines
;; below, the docstring text and the `[& dir-paths]` vector appear twice because
;; the right-hand column's `compile` function shares them, and the
;; `(println "Hello, World!"))` text belongs to that right-hand function.
;;
;; Builds `ignorable-word?` as a set of whatever `get-ignorable-words` returns,
;; and `tokenise` from peco's `tokenizer` with [:lower-case :concat-singles].
;; For each entry of `dir-paths` it walks `file-seq`, keeps the entries whose
;; name ends with ".md", calls `compile-file` on each, and folds the results
;; with `deep-merge` starting from {}.
;;
;; NOTE(review): the outer `map` yields one sequence of per-file maps per
;; directory, so `reduce deep-merge` receives sequences rather than maps.
;; `mapcat` over `dir-paths` looks like the intent -- verify against medley's
;; `deep-merge`.
;; NOTE(review): `file-seq` also yields directories; a directory whose name
;; ends in ".md" would reach `slurp` in `compile-file` -- confirm whether an
;; `.isFile` check is wanted.
(defn compile-index
"scans `dir-paths` as directories of Markdown files. Returns a map which keys "scans `dir-paths` as directories of Markdown files. Returns a map which keys
each lexical token occurring in each file (with Markdown formatting, common each lexical token occurring in each file (with Markdown formatting, common
words, punctuation etc excepted) to a map which keys the relative file path words, punctuation etc excepted) to a map which keys the relative file path
of each file in which the token occurs to the frequency the token occurs within the file." of each file in which the token occurs to the frequency the token occurs within the file."
[& dir-paths] [& dir-paths]
(let [ignorable-word? (set (get-ignorable-words)) (println "Hello, World!"))
tokenise (tokenizer [:lower-case :concat-singles])]
(reduce deep-merge {}
(map (fn [dir]
(map #(compile-file % tokenise ignorable-word?)
(filter
#(ends-with? (.getName %) ".md")
(file-seq (file dir))))) dir-paths))))