Compare commits
No commits in common. "86665db3b8006d98970cb0dfb4c0b6f854be1805" and "73549a5c90c42337d27d7d222546e997a69974e8" have entirely different histories.
86665db3b8
...
73549a5c90
5 changed files with 5 additions and 43 deletions
4
.gitignore
vendored
4
.gitignore
vendored
|
|
@ -13,6 +13,4 @@ pom.xml.asc
|
||||||
.lein-failures
|
.lein-failures
|
||||||
.nrepl-port
|
.nrepl-port
|
||||||
.cpcache/
|
.cpcache/
|
||||||
.lsp/
|
|
||||||
.clj-kondo/
|
|
||||||
.portal/
|
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,7 @@
|
||||||
:url "http://example.com/FIXME"
|
:url "http://example.com/FIXME"
|
||||||
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
:license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
|
||||||
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
:url "https://www.eclipse.org/legal/epl-2.0/"}
|
||||||
:dependencies [[dev.weavejester/medley "1.9.0"]
|
:dependencies [[org.clojure/clojure "1.11.1"]
|
||||||
[org.clojure/clojure "1.11.1"]
|
|
||||||
[peco "0.1.6"]]
|
[peco "0.1.6"]]
|
||||||
:repl-options {:init-ns cc.journeyman.elboob.core}
|
:repl-options {:init-ns cc.journeyman.elboob.core}
|
||||||
:source-paths ["src/clj"])
|
:source-paths ["src/clj"])
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
ignorable-words.en.edn
|
|
||||||
|
|
@ -1,43 +1,9 @@
|
||||||
(ns cc.journeyman.elboob.core
|
(ns cc.journeyman.elboob.core)
|
||||||
(:require [clojure.java.io :refer [as-relative-path file resource]]
|
|
||||||
[clojure.string :refer [ends-with?]]
|
|
||||||
[medley.core :refer [deep-merge]]
|
|
||||||
[peco.core :refer [tokenizer]]))
|
|
||||||
|
|
||||||
(defn get-ignorable-words []
|
(defn compile
|
||||||
(let [locale (java.util.Locale/getDefault)
|
|
||||||
tag (.toLanguageTag locale)
|
|
||||||
language (.getLanguage locale)]
|
|
||||||
|
|
||||||
(first
|
|
||||||
(map #(try (println (format "resources/ignorable-words%s.edn" %))
|
|
||||||
(read-string
|
|
||||||
(slurp
|
|
||||||
(file (format "resources/ignorable-words%s.edn" %))))
|
|
||||||
(catch Exception e (println (.getMessage e)) nil))
|
|
||||||
[(str "." tag) (str "." language) ""]))))
|
|
||||||
|
|
||||||
(defn compile-file
|
|
||||||
"Compile an index for an individual file `f`, tokenised with `tokenise` and
|
|
||||||
filtered with `ignorable?`."
|
|
||||||
[f tokenise ignorable?]
|
|
||||||
(let [f' (file f)
|
|
||||||
rel (as-relative-path f')
|
|
||||||
tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
|
|
||||||
(reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))
|
|
||||||
|
|
||||||
(defn compile-index
|
|
||||||
"scans `dir-paths` as directories of Markdown files. Returns a map which keys
|
"scans `dir-paths` as directories of Markdown files. Returns a map which keys
|
||||||
each lexical token occurring in each file (with Markdown formatting, common
|
each lexical token occurring in each file (with Markdown formatting, common
|
||||||
words, punctuation etc excepted) to a map which keys the relative file path
|
words, punctuation etc excepted) to a map which keys the relative file path
|
||||||
of each file in which the token occurs to the frequency the token occurs within the file."
|
of each file in which the token occurs to the frequency the token occurs within the file."
|
||||||
[& dir-paths]
|
[& dir-paths]
|
||||||
(let [ignorable-word? (set (get-ignorable-words))
|
(println "Hello, World!"))
|
||||||
tokenise (tokenizer [:lower-case :concat-singles])]
|
|
||||||
(reduce deep-merge {}
|
|
||||||
(map (fn [dir]
|
|
||||||
(map #(compile-file % tokenise ignorable-word?)
|
|
||||||
(filter
|
|
||||||
#(ends-with? (.getName %) ".md")
|
|
||||||
(file-seq (file dir))))) dir-paths))))
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue