Now successfully creating indexes. Started work on a search function, but out of steam.
This commit is contained in:
parent
86665db3b8
commit
5e33f2c815
2 changed files with 15 additions and 13 deletions
|
|
@ -9,13 +9,13 @@
|
||||||
tag (.toLanguageTag locale)
|
tag (.toLanguageTag locale)
|
||||||
language (.getLanguage locale)]
|
language (.getLanguage locale)]
|
||||||
|
|
||||||
(first
|
(first
|
||||||
(map #(try (println (format "resources/ignorable-words%s.edn" %))
|
(map #(try (println (format "resources/ignorable-words%s.edn" %))
|
||||||
(read-string
|
(read-string
|
||||||
(slurp
|
(slurp
|
||||||
(file (format "resources/ignorable-words%s.edn" %))))
|
(file (format "resources/ignorable-words%s.edn" %))))
|
||||||
(catch Exception e (println (.getMessage e)) nil))
|
(catch Exception e (println (.getMessage e)) nil))
|
||||||
[(str "." tag) (str "." language) ""]))))
|
[(str "." tag) (str "." language) ""]))))
|
||||||
|
|
||||||
(defn compile-file
|
(defn compile-file
|
||||||
"Compile an index for an individual file `f`, tokenised with `tokenise` and
|
"Compile an index for an individual file `f`, tokenised with `tokenise` and
|
||||||
|
|
@ -26,6 +26,11 @@
|
||||||
tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
|
tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
|
||||||
(reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))
|
(reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))
|
||||||
|
|
||||||
|
(defn files-with-suffix [dir suffix]
|
||||||
|
(filter
|
||||||
|
#(ends-with? (.getName %) suffix)
|
||||||
|
(file-seq (file dir))))
|
||||||
|
|
||||||
(defn compile-index
|
(defn compile-index
|
||||||
"scans `dir-paths` as directories of Markdown files. Returns a map which keys
|
"scans `dir-paths` as directories of Markdown files. Returns a map which keys
|
||||||
each lexical token occurring in each file (with Markdown formatting, common
|
each lexical token occurring in each file (with Markdown formatting, common
|
||||||
|
|
@ -35,9 +40,6 @@
|
||||||
(let [ignorable-word? (set (get-ignorable-words))
|
(let [ignorable-word? (set (get-ignorable-words))
|
||||||
tokenise (tokenizer [:lower-case :concat-singles])]
|
tokenise (tokenizer [:lower-case :concat-singles])]
|
||||||
(reduce deep-merge {}
|
(reduce deep-merge {}
|
||||||
(map (fn [dir]
|
|
||||||
(map #(compile-file % tokenise ignorable-word?)
|
(map #(compile-file % tokenise ignorable-word?)
|
||||||
(filter
|
(flatten (map #(files-with-suffix % ".md") dir-paths))))))
|
||||||
#(ends-with? (.getName %) ".md")
|
|
||||||
(file-seq (file dir))))) dir-paths))))
|
|
||||||
|
|
||||||
|
|
|
||||||
0
src/clj/cc/journeyman/elboob/search.cljc
Normal file
0
src/clj/cc/journeyman/elboob/search.cljc
Normal file
Loading…
Add table
Add a link
Reference in a new issue