Now successfully creating indexes. Started work on a search function, but out of steam.
parent 86665db3b8
commit 5e33f2c815

2 changed files with 15 additions and 13 deletions
@@ -26,6 +26,11 @@
         tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
     (reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))
 
+(defn files-with-suffix [dir suffix]
+  (filter
+   #(ends-with? (.getName %) suffix)
+   (file-seq (file dir))))
+
 (defn compile-index
   "scans `dir-paths` as directories of Markdown files. Returns a map which keys
   each lexical token occurring in each file (with Markdown formatting, common
@@ -35,9 +40,6 @@
   (let [ignorable-word? (set (get-ignorable-words))
         tokenise (tokenizer [:lower-case :concat-singles])]
     (reduce deep-merge {}
-            (map (fn [dir]
-                   (map #(compile-file % tokenise ignorable-word?)
-                        (filter
-                         #(ends-with? (.getName %) ".md")
-                         (file-seq (file dir))))) dir-paths))))
+            (map #(compile-file % tokenise ignorable-word?)
+                 (flatten (map #(files-with-suffix % ".md") dir-paths))))))
 
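For context, a hedged sketch of what this refactor does: the file-filtering that was previously inlined in compile-index is pulled out into files-with-suffix, so building the index reduces to mapping compile-file over one flat sequence of Markdown files. The namespace name and the markdown-files helper below are illustrative only, and the sketch assumes clojure.string/ends-with? and clojure.java.io/file are the functions behind the bare ends-with? and file calls in the diff.

;; Illustrative sketch only; not the project's actual namespace.
(ns sketch.index
  (:require [clojure.java.io :refer [file]]
            [clojure.string :refer [ends-with?]]))

(defn files-with-suffix
  "Return every file under `dir` whose name ends with `suffix`."
  [dir suffix]
  (filter #(ends-with? (.getName %) suffix)
          (file-seq (file dir))))

;; With the helper extracted, gathering all Markdown files across several
;; directories collapses to a single expression (hypothetical helper name):
(defn markdown-files [dir-paths]
  (flatten (map #(files-with-suffix % ".md") dir-paths)))

;; e.g. (markdown-files ["doc" "content"])
;; => a flat sequence of java.io.File objects whose names end in .md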
src/clj/cc/journeyman/elboob/search.cljc — new empty file (0 lines)