Now successfully creating indexes. Started work on a search function, but out of steam.

This commit is contained in:
Simon Brooke 2025-10-31 16:20:31 +00:00
parent 86665db3b8
commit 5e33f2c815
2 changed files with 15 additions and 13 deletions


@@ -8,14 +8,14 @@
  (let [locale (java.util.Locale/getDefault)
        tag (.toLanguageTag locale)
        language (.getLanguage locale)]
    (first
     (map #(try (println (format "resources/ignorable-words%s.edn" %))
                (read-string
                 (slurp
                  (file (format "resources/ignorable-words%s.edn" %))))
                (catch Exception e (println (.getMessage e)) nil))
          [(str "." tag) (str "." language) ""]))))
(defn compile-file
"Compile an index for an individual file `f`, tokenised with `tokenise` and
@@ -26,6 +26,11 @@
        tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
    (reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))

(defn files-with-suffix [dir suffix]
  (filter
   #(ends-with? (.getName %) suffix)
   (file-seq (file dir))))
(defn compile-index
"scans `dir-paths` as directories of Markdown files. Returns a map which keys
each lexical token occurring in each file (with Markdown formatting, common
@@ -35,9 +40,6 @@
  (let [ignorable-word? (set (get-ignorable-words))
        tokenise (tokenizer [:lower-case :concat-singles])]
    (reduce deep-merge {}
-           (map (fn [dir]
-                  (map #(compile-file % tokenise ignorable-word?)
-                       (filter
-                        #(ends-with? (.getName %) ".md")
-                        (file-seq (file dir))))) dir-paths))))
+           (map #(compile-file % tokenise ignorable-word?)
+                (flatten (map #(files-with-suffix % ".md") dir-paths))))))
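Since each per-file map from compile-file has the shape {token {relative-path count}}, deep-merging them gives an index of the form {token {path-1 count-1, path-2 count-2, ...}}. The search function mentioned in the commit message is not in this commit, so the following is only a hedged sketch of how such an index might be queried; the directory path, the result values, and the naive-search name are illustrative assumptions, not part of the code above.

;; Hypothetical usage sketch (not part of this commit). Assumes the index
;; maps each token to {relative-file-path occurrence-count, ...}.
(comment
  (def index (compile-index ["doc"]))          ; "doc" is an assumed path

  ;; Which files mention "markdown", and how often?
  (get index "markdown")
  ;; => {"doc/intro.md" 3, "doc/syntax.md" 1}  ; shape only; values invented

  ;; One possible shape for a search function: tokenise the query with the
  ;; same tokeniser used at index time, merge the per-file counts for every
  ;; query token, and rank files by total occurrences.
  (defn naive-search [index query]
    (let [tokenise (tokenizer [:lower-case :concat-singles])]
      (->> (tokenise query)
           (keep index)                        ; maps act as lookup functions
           (apply merge-with +)                ; sum counts per file
           (sort-by val >))))                  ; best-matching files first

  (naive-search index "markdown index"))

Ranking on raw occurrence counts is the simplest possible scoring; anything like tf-idf weighting would need document lengths or collection totals that this index does not yet record.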
