Now successfully creating indexes. Started work on a search function, but out of steam.

This commit is contained in:
Simon Brooke 2025-10-31 16:20:31 +00:00
parent 86665db3b8
commit 5e33f2c815
2 changed files with 15 additions and 13 deletions

View file

@ -9,13 +9,13 @@
tag (.toLanguageTag locale) tag (.toLanguageTag locale)
language (.getLanguage locale)] language (.getLanguage locale)]
(first (first
(map #(try (println (format "resources/ignorable-words%s.edn" %)) (map #(try (println (format "resources/ignorable-words%s.edn" %))
(read-string (read-string
(slurp (slurp
(file (format "resources/ignorable-words%s.edn" %)))) (file (format "resources/ignorable-words%s.edn" %))))
(catch Exception e (println (.getMessage e)) nil)) (catch Exception e (println (.getMessage e)) nil))
[(str "." tag) (str "." language) ""])))) [(str "." tag) (str "." language) ""]))))
(defn compile-file (defn compile-file
"Compile an index for an individual file `f`, tokenised with `tokenise` and "Compile an index for an individual file `f`, tokenised with `tokenise` and
@ -26,6 +26,11 @@
tokens (frequencies (remove ignorable? (tokenise (slurp f'))))] tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
(reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens)))) (reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))
(defn files-with-suffix
  "Return a lazy sequence of the regular files at or beneath `dir` (walked
  recursively with `file-seq`) whose names end with `suffix`.

  `dir` is anything `file` accepts (e.g. a path string or a `java.io.File`);
  `suffix` is a string such as `\".md\"`. Directories are excluded even when
  their own names end with `suffix`, because `file-seq` yields directories as
  well as files and callers expect to be able to read each result."
  [dir suffix]
  (filter
   ;; `.isFile` first: a directory called e.g. `notes.md` must not be returned.
   #(and (.isFile %) (ends-with? (.getName %) suffix))
   (file-seq (file dir))))
(defn compile-index (defn compile-index
"scans `dir-paths` as directories of Markdown files. Returns a map which keys "scans `dir-paths` as directories of Markdown files. Returns a map which keys
each lexical token occurring in each file (with Markdown formatting, common each lexical token occurring in each file (with Markdown formatting, common
@ -35,9 +40,6 @@
(let [ignorable-word? (set (get-ignorable-words)) (let [ignorable-word? (set (get-ignorable-words))
tokenise (tokenizer [:lower-case :concat-singles])] tokenise (tokenizer [:lower-case :concat-singles])]
(reduce deep-merge {} (reduce deep-merge {}
(map (fn [dir]
(map #(compile-file % tokenise ignorable-word?) (map #(compile-file % tokenise ignorable-word?)
(filter (flatten (map #(files-with-suffix % ".md") dir-paths))))))
#(ends-with? (.getName %) ".md")
(file-seq (file dir))))) dir-paths))))

View file