# Patch overview (commentary only; everything from the "diff --git" header onward is unchanged).
#
# Files touched:
#   * src/clj/cc/journeyman/elboob/core.clj    - modified, three hunks.
#   * src/clj/cc/journeyman/elboob/search.cljc - created (new file mode 100644); no hunks follow
#     its header, so the patch adds no content to it.
#
# Hunk 1 (-8,14 +8,14), tail of the function preceding `compile-file`:
#   the removed and added lines carry the same text in this rendering, so this is presumably a
#   whitespace/indentation-only change - TODO confirm against the un-collapsed patch.
#
# Hunk 2 (-26,6 +26,11): adds `(defn files-with-suffix [dir suffix] ...)`, which filters
#   `(file-seq (file dir))` down to the entries whose `.getName` ends with `suffix`.
#   NOTE(review): the new function has no docstring, unlike `compile-file` and `compile-index`.
#   NOTE(review): the filter checks the name only; if `file-seq` also yields directories, a
#   directory whose name ends in the suffix would be handed on to `compile-file` - confirm and
#   consider adding an is-file check.
#
# Hunk 3 (-35,9 +40,6), body of `compile-index`:
#   the inline filter over `file-seq` is replaced by a call to `files-with-suffix` with ".md",
#   and the nested `map` (one inner sequence per directory) becomes a single `map` of
#   `compile-file` over `(flatten (map ... dir-paths))`.
#   NOTE(review): the old form passed a sequence of per-directory sequences to
#   `(reduce deep-merge {} ...)`, the new form passes one flat sequence of per-file results;
#   `deep-merge` is not visible here, so verify the result is the intended one.
#   NOTE(review): `(mapcat #(files-with-suffix % ".md") dir-paths)` would express the same
#   one-level concatenation without `flatten`.
#
diff --git a/src/clj/cc/journeyman/elboob/core.clj b/src/clj/cc/journeyman/elboob/core.clj index 5a7bc89..7016490 100644 --- a/src/clj/cc/journeyman/elboob/core.clj +++ b/src/clj/cc/journeyman/elboob/core.clj @@ -8,14 +8,14 @@ (let [locale (java.util.Locale/getDefault) tag (.toLanguageTag locale) language (.getLanguage locale)] - - (first - (map #(try (println (format "resources/ignorable-words%s.edn" %)) - (read-string - (slurp - (file (format "resources/ignorable-words%s.edn" %)))) - (catch Exception e (println (.getMessage e)) nil)) - [(str "." tag) (str "." language) ""])))) + + (first + (map #(try (println (format "resources/ignorable-words%s.edn" %)) + (read-string + (slurp + (file (format "resources/ignorable-words%s.edn" %)))) + (catch Exception e (println (.getMessage e)) nil)) + [(str "." tag) (str "." language) ""])))) (defn compile-file "Compile an index for an individual file `f`, tokenised with `tokenise` and @@ -26,6 +26,11 @@ tokens (frequencies (remove ignorable? (tokenise (slurp f'))))] (reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens)))) +(defn files-with-suffix [dir suffix] + (filter + #(ends-with? (.getName %) suffix) + (file-seq (file dir)))) + (defn compile-index "scans `dir-paths` as directories of Markdown files. Returns a map which keys each lexical token occurring in each file (with Markdown formatting, common @@ -35,9 +40,6 @@ (let [ignorable-word? (set (get-ignorable-words)) tokenise (tokenizer [:lower-case :concat-singles])] (reduce deep-merge {} - (map (fn [dir] - (map #(compile-file % tokenise ignorable-word?) - (filter - #(ends-with? (.getName %) ".md") - (file-seq (file dir))))) dir-paths)))) + (map #(compile-file % tokenise ignorable-word?) + (flatten (map #(files-with-suffix % ".md") dir-paths)))))) diff --git a/src/clj/cc/journeyman/elboob/search.cljc b/src/clj/cc/journeyman/elboob/search.cljc new file mode 100644 index 0000000..e69de29