(ns cc.journeyman.elboob.search
  "Search the index for arbitrary tokens. It would be really nice if
  this could run in Scittle.")

(defn score-token
  "Score this `token` in the context of this `index` and `path`.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v.; it is read here as a
  map from token to a map from path to score.

  Returns the value stored for `token` at `path`, or the neutral score `1`
  when the token is not in the index at all, or is not recorded for that
  path."
  [index path token]
  ;; `get-in` is nil-safe: a token missing from the index yields nil rather
  ;; than an attempt to invoke nil as a function.
  (or (get-in index [token path]) 1))

(defn score-path
  "Score this `path`, in the context of this `index` and `tokens`.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v.

  The score is the product of the per-token scores (see `score-token`).
  Zero scores are dropped before multiplying so that a single zero cannot
  wipe out the whole product; with no tokens the result is `1`."
  [index path tokens]
  (->> tokens
       (map #(score-token index path %))
       (remove zero?)
       (reduce *)))

(defn search
  "Search this `index`, expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v., for these `tokens`,
  expected to be a sequence of strings representing individual lower
  case words.

  Returns a sequence of the file paths on which at least one of the tokens
  occurs, ordered by descending product of the scores of the tokens on each
  path. Tokens which do not occur in the index are ignored; if none of the
  tokens occur, the result is empty."
  [index tokens]
  (let [;; Every path mentioned under any of the tokens. `mapcat` contributes
        ;; nothing for a token that is absent from the index, so no nil
        ;; pseudo-path can leak into the set.
        paths  (into #{} (mapcat #(keys (get index %))) tokens)
        scores (into {}
                     (map (fn [path] [path (score-path index path tokens)]))
                     paths)]
    (sort-by scores > (keys scores))))