Compare commits

..

No commits in common. "e9d0c1b806284a609cfc36c1981fa4479afa12f9" and "21b6bfd67ea9bfe796f61785746e11ea022971e2" have entirely different histories.

View file

@@ -1,37 +1 @@
(ns cc.journeyman.elboob.search
  "Search the index for arbitrary tokens. It would be really nice if
  this could run in Scittle.")
(defn score-token
  "Score this `token` in the context of this `index` and `path`.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v.; as used here, that is a
  map from token to a map from path to score.

  Returns the value stored for `token` under `path`, or `1` when there is
  no such value: either the token is not in the index at all, or it has no
  entry for this path. `1` is used because callers multiply token scores
  together, so a missing token must not alter the product."
  [index path token]
  ;; `get-in` is nil-safe: a token that is absent from the index yields nil
  ;; here instead of attempting to invoke nil as a function.
  (or (get-in index [token path]) 1))
(defn score-path
  "Compute the score of `path` for the given `tokens` against `index`.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v.

  Each token is scored individually with `score-token`; zero scores are
  discarded and the remaining scores are multiplied together."
  [index path tokens]
  (->> tokens
       (map (fn [token] (score-token index path token)))
       ;; A zero would wipe out the whole product, so drop it.
       (remove zero?)
       (reduce *)))
(defn search
  "Search this `index`, expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v., for these `tokens`,
  expected to be a sequence of strings representing individual lower
  case words.

  Returns a sequence of file paths ordered by descending score, where the
  score of a path is the product of the frequencies of the tokens in the
  indexed pages (see `score-path`). Tokens which do not occur in the index
  are ignored; if no token occurs in the index the result is empty."
  [index tokens]
  (let [;; Tokens absent from the index can neither contribute paths nor
        ;; change a product of scores, so drop them before scoring rather
        ;; than passing them down to the scoring functions.
        known  (filter index tokens)
        ;; Every path on which at least one known token occurs. `mapcat`
        ;; concatenates exactly one level, so a path key is never broken
        ;; apart the way `flatten` would break a sequential key.
        paths  (set (mapcat #(keys (index %)) known))
        ;; Map of path -> score.
        scores (reduce
                (fn [acc path]
                  (assoc acc path (score-path index path known)))
                {}
                paths)]
    (sort-by scores > (keys scores))))