Well, we now have a working search algorithm. However, as we don't yet have
human-readable metadata, this only counts as a proof of concept.
This commit is contained in:
Simon Brooke 2025-10-31 21:27:56 +00:00
parent 21b6bfd67e
commit eab8c9737b

View file

@ -1 +1,35 @@
(ns cc.journeyman.elboob.search)
(ns cc.journeyman.elboob.search)
(defn score-token
  "Score this `token` in the context of this `index` and `path`.

  Looks `token` up in `index`, then looks `path` up in the entry found, and
  returns that value. Returns 1 when there is no such value, including the
  case where `token` does not occur in `index` at all.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v."
  [index path token]
  ;; Guard the inner lookup: for a token missing from the index
  ;; `(index token)` is nil, and invoking nil as a function would throw a
  ;; NullPointerException rather than yielding the neutral score.
  (or (when-let [entry (index token)]
        (entry path))
      1))
(defn score-path
  "Compute a score for this `path` given this `index` and these `tokens`.

  Each token is scored against `path` with `score-token`; scores equal to
  zero are discarded and the remaining scores are multiplied together.
  With no remaining scores the result is 1 (the multiplicative identity).

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v."
  [index path tokens]
  (let [token-score (fn [token] (score-token index path token))]
    (->> tokens
         (map token-score)
         ;; zero scores are dropped so they cannot wipe out the product
         (remove zero?)
         (reduce *))))
(defn search
  "Search this `index`, expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v., for these `tokens`,
  expected to be a sequence of strings representing individual lower
  case words.

  Returns a sequence of the paths recorded in the index against any of the
  tokens, ordered by descending score, where the score of a path is
  computed by `score-path`. Tokens which do not occur in the index are
  ignored; if no token occurs in the index the result is empty."
  [index tokens]
  (let [;; Only tokens present in the index can contribute; dropping the
        ;; others up front keeps scoring from looking up a path inside a
        ;; missing (nil) index entry.
        known  (filter #(index %) tokens)
        ;; `mapcat` concatenates the key sequences exactly one level deep,
        ;; so a path is never taken apart, whatever its type.
        paths  (set (mapcat #(keys (index %)) known))
        scores (reduce
                (fn [acc path]
                  (assoc acc path (score-path index path known)))
                {}
                paths)]
    (sort-by scores > (keys scores))))