;;;; Reconstructed from a two-patch series against
;;;; src/clj/cc/journeyman/elboob/search.cljc (final state after both
;;;; patches, with the fixes noted in the comments below).
;;;;
;;;;   [PATCH 1/2] eab8c9737b08c61286740b932213b83a98659077
;;;;     From: Simon Brooke; Date: Fri, 31 Oct 2025 21:27:56 +0000
;;;;     Well, we now have a working search algorithm. However, as we
;;;;     don't yet have human-readable metadata, this only counts as a
;;;;     proof of concept.
;;;;
;;;;   [PATCH 2/2] e9d0c1b806284a609cfc36c1981fa4479afa12f9
;;;;     From: Simon Brooke; Date: Fri, 31 Oct 2025 21:30:47 +0000
;;;;     Added namespace doc!

(ns cc.journeyman.elboob.search
  "Search the index for arbitrary tokens. It would be really nice if
  this could run in Scittle.")

(defn score-token
  "Score this `token` in the context of this `index` and `path`.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v. It is used here as a
  map from token to a map from path to a number.

  Returns the number stored for `path` under `token`, or 1 (the
  multiplicative identity, so the token has no effect on a product of
  scores) when `token` is not in the index or has no entry for `path`."
  [index path token]
  ;; `get-in` yields nil for a token that is absent from the index, where
  ;; `((index token) path)` would try to call nil as a function and throw.
  (or (get-in index [token path]) 1))

(defn score-path
  "Score this `path`, in the context of this `index` and `tokens`.

  `index` is expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v.

  Returns the product of the scores (see `score-token`) of each of
  `tokens` for `path`, ignoring any scores which are zero; returns 1 if
  there are no non-zero scores to multiply."
  [index path tokens]
  (->> tokens
       (map #(score-token index path %))
       ;; A zero would wipe out the whole product, so zeros are skipped.
       (remove zero?)
       (reduce * 1)))

(defn search
  "Search this `index`, expected to be an index of the form compiled by
  `cc.journeyman.elboob.core/compile-index`, q.v., for these `tokens`,
  expected to be a sequence of strings representing individual lower
  case words.

  Returns a list of the file paths indexed under at least one of
  `tokens`, ordered by descending product of the frequencies of the
  tokens in the indexed pages (see `score-path`). Tokens which are not
  in the index are ignored; if no token is in the index, the result is
  empty."
  [index tokens]
  (let [;; Every path indexed under at least one of the tokens. `mapcat`
        ;; contributes nothing for a token missing from the index, where
        ;; `flatten` over `(map keys ...)` would leave a stray nil path.
        paths  (into #{} (mapcat #(keys (get index %))) tokens)
        scores (into {}
                     (map (fn [path] [path (score-path index path tokens)]))
                     paths)]
    (sort-by scores > (keys scores))))