Well, we now have a working search algorithm. However, as we don't
yet have human-readable metadata, this only counts as a proof of concept.
This commit is contained in:
		
							parent
							
								
									21b6bfd67e
								
							
						
					
					
						commit
						eab8c9737b
					
				
					 1 changed files with 35 additions and 1 deletions
				
			
		| 
						 | 
				
			
			@ -1 +1,35 @@
 | 
			
		|||
(ns cc.journeyman.elboob.search)
 | 
			
		||||
(ns cc.journeyman.elboob.search)
 | 
			
		||||
 | 
			
		||||
(defn score-token
  "Score this `token` in the context of this `index` and `path`.

   `index` is expected to be an index of the form compiled by
   `cc.journeyman.elboob.core/compile-index`, q.v. It is called as a
   function of `token`, and the result, when there is one, is called as a
   function of `path`.

   Returns the value found for `token` at `path`, or `1` when the token has
   no entry in the index or the entry has no value for `path`."
  [index path token]
  ;; `(index token)` is nil for a token which was never indexed, and calling
  ;; nil as a function throws; only look the path up when an entry exists.
  (or (when-let [occurrences (index token)]
        (occurrences path))
      1))
 | 
			
		||||
 | 
			
		||||
(defn score-path
  "Compute the score of this `path` for these `tokens` against this `index`.

   Each token is scored with `score-token`; scores which are zero are
   discarded, and the remaining scores are multiplied together. When no
   scores remain, the result is `1`.

   `index` is expected to be an index of the form compiled by
   `cc.journeyman.elboob.core/compile-index`, q.v."
  [index path tokens]
  (let [token-scores (for [token tokens]
                       (score-token index path token))]
    (->> token-scores
         ;; a zero factor would wipe out the whole product, so drop it
         (remove zero?)
         (reduce *))))
 | 
			
		||||
 | 
			
		||||
(defn search
  "Search this `index`, expected to be an index of the form compiled by
   `cc.journeyman.elboob.core/compile-index`, q.v., for these `tokens`,
   expected to be a sequence of strings representing individual lower
   case words.

   Returns a sequence of the paths found under any of the tokens, ordered
   by descending score, where the score of a path is computed by
   `score-path`. Tokens which have no entry in the index are ignored; if
   none of the tokens has an entry, the result is empty."
  [index tokens]
  (let [;; Tokens with no index entry cannot contribute any paths, so they
        ;; are dropped before both path collection and scoring.
        known (filter index tokens)
        ;; `mapcat` rather than `flatten`: `flatten` keeps a nil element for
        ;; every entry which has no keys, which would then be treated as a
        ;; path.
        paths (set (mapcat #(keys (index %)) known))
        scores (into {}
                     (for [path paths]
                       [path (score-path index path known)]))]
    (sort-by scores > (keys scores))))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue