Now successfully creating indexes. Started work on a search function, but out of steam.
This commit is contained in:
parent 86665db3b8
commit 5e33f2c815

2 changed files with 15 additions and 13 deletions
@@ -8,14 +8,14 @@
   (let [locale (java.util.Locale/getDefault)
         tag (.toLanguageTag locale)
         language (.getLanguage locale)]
-    
-     (first
-      (map #(try (println (format "resources/ignorable-words%s.edn" %))
-                 (read-string
-                  (slurp
-                   (file (format "resources/ignorable-words%s.edn" %))))
-                 (catch Exception e (println (.getMessage e)) nil))
-           [(str "." tag) (str "." language) ""]))))
+
+    (first
+     (map #(try (println (format "resources/ignorable-words%s.edn" %))
+                (read-string
+                 (slurp
+                  (file (format "resources/ignorable-words%s.edn" %))))
+                (catch Exception e (println (.getMessage e)) nil))
+          [(str "." tag) (str "." language) ""]))))
 
 (defn compile-file
   "Compile an index for an individual file `f`, tokenised with `tokenise` and 
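For context, this hunk (apparently the body of `get-ignorable-words`, which the last hunk below calls) only re-indents code that builds three locale-specific resource names and keeps the first one that loads. A minimal sketch of that candidate-name logic, assuming a fr-FR default locale; the sample values in the comments are illustrative only:

;; Sketch of the candidate-name logic only, not the full function.
;; Sample values assume the JVM default locale is fr-FR.
(let [locale (java.util.Locale/getDefault)
      tag (.toLanguageTag locale)        ; e.g. "fr-FR"
      language (.getLanguage locale)]    ; e.g. "fr"
  (map #(format "resources/ignorable-words%s.edn" %)
       [(str "." tag) (str "." language) ""]))
;; => ("resources/ignorable-words.fr-FR.edn"
;;     "resources/ignorable-words.fr.edn"
;;     "resources/ignorable-words.edn")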
@@ -26,6 +26,11 @@
         tokens (frequencies (remove ignorable? (tokenise (slurp f'))))]
     (reduce #(assoc %1 %2 {rel (tokens %2)}) {} (keys tokens))))
 
+(defn files-with-suffix [dir suffix]
+  (filter
+   #(ends-with? (.getName %) suffix)
+   (file-seq (file dir))))
+
 (defn compile-index
   "scans `dir-paths` as directories of Markdown files. Returns a map which keys
    each lexical token occurring in each file (with Markdown formatting, common
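A usage sketch for the new `files-with-suffix` helper; the directory name is hypothetical, and it assumes `file` (clojure.java.io/file) and `ends-with?` (clojure.string/ends-with?) are already referred in this namespace, as the surrounding code implies:

;; Hypothetical call: every *.md file under doc/, as java.io.File objects.
(files-with-suffix "doc" ".md")
;; => a lazy seq of File objects whose names end in ".md"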
@@ -35,9 +40,6 @@
   (let [ignorable-word? (set (get-ignorable-words))
         tokenise (tokenizer [:lower-case :concat-singles])]
     (reduce deep-merge {} 
-            (map (fn [dir]
-                   (map #(compile-file % tokenise ignorable-word?) 
-                        (filter 
-                         #(ends-with? (.getName %) ".md")
-                         (file-seq (file dir))))) dir-paths))))
+                   (map #(compile-file % tokenise ignorable-word?)
+                        (flatten (map #(files-with-suffix % ".md") dir-paths))))))
 
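The reworked `compile-index` now flattens the per-directory file seqs from `files-with-suffix` before mapping `compile-file` over them, so the nested `map`/`filter` over `dir-paths` goes away. Since `compile-file` builds `{token {rel frequency}}` maps and the results are combined with `deep-merge`, the index should come out shaped roughly like this; paths, tokens and counts are purely illustrative:

;; Hypothetical invocation and result shape only.
(compile-index ["doc"])
;; => {"search" {"doc/intro.md" 3, "doc/design.md" 1}
;;     "index"  {"doc/intro.md" 7}
;;     ...}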
				
src/clj/cc/journeyman/elboob/search.cljc (new normal file, 0 lines)