Added first sketch of ignorable words
This commit is contained in:
		
							parent
							
								
									e5875a2a19
								
							
						
					
					
						commit
						f2fc1acc80
					
				
					 3 changed files with 108 additions and 1 deletions
				
			
		| 
						 | 
					@ -3,6 +3,7 @@
 | 
				
			||||||
  :url "http://example.com/FIXME"
 | 
					  :url "http://example.com/FIXME"
 | 
				
			||||||
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
 | 
					  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
 | 
				
			||||||
            :url "https://www.eclipse.org/legal/epl-2.0/"}
 | 
					            :url "https://www.eclipse.org/legal/epl-2.0/"}
 | 
				
			||||||
  :dependencies [[org.clojure/clojure "1.11.1"]]
 | 
					  :dependencies [[org.clojure/clojure "1.11.1"]
 | 
				
			||||||
 | 
					                 [peco "0.1.6"]]
 | 
				
			||||||
  :repl-options {:init-ns elboob.core}
 | 
					  :repl-options {:init-ns elboob.core}
 | 
				
			||||||
  :source-paths      ["src/clj"])
 | 
					  :source-paths      ["src/clj"])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										105
									
								
								resources/ignorable-words.en.edn
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								resources/ignorable-words.en.edn
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,105 @@
 | 
				
			||||||
 | 
					;; list of English language words that should not be indexed.
 | 
				
			||||||
 | 
					;; taken from the first hundred words in [Peter Norvig's analysis of the 
 | 
				
			||||||
 | 
					;; frequency of English words](https://norvig.com/ngrams/count_1w.txt);
 | 
				
			||||||
 | 
					;; I've then commented out from the list those words which, although
 | 
				
			||||||
 | 
					;; common, I think it may be reasonable for people to search for.
 | 
				
			||||||
 | 
					["the"
 | 
				
			||||||
 | 
					"of"
 | 
				
			||||||
 | 
					"and"
 | 
				
			||||||
 | 
					"to"
 | 
				
			||||||
 | 
					"a"
 | 
				
			||||||
 | 
					"in"
 | 
				
			||||||
 | 
					"for"
 | 
				
			||||||
 | 
					"is"
 | 
				
			||||||
 | 
					"on"
 | 
				
			||||||
 | 
					"that"
 | 
				
			||||||
 | 
					"by"
 | 
				
			||||||
 | 
					"this"
 | 
				
			||||||
 | 
					"with"
 | 
				
			||||||
 | 
					"i"
 | 
				
			||||||
 | 
					"you"
 | 
				
			||||||
 | 
					"it"
 | 
				
			||||||
 | 
					"not"
 | 
				
			||||||
 | 
					"or"
 | 
				
			||||||
 | 
					"be"
 | 
				
			||||||
 | 
					"are"
 | 
				
			||||||
 | 
					"from"
 | 
				
			||||||
 | 
					"at"
 | 
				
			||||||
 | 
					"as"
 | 
				
			||||||
 | 
					"your"
 | 
				
			||||||
 | 
					"all"
 | 
				
			||||||
 | 
					"have"
 | 
				
			||||||
 | 
					"new"
 | 
				
			||||||
 | 
					"more"
 | 
				
			||||||
 | 
					"an"
 | 
				
			||||||
 | 
					"was"
 | 
				
			||||||
 | 
					"we"
 | 
				
			||||||
 | 
					"will"
 | 
				
			||||||
 | 
					"home"
 | 
				
			||||||
 | 
					"can"
 | 
				
			||||||
 | 
					"us"
 | 
				
			||||||
 | 
					"about"
 | 
				
			||||||
 | 
					"if"
 | 
				
			||||||
 | 
					"page"
 | 
				
			||||||
 | 
					"my"
 | 
				
			||||||
 | 
					"has"
 | 
				
			||||||
 | 
					"search"
 | 
				
			||||||
 | 
					"free"
 | 
				
			||||||
 | 
					"but"
 | 
				
			||||||
 | 
					"our"
 | 
				
			||||||
 | 
					"one"
 | 
				
			||||||
 | 
					"other"
 | 
				
			||||||
 | 
					"do"
 | 
				
			||||||
 | 
					"no"
 | 
				
			||||||
 | 
					;; "information"
 | 
				
			||||||
 | 
					"time"
 | 
				
			||||||
 | 
					"they"
 | 
				
			||||||
 | 
					"site"
 | 
				
			||||||
 | 
					"he"
 | 
				
			||||||
 | 
					"up"
 | 
				
			||||||
 | 
					"may"
 | 
				
			||||||
 | 
					"what"
 | 
				
			||||||
 | 
					"which"
 | 
				
			||||||
 | 
					"their"
 | 
				
			||||||
 | 
					"news"
 | 
				
			||||||
 | 
					"out"
 | 
				
			||||||
 | 
					"use"
 | 
				
			||||||
 | 
					"any"
 | 
				
			||||||
 | 
					"there"
 | 
				
			||||||
 | 
					"see"
 | 
				
			||||||
 | 
					"only"
 | 
				
			||||||
 | 
					"so"
 | 
				
			||||||
 | 
					"his"
 | 
				
			||||||
 | 
					"when"
 | 
				
			||||||
 | 
					;; "contact"
 | 
				
			||||||
 | 
					"here"
 | 
				
			||||||
 | 
					;; "business"
 | 
				
			||||||
 | 
					"who"
 | 
				
			||||||
 | 
					"web"
 | 
				
			||||||
 | 
					"also"
 | 
				
			||||||
 | 
					"now"
 | 
				
			||||||
 | 
					;; "help"
 | 
				
			||||||
 | 
					"get"
 | 
				
			||||||
 | 
					"pm"
 | 
				
			||||||
 | 
					"view"
 | 
				
			||||||
 | 
					;; "online"
 | 
				
			||||||
 | 
					"c"
 | 
				
			||||||
 | 
					"e"
 | 
				
			||||||
 | 
					"first"
 | 
				
			||||||
 | 
					"am"
 | 
				
			||||||
 | 
					"been"
 | 
				
			||||||
 | 
					"would"
 | 
				
			||||||
 | 
					"how"
 | 
				
			||||||
 | 
					"were"
 | 
				
			||||||
 | 
					"me"
 | 
				
			||||||
 | 
					"s"
 | 
				
			||||||
 | 
					;; "services"
 | 
				
			||||||
 | 
					"some"
 | 
				
			||||||
 | 
					"these"
 | 
				
			||||||
 | 
					"click"
 | 
				
			||||||
 | 
					"its"
 | 
				
			||||||
 | 
					"like"
 | 
				
			||||||
 | 
					;; "service"
 | 
				
			||||||
 | 
					"x"
 | 
				
			||||||
 | 
					"than"
 | 
				
			||||||
 | 
					"find"]
 | 
				
			||||||
							
								
								
									
										1
									
								
								resources/ignorable-words.en_GB.edn
									
										
									
									
									
										Symbolic link
									
								
							
							
						
						
									
										1
									
								
								resources/ignorable-words.en_GB.edn
									
										
									
									
									
										Symbolic link
									
								
							| 
						 | 
					@ -0,0 +1 @@
 | 
				
			||||||
 | 
					ignorable-words.en.edn
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue