Tess4J crashed on macOS, but the basics seem to work
This commit is contained in:
parent
0d3efe1323
commit
d6afc46cb5
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -16,3 +16,5 @@ profiles.clj
|
|||
\.rebel_readline_history
|
||||
|
||||
[0-9a-f]*-init\.clj
|
||||
|
||||
*.log
|
||||
|
|
2
env/dev/resources/config.edn
vendored
2
env/dev/resources/config.edn
vendored
|
@ -1 +1 @@
|
|||
{}
|
||||
{:tess-data "/usr/local/Cellar/tesseract/4.0.0_1/share/tessdata/"}
|
||||
|
|
3
env/prod/resources/config.edn
vendored
3
env/prod/resources/config.edn
vendored
|
@ -1,2 +1,3 @@
|
|||
{:prod true
|
||||
:port 3000}
|
||||
:port 8889
|
||||
:tess-data "/usr/share/tesseract-ocr/tessdata"}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
(defproject ireadit "0.1.0-SNAPSHOT"
|
||||
|
||||
:description "FIXME: write description"
|
||||
:description "a bot to automatically OCR memes and other text-as-graphics posted to social media"
|
||||
:url "http://example.com/FIXME"
|
||||
|
||||
:dependencies [[baking-soda "0.2.0" :exclusions [cljsjs/react-bootstrap]]
|
||||
|
@ -18,6 +18,7 @@
|
|||
[cprop "0.1.13"]
|
||||
[day8.re-frame/http-fx "0.1.6"]
|
||||
[funcool/struct "1.3.0"]
|
||||
[com.github.jai-imageio/jai-imageio-core "1.4.0"]
|
||||
[luminus-immutant "0.2.5"]
|
||||
[luminus-transit "0.1.1"]
|
||||
[luminus/ring-ttl-session "0.3.2"]
|
||||
|
@ -40,7 +41,8 @@
|
|||
[ring-webjars "0.2.0"]
|
||||
[ring/ring-core "1.7.1"]
|
||||
[ring/ring-defaults "0.3.2"]
|
||||
[selmer "1.12.6"]]
|
||||
[selmer "1.12.6"]
|
||||
[net.sourceforge.tess4j/tess4j "4.3.1"]]
|
||||
|
||||
:min-lein-version "2.0.0"
|
||||
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
(ns ireadit.nrepl
|
||||
(ns ^{:doc "Meme transcriber: command line support"
|
||||
:author "Simon Brooke"}
|
||||
ireadit.nrepl
|
||||
(:require [nrepl.server :as nrepl]
|
||||
[clojure.tools.logging :as log]))
|
||||
|
||||
|
|
|
@ -1,45 +1,22 @@
|
|||
(ns ireadit.routes.services
|
||||
(:require [ring.util.http-response :refer :all]
|
||||
[cemerick.url :refer (url-decode)]
|
||||
[compojure.api.sweet :refer :all]
|
||||
[ireadit.tesseractor :refer [ocr]]
|
||||
[schema.core :as s]))
|
||||
|
||||
(def service-routes
|
||||
(api
|
||||
{:swagger {:ui "/swagger-ui"
|
||||
:spec "/swagger.json"
|
||||
:data {:info {:version "1.0.0"
|
||||
:title "Sample API"
|
||||
:description "Sample Services"}}}}
|
||||
|
||||
(context "/api" []
|
||||
:tags ["thingie"]
|
||||
|
||||
(GET "/plus" []
|
||||
:return Long
|
||||
:query-params [x :- Long, {y :- Long 1}]
|
||||
:summary "x+y with query-parameters. y defaults to 1."
|
||||
(ok (+ x y)))
|
||||
{:swagger {:ui "/swagger-ui"
|
||||
:spec "/swagger.json"
|
||||
:data {:info {:version "1.0.0"
|
||||
:title "Sample API"
|
||||
:description "Sample Services"}}}}
|
||||
|
||||
(POST "/minus" []
|
||||
:return Long
|
||||
:body-params [x :- Long, y :- Long]
|
||||
:summary "x-y with body-parameters."
|
||||
(ok (- x y)))
|
||||
(context "/api" []
|
||||
:tags ["tesseractor"]
|
||||
|
||||
(GET "/times/:x/:y" []
|
||||
:return Long
|
||||
:path-params [x :- Long, y :- Long]
|
||||
:summary "x*y with path-parameters"
|
||||
(ok (* x y)))
|
||||
|
||||
(POST "/divide" []
|
||||
:return Double
|
||||
:form-params [x :- Long, y :- Long]
|
||||
:summary "x/y with form-parameters"
|
||||
(ok (/ x y)))
|
||||
|
||||
(GET "/power" []
|
||||
:return Long
|
||||
:header-params [x :- Long, y :- Long]
|
||||
:summary "x^y with header-parameters"
|
||||
(ok (long (Math/pow x y)))))))
|
||||
(POST "/ocr/:uri" []
|
||||
:return String
|
||||
:path-params [uri :- String]
|
||||
(ocr (url-decode uri))))))
|
||||
|
|
65
src/clj/ireadit/tesseractor.clj
Normal file
65
src/clj/ireadit/tesseractor.clj
Normal file
|
@ -0,0 +1,65 @@
|
|||
(ns ^{:doc "Meme transcriber: actual OCR interface"
|
||||
:author "Simon Brooke"}
|
||||
ireadit.tesseractor
|
||||
(:require [clojure.java.io :as io]
|
||||
[clojure.tools.logging :as log]
|
||||
[ireadit.config :refer [env]])
|
||||
(:import net.sourceforge.tess4j.Tesseract
|
||||
java.io.File
|
||||
java.net.URL
|
||||
javax.imageio.ImageIO))
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;;;
|
||||
;;;; ireadit.tesseractor: actual OCR interface.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU General Public License
|
||||
;;;; as published by the Free Software Foundation; either version 2
|
||||
;;;; of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this program; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
|
||||
;;;; USA.
|
||||
;;;;
|
||||
;;;; Copyright (C) 2016 Simon Brooke for Radical Independence Campaign
|
||||
;;;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;;; Cribbed partly from https://github.com/hugoArregui/tesseract-clojure
|
||||
|
||||
;; (def tesseract-data-dir "/usr/share/tessdata")
|
||||
;; (def language "eng")
|
||||
;; (def test-file "eurotext.png")
|
||||
|
||||
(defn prepare-tesseract [data-path]
|
||||
(let [t (Tesseract.)]
|
||||
(.setDatapath t data-path)
|
||||
t))
|
||||
|
||||
|
||||
(def tesseractor (prepare-tesseract (:tess-data env)))
|
||||
|
||||
(defn ocr
|
||||
"Perform optical character recognition on `image` using the OCR engine
|
||||
`t`, assuming the ISO 639-3 language `lang`, and return any text found as a
|
||||
string. `image` may be supplied as a `File`, as `BufferedImage`, or as a
|
||||
string, in which case it will be treated as a URL."
|
||||
([image]
|
||||
(ocr image tesseractor))
|
||||
([image t]
|
||||
(ocr image t "eng"))
|
||||
([image t lang]
|
||||
(let [img (if
|
||||
(string? image)
|
||||
(ImageIO/read (URL. image))
|
||||
image)]
|
||||
(.setLanguage t lang)
|
||||
(.doOCR t img))))
|
||||
|
|
@ -36,7 +36,7 @@
|
|||
(rf/reg-event-fx
|
||||
:fetch-transcription
|
||||
(fn [{db :db} _]
|
||||
(let [uri (str "http://loriner.journeyman.cc:8888/v1/tesseract/" (url-encode (:url db)))]
|
||||
(let [uri (str "/api/ocr/" (url-encode (:url db)))]
|
||||
(js/console.log
|
||||
(str
|
||||
"Fetching transcription data: " uri))
|
||||
|
|
Loading…
Reference in a new issue