Tess4J crashed on macOS, but basics seem to work

This commit is contained in:
Simon Brooke 2019-02-17 18:40:05 +00:00
parent 0d3efe1323
commit d6afc46cb5
8 changed files with 91 additions and 42 deletions

View file

@ -1,4 +1,6 @@
(ns ireadit.nrepl
(ns ^{:doc "Meme transcriber: command line support"
:author "Simon Brooke"}
ireadit.nrepl
(:require [nrepl.server :as nrepl]
[clojure.tools.logging :as log]))

View file

@ -1,45 +1,22 @@
(ns ireadit.routes.services
(:require [ring.util.http-response :refer :all]
[cemerick.url :refer (url-decode)]
[compojure.api.sweet :refer :all]
[ireadit.tesseractor :refer [ocr]]
[schema.core :as s]))
(def service-routes
(api
{:swagger {:ui "/swagger-ui"
:spec "/swagger.json"
:data {:info {:version "1.0.0"
:title "Sample API"
:description "Sample Services"}}}}
(context "/api" []
:tags ["thingie"]
(GET "/plus" []
:return Long
:query-params [x :- Long, {y :- Long 1}]
:summary "x+y with query-parameters. y defaults to 1."
(ok (+ x y)))
{:swagger {:ui "/swagger-ui"
:spec "/swagger.json"
:data {:info {:version "1.0.0"
:title "Sample API"
:description "Sample Services"}}}}
(POST "/minus" []
:return Long
:body-params [x :- Long, y :- Long]
:summary "x-y with body-parameters."
(ok (- x y)))
(context "/api" []
:tags ["tesseractor"]
(GET "/times/:x/:y" []
:return Long
:path-params [x :- Long, y :- Long]
:summary "x*y with path-parameters"
(ok (* x y)))
(POST "/divide" []
:return Double
:form-params [x :- Long, y :- Long]
:summary "x/y with form-parameters"
(ok (/ x y)))
(GET "/power" []
:return Long
:header-params [x :- Long, y :- Long]
:summary "x^y with header-parameters"
(ok (long (Math/pow x y)))))))
(POST "/ocr/:uri" []
:return String
:path-params [uri :- String]
(ocr (url-decode uri))))))

View file

@ -0,0 +1,65 @@
(ns ^{:doc "Meme transcriber: actual OCR interface"
:author "Simon Brooke"}
ireadit.tesseractor
(:require [clojure.java.io :as io]
[clojure.tools.logging :as log]
[ireadit.config :refer [env]])
(:import net.sourceforge.tess4j.Tesseract
java.io.File
java.net.URL
javax.imageio.ImageIO))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;
;;;; ireadit.tesseractor: actual OCR interface.
;;;;
;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU General Public License
;;;; as published by the Free Software Foundation; either version 2
;;;; of the License, or (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this program; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
;;;; USA.
;;;;
;;;; Copyright (C) 2016 Simon Brooke for Radical Independence Campaign
;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Cribbed partly from https://github.com/hugoArregui/tesseract-clojure
;; (def tesseract-data-dir "/usr/share/tessdata")
;; (def language "eng")
;; (def test-file "eurotext.png")
(defn prepare-tesseract
  "Create a new `Tesseract` engine, set its data path to `data-path`, and
  return the engine."
  [data-path]
  (doto (Tesseract.)
    (.setDatapath data-path)))
(def tesseractor
  "Shared `Tesseract` engine, built once when this namespace is loaded from
  the value found under `:tess-data` in `env`. Used by `ocr` when no engine
  is passed explicitly."
  (-> env :tess-data prepare-tesseract))
(defn ocr
  "Perform optical character recognition on `image` using the OCR engine
  `t`, assuming the ISO 639-3 language `lang`, and return any text found as a
  string. `image` may be supplied as a `File`, as a `BufferedImage`, or as a
  string, in which case it will be treated as a URL and the image read
  from it.

  `t` defaults to the shared `tesseractor` engine and `lang` to \"eng\".
  Note that the language is set on `t` itself before recognition, so the
  setting remains on that engine after this call.

  Throws `IllegalArgumentException` if `image` is a string but no image can
  be decoded from that URL."
  ([image]
   (ocr image tesseractor))
  ([image t]
   (ocr image t "eng"))
  ([image t lang]
   (let [img (if (string? image)
               ;; `ImageIO/read` returns nil rather than throwing when no
               ;; registered reader can decode the stream; fail with a clear
               ;; message here instead of handing nil to the OCR engine.
               (or (ImageIO/read (URL. image))
                   (throw (IllegalArgumentException.
                            (str "Could not read an image from URL: " image))))
               image)]
     (.setLanguage t lang)
     (.doOCR t img))))

View file

@ -36,7 +36,7 @@
(rf/reg-event-fx
:fetch-transcription
(fn [{db :db} _]
(let [uri (str "http://loriner.journeyman.cc:8888/v1/tesseract/" (url-encode (:url db)))]
(let [uri (str "/api/ocr/" (url-encode (:url db)))]
(js/console.log
(str
"Fetching transcription data: " uri))