HTML to Markdown very largely working.
This commit is contained in:
parent
b406ef92c0
commit
7f50863d83
4 changed files with 269 additions and 90 deletions
157
src/html_to_md/html_to_md.clj
Normal file
157
src/html_to_md/html_to_md.clj
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
(ns html-to-md.html-to-md
|
||||
(:require
|
||||
[clojure.string :as s]
|
||||
[net.cgrand.enlive-html :as html]
|
||||
[html-to-md.transformer :refer [process]]))
|
||||
|
||||
(defn markdown-a
  "Render the anchor element `e` as a markdown link. The children of `e`,
  each passed through `process` with dispatcher `d`, form the link text;
  the `href` attribute of `e` forms the link target."
  [e d]
  (let [label  (map #(process % d) (:content e))
        target (-> e :attrs :href)]
    ;; `flatten` splices the (possibly nested) processed children in between
    ;; the literal bracket strings before everything is concatenated.
    (->> ["[" label "](" target ")"]
         flatten
         (apply str))))
|
||||
|
||||
(defn markdown-strong
  "Render the strong emphasis element `e` as markdown. The children of `e`
  are processed with dispatcher `d`, concatenated, trimmed of leading and
  trailing whitespace, and wrapped in `**`."
  [e d]
  (let [inner (->> (:content e)
                   (map #(process % d))
                   (apply str)
                   s/trim)]
    (str "**" inner "**")))
|
||||
|
||||
(defn markdown-div
  "Render the division element `e` as markdown: the children of `e`,
  processed with dispatcher `d`, preceded and followed by a newline."
  [e d]
  (let [children (map #(process % d) (:content e))]
    (->> ["\n" children "\n"]
         flatten
         (apply str))))
|
||||
|
||||
(defn markdown-em
  "Render the emphasis element `e` as markdown. The children of `e` are
  processed with dispatcher `d`, concatenated, trimmed of leading and
  trailing whitespace, and wrapped in `*`."
  [e d]
  (let [inner (->> (:content e)
                   (map #(process % d))
                   (apply str)
                   s/trim)]
    (str "*" inner "*")))
|
||||
|
||||
(defn markdown-header
  "Render the header element `e` as a markdown header of the given `level`,
  processing the children of `e` with dispatcher `d`. The output is a
  newline, `level` hash characters, a space, the processed children and a
  closing newline."
  [e d level]
  (let [hashes (take level (repeat "#"))
        title  (map #(process % d) (:content e))]
    (->> ["\n" hashes " " title "\n"]
         flatten
         (apply str))))
|
||||
|
||||
(defn markdown-h1
  "Render the element `e` as a level 1 markdown header, delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 1))
|
||||
|
||||
(defn markdown-h2
  "Render the element `e` as a level 2 markdown header, delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 2))
|
||||
|
||||
(defn markdown-h3
  "Render the element `e` as a level 3 markdown header, delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 3))
|
||||
|
||||
(defn markdown-h4
  "Render the element `e` as a level 4 markdown header, delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 4))
|
||||
|
||||
(defn markdown-h5
  "Render the element `e` as a level 5 markdown header, delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 5))
|
||||
|
||||
(defn markdown-h6
  "Render the element `e` as a level 6 markdown header, delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 6))
|
||||
|
||||
(defn markdown-html
  "Render the HTML element `e` as markdown: select the `:body` of `e`,
  process that selection with dispatcher `d`, and concatenate the result
  into a single string."
  [e d]
  (let [body (html/select e [:body])]
    (apply str (process body d))))
|
||||
|
||||
(defn markdown-img
  "Process this image element `e` into markdown, using dispatcher `d`.

  Produces `![alt](src)` from the `alt` and `src` attributes of `e`. A
  missing attribute contributes an empty string. The dispatcher `d` is
  accepted so the signature matches the other element processors; it is
  not consulted here."
  [e d]
  (let [{:keys [alt src]} (:attrs e)]
    (str "![" alt "](" src ")")))
|
||||
|
||||
(defn markdown-ol
  "Render the ordered list element `e` as markdown. Each child of `e` is
  processed with dispatcher `d` and emitted on its own line, prefixed with
  its 1-based position and `. `. The whole list is preceded by one newline
  and followed by two."
  [e d]
  (let [render-item (fn [index item]
                      (->> (list "\n" (inc index) ". " (process item d))
                           flatten
                           (apply str)))
        items       (map-indexed render-item (:content e))]
    (str "\n" (apply str items) "\n\n")))
|
||||
|
||||
(defn markdown-ul
  "Render the unordered list element `e` as markdown. Each child of `e` is
  processed with dispatcher `d` and emitted on its own line, prefixed with
  `* `. The whole list is preceded by one newline and followed by two."
  [e d]
  (let [render-item (fn [item]
                      (->> (list "\n* " (process item d))
                           flatten
                           (apply str)))
        items       (map render-item (:content e))]
    (str "\n" (apply str items) "\n\n")))
|
||||
|
||||
|
||||
(def markdown-dispatcher
  "Map from element tag keyword to the function which renders an element
  with that tag as markdown. Each function takes the element and a
  dispatcher."
  {;; document structure
   :html   markdown-html
   :div    markdown-div
   ;; headers
   :h1     markdown-h1
   :h2     markdown-h2
   :h3     markdown-h3
   :h4     markdown-h4
   :h5     markdown-h5
   :h6     markdown-h6
   ;; lists
   :ol     markdown-ol
   :ul     markdown-ul
   ;; inline emphasis; `:b`/`:strong` and `:i`/`:em` share a renderer
   :b      markdown-strong
   :strong markdown-strong
   :em     markdown-em
   :i      markdown-em
   ;; links and images
   :a      markdown-a
   :img    markdown-img})
|
||||
|
||||
|
|
@ -1,86 +1,8 @@
|
|||
(ns html-to-md.transformer
|
||||
(:require
|
||||
[clojure.string :as s]
|
||||
[net.cgrand.enlive-html :as html]
|
||||
[net.cgrand.tagsoup :as tagsoup]))
|
||||
|
||||
(declare process)
|
||||
|
||||
(defn markdown-a
  "Render the anchor element `e` as a markdown link: the children of `e`,
  processed with dispatcher `d`, in square brackets, followed by the `href`
  attribute of `e` in parentheses."
  [e d]
  (let [text (map #(process % d) (:content e))
        href (-> e :attrs :href)]
    (apply str (flatten ["[" text "](" href ")"]))))
|
||||
|
||||
(defn markdown-strong
  "Render the element `e` as strongly emphasised markdown: its children,
  processed with dispatcher `d`, concatenated, trimmed and wrapped in `**`.
  Produces the same output as the `:strong` entry of the dispatcher."
  [e d]
  (let [body (s/trim (apply str (map #(process % d) (:content e))))]
    (str "**" body "**")))
|
||||
|
||||
(defn markdown-div
  "Render the division element `e` as markdown: its children, processed
  with dispatcher `d`, with a newline before and after."
  [e d]
  (let [body (map #(process % d) (:content e))]
    (apply str (flatten ["\n" body "\n"]))))
|
||||
|
||||
|
||||
(def markdown-dispatcher
  "Map from element tag keyword to a function of an element `e` and a
  dispatcher `d` which renders that element as a markdown string."
  (letfn [;; The children of `e`, each processed with dispatcher `d`.
          (children [e d]
            (map #(process % d) (:content e)))
          ;; A renderer wrapping the trimmed, concatenated children in `marker`.
          (emphasis [marker]
            (fn [e d]
              (str marker (s/trim (apply str (children e d))) marker)))
          ;; A renderer producing a header line with `level` hash characters.
          (header [level]
            (let [prefix (str "\n" (apply str (repeat level "#")) " ")]
              (fn [e d]
                (apply str (flatten (list prefix (children e d) "\n"))))))]
    {:a      markdown-a
     :b      markdown-strong
     :div    markdown-div
     :em     (emphasis "*")
     :h1     (header 1)
     :h2     (header 2)
     :h3     (header 3)
     :h4     (header 4)
     :h5     (header 5)
     :h6     (header 6)
     :html   (fn [e d] (apply str (process (html/select e [:body]) d)))
     :i      (emphasis "*")
     ;; Images render as `![alt](src)` from the element's attributes.
     :img    (fn [e d]
               (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")"))
     ;; `:strong` renders exactly as `:b` does.
     :strong markdown-strong}))
|
||||
|
||||
(defn process
|
||||
"Process this `element`, assumed to be a [HT|SG|X]ML element in Enlive
|
||||
|
|
@ -109,26 +31,21 @@
|
|||
(defmulti transform
  "Transform the `obj` which is my first argument using the `dispatcher`
  which is my second argument.

  Dispatch is on a vector of the types of both arguments, so methods are
  registered against values such as `[String Object]`. Any pair with no
  matching method, including a call with a `nil` dispatcher, falls through
  to the `:default` method."
  (fn [obj dispatcher] [(type obj) (type dispatcher)])
  :default :default)
|
||||
|
||||
;; Fallback: hand anything without a more specific method straight to
;; `process`.
(defmethod transform :default
  [element d]
  (process element d))
|
||||
|
||||
;; A URI is loaded as an HTML resource via Enlive and the result processed.
(defmethod transform [java.net.URI Object] [uri dispatcher]
  (process (html/html-resource uri) dispatcher))
|
||||
|
||||
;; A URL is converted to a URI and re-dispatched.
(defmethod transform [java.net.URL Object] [url dispatcher]
  (transform (.toURI url) dispatcher))
|
||||
|
||||
;; A string is first tried as a URL; if it cannot be parsed as one it is
;; treated as an HTML fragment.
(defmethod transform [String Object] [s dispatcher]
  (let [;; Best effort: any failure to construct a URL just means `s` is not
        ;; one, so the exception is deliberately discarded.
        url (try (java.net.URL. s) (catch Exception _ nil))]
    (if url
      (transform url dispatcher)
      ;; otherwise, if s is not a URL, consider it as an HTML fragment,
      ;; parse and process it. Note the trailing dot: `StringReader.` is a
      ;; constructor call.
      (process (tagsoup/parser (java.io.StringReader. s)) dispatcher))))
|
||||
|
||||
;; Scratch calls exercising `process` on a literal element and `transform`
;; on an HTML fragment string.
(process {:tag :h1 :content ["Hello dere!"]} markdown-dispatcher)

(transform "<h1>Hello dere!</h1>" markdown-dispatcher)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue