HTML to Markdown very largely working.
This commit is contained in:
parent
b406ef92c0
commit
7f50863d83
157
src/html_to_md/html_to_md.clj
Normal file
157
src/html_to_md/html_to_md.clj
Normal file
|
@ -0,0 +1,157 @@
|
|||
(ns html-to-md.html-to-md
|
||||
(:require
|
||||
[clojure.string :as s]
|
||||
[net.cgrand.enlive-html :as html]
|
||||
[html-to-md.transformer :refer [process]]))
|
||||
|
||||
(defn markdown-a
  "Render the anchor element `e` as a markdown link, `[text](href)`.
  The link text is obtained by running every child of `e` through `process`
  with dispatcher `d`; the target is the element's `:href` attribute."
  [e d]
  (let [label  (map #(process % d) (:content e))
        target (get-in e [:attrs :href])]
    (->> ["[" label "](" target ")"]
         flatten
         (apply str))))
|
||||
|
||||
(defn markdown-strong
  "Render the strong emphasis element `e` as markdown bold text. The children
  of `e` are converted with `process` and dispatcher `d`, concatenated,
  trimmed of surrounding whitespace and wrapped in `**`."
  [e d]
  (let [inner (->> (:content e)
                   (map #(process % d))
                   (apply str)
                   s/trim)]
    (str "**" inner "**")))
|
||||
|
||||
(defn markdown-div
  "Render the division element `e` as markdown: the children of `e`, each
  converted with `process` and dispatcher `d`, preceded and followed by a
  newline."
  [e d]
  (let [body (map #(process % d) (:content e))]
    (->> ["\n" body "\n"]
         flatten
         (apply str))))
|
||||
|
||||
(defn markdown-em
  "Render the emphasis element `e` as markdown italic text. The children of
  `e` are converted with `process` and dispatcher `d`, concatenated, trimmed
  of surrounding whitespace and wrapped in `*`."
  [e d]
  (let [inner (->> (:content e)
                   (map #(process % d))
                   (apply str)
                   s/trim)]
    (str "*" inner "*")))
|
||||
|
||||
(defn markdown-header
  "Render the header element `e` as a markdown header of depth `level`:
  a newline, `level` hash characters, a space, the children of `e` converted
  with `process` and dispatcher `d`, and a closing newline."
  [e d level]
  (let [hashes (->> (repeat "#") (take level))
        title  (map #(process % d) (:content e))]
    (->> ["\n" hashes " " title "\n"]
         flatten
         (apply str))))
|
||||
|
||||
(defn markdown-h1
  "Render the element `e` as a level 1 markdown header by delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 1))
|
||||
|
||||
(defn markdown-h2
  "Render the element `e` as a level 2 markdown header by delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 2))
|
||||
|
||||
(defn markdown-h3
  "Render the element `e` as a level 3 markdown header by delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 3))
|
||||
|
||||
(defn markdown-h4
  "Render the element `e` as a level 4 markdown header by delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 4))
|
||||
|
||||
(defn markdown-h5
  "Render the element `e` as a level 5 markdown header by delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 5))
|
||||
|
||||
(defn markdown-h6
  "Render the element `e` as a level 6 markdown header by delegating to
  `markdown-header` with dispatcher `d`."
  [e d]
  (markdown-header e d 6))
|
||||
|
||||
(defn markdown-html
  "Render the HTML document element `e` as markdown: select its `body` with
  Enlive, hand that selection to `process` with dispatcher `d`, and
  concatenate the result into one string."
  [e d]
  (let [body (html/select e [:body])]
    (->> (process body d)
         (apply str))))
|
||||
|
||||
(defn markdown-img
  "Render the image element `e` as a markdown image, `![alt](src)`, using the
  element's `:alt` and `:src` attributes. A missing attribute renders as an
  empty string. The dispatcher `d` is accepted so that this handler has the
  same signature as the others, but it is not used: the element's content is
  never processed."
  [e d]
  (let [{:keys [alt src]} (:attrs e)]
    (str "![" alt "](" src ")")))
|
||||
|
||||
(defn markdown-ol
  "Render the ordered list element `e` as a markdown numbered list. Every
  child of `e` is converted with `process` and dispatcher `d` and emitted on
  its own line, prefixed with its one-based position and `. `. The list is
  preceded by a newline and followed by two."
  [e d]
  (let [numbered (fn [idx child]
                   (->> ["\n" (inc idx) ". " (process child d)]
                        flatten
                        (apply str)))
        items    (map-indexed numbered (:content e))]
    (str "\n" (apply str items) "\n\n")))
|
||||
|
||||
(defn markdown-ul
  "Render the unordered list element `e` as a markdown bullet list. Every
  child of `e` is converted with `process` and dispatcher `d` and emitted on
  its own line, prefixed with `* `. The list is preceded by a newline and
  followed by two."
  [e d]
  (let [bullet (fn [child]
                 (->> ["\n* " (process child d)]
                      flatten
                      (apply str)))
        items  (map bullet (:content e))]
    (str "\n" (apply str items) "\n\n")))
|
||||
|
||||
|
||||
(def markdown-dispatcher
  "Dispatcher for converting HTML to markdown: a map from HTML tag keyword to
  the handler that renders elements with that tag. Each handler takes the
  element and a dispatcher."
  {;; document structure
   :html   markdown-html
   :div    markdown-div
   ;; headers
   :h1     markdown-h1
   :h2     markdown-h2
   :h3     markdown-h3
   :h4     markdown-h4
   :h5     markdown-h5
   :h6     markdown-h6
   ;; lists
   :ol     markdown-ol
   :ul     markdown-ul
   ;; inline elements; `:b`/`:strong` and `:i`/`:em` share handlers
   :a      markdown-a
   :b      markdown-strong
   :strong markdown-strong
   :em     markdown-em
   :i      markdown-em
   :img    markdown-img})
|
||||
|
|
@ -1,86 +1,8 @@
|
|||
(ns html-to-md.transformer
|
||||
(:require
|
||||
[clojure.string :as s]
|
||||
[net.cgrand.enlive-html :as html]
|
||||
[net.cgrand.tagsoup :as tagsoup]))
|
||||
|
||||
(declare process)
|
||||
|
||||
(defn markdown-a
  "Render the anchor element `e` as a markdown link: the children of `e`,
  each converted by `process` with dispatcher `d`, in square brackets,
  followed by the `:href` attribute in parentheses."
  [e d]
  (let [href (-> e :attrs :href)
        text (for [child (:content e)]
               (process child d))]
    (apply str (flatten ["[" text "](" href ")"]))))
|
||||
|
||||
(defn markdown-strong
  "Render the element `e` as markdown bold text: its children, each converted
  by `process` with dispatcher `d`, are concatenated, trimmed and wrapped in
  `**`. Used for `:b`; the `:strong` handler produces the same output."
  [e d]
  (let [pieces (for [child (:content e)]
                 (process child d))
        inner  (s/trim (apply str pieces))]
    (str "**" inner "**")))
|
||||
|
||||
(defn markdown-div
  "Render the division element `e` as markdown: a newline, the children of
  `e` each converted by `process` with dispatcher `d`, and a closing newline."
  [e d]
  (let [pieces (for [child (:content e)]
                 (process child d))]
    (apply str (flatten ["\n" pieces "\n"]))))
|
||||
|
||||
|
||||
(def markdown-dispatcher
  "Dispatcher for converting HTML to markdown: a map from HTML tag keyword to
  a handler taking the element `e` and a dispatcher `d`.

  The repeated handler bodies are built from three local helpers:
  `children` converts the content of an element with `process`, `emphasis`
  wraps the trimmed content in `*`, and `header` returns a handler emitting
  the given number of `#` characters before the content."
  (let [children (fn [e d] (map #(process % d) (:content e)))
        emphasis (fn [e d]
                   (str "*" (s/trim (apply str (children e d))) "*"))
        header   (fn [level]
                   (fn [e d]
                     (apply
                      str
                      (flatten
                       (list "\n" (repeat level "#") " " (children e d) "\n")))))]
    {:a      markdown-a
     :b      markdown-strong
     :div    markdown-div
     :em     emphasis
     :h1     (header 1)
     :h2     (header 2)
     :h3     (header 3)
     :h4     (header 4)
     :h5     (header 5)
     :h6     (header 6)
     :html   (fn [e d] (apply str (process (html/select e [:body]) d)))
     :i      emphasis
     ;; images have no content to process: emit `![alt](src)` from the
     ;; element's attributes
     :img    (fn [e d]
               (let [{:keys [alt src]} (:attrs e)]
                 (str "![" alt "](" src ")")))
     :strong markdown-strong}))
|
||||
|
||||
(defn process
|
||||
"Process this `element`, assumed to be a [HT|SG|X]ML element in Enlive
|
||||
|
@ -109,26 +31,21 @@
|
|||
(defmulti transform
  "Transform the `obj` which is my first argument using the `dispatcher`
  which is my second argument.

  Dispatch is on the type of `obj` only; the `dispatcher` does not take part
  in method selection. Objects of a type with no specific method fall
  through to the `:default` method."
  ;; The dispatch function must accept both arguments even though it only
  ;; inspects the first.
  (fn [obj dispatcher] (type obj))
  :default :default)
|
||||
|
||||
;; Fallback method: an object of any type without a more specific method is
;; passed to `process` unchanged.
(defmethod transform :default
  [markup dispatcher]
  (process markup dispatcher))
|
||||
|
||||
;; A URI is loaded with Enlive's `html-resource` and the result is handed to
;; `process`.
(defmethod transform java.net.URI
  [uri dispatcher]
  (process (html/html-resource uri) dispatcher))
|
||||
|
||||
;; A URL is converted to a URI and transformed again, so that the URI method
;; does the loading.
(defmethod transform java.net.URL
  [url dispatcher]
  (transform (.toURI url) dispatcher))
|
||||
|
||||
;; A string is first tried as a URL. If it cannot be parsed as one, it is
;; treated as an HTML fragment: it is parsed with TagSoup and the result is
;; handed to `process`.
(defmethod transform String
  [s dispatcher]
  ;; A string that is not a valid URL makes the constructor throw; that is
  ;; the expected signal for \"this is markup\", so it is deliberately turned
  ;; into nil rather than propagated.
  (if-let [url (try (java.net.URL. s) (catch Exception _ nil))]
    (transform url dispatcher)
    ;; note the trailing dot: `(java.io.StringReader. s)` constructs a
    ;; reader, whereas `(java.io.StringReader s)` tries to call the class
    (process (tagsoup/parser (java.io.StringReader. s)) dispatcher)))
|
||||
|
||||
;; Scratch expressions exercising `process` on a hand-built element and
;; `transform` on an HTML fragment string.
(process {:tag :h1 :content ["Hello dere!"]} markdown-dispatcher)

(transform "<h1>Hello dere!</h1>" markdown-dispatcher)
|
||||
|
|
|
@ -2,6 +2,6 @@
|
|||
(:require [clojure.test :refer :all]
|
||||
[html-to-md.core :refer :all]))
|
||||
|
||||
;; Placeholder test that compares two unequal constants and so always fails.
(deftest a-test
  (testing "FIXME, I fail."
    (let [expected 0
          actual   1]
      (is (= expected actual)))))
|
||||
;; (deftest a-test
|
||||
;; (testing "FIXME, I fail."
|
||||
;; (is (= 0 1))))
|
||||
|
|
105
test/html_to_md/html_to_md_test.clj
Normal file
105
test/html_to_md/html_to_md_test.clj
Normal file
|
@ -0,0 +1,105 @@
|
|||
(ns html-to-md.html-to-md-test
|
||||
(:require [clojure.test :refer :all]
|
||||
[html-to-md.transformer :refer [process]]
|
||||
[html-to-md.html-to-md :refer :all]))
|
||||
|
||||
;; An anchor element becomes a markdown link built from its content and href.
(deftest a-test
  (testing "Anchor tag."
    (let [anchor {:tag     :a
                  :attrs   {:href "http://foo.bar"}
                  :content ["Hello dere!"]}]
      (is (= "[Hello dere!](http://foo.bar)"
             (process anchor markdown-dispatcher))))))
|
||||
|
||||
;; Both `b` and `strong` elements become markdown bold text.
(deftest b-test
  (testing "Bold tag."
    (is (= "**Hello dere!**"
           (process {:tag :b :content ["Hello dere!"]}
                    markdown-dispatcher))))
  (testing "STRONG emphasis tag."
    (is (= "**Hello dere!**"
           (process {:tag :strong :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; A `div` element becomes its content surrounded by newlines.
(deftest div-test
  (testing "DIVision tag."
    (is (= "\nHello dere!\n"
           (process {:tag :div :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; Both `em` and `i` elements become markdown italic text.
(deftest em-test
  (testing "EMphasis tag."
    (is (= "*Hello dere!*"
           (process {:tag :em :content ["Hello dere!"]}
                    markdown-dispatcher))))
  (testing "Italics tag"
    (is (= "*Hello dere!*"
           (process {:tag :i :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `h1` element becomes a one-hash markdown header.
(deftest h1-test
  (testing "Level 1 header tag."
    (is (= "\n# Hello dere!\n"
           (process {:tag :h1 :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `h2` element becomes a two-hash markdown header.
(deftest h2-test
  (testing "Level 2 header tag."
    (is (= "\n## Hello dere!\n"
           (process {:tag :h2 :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `h3` element becomes a three-hash markdown header.
(deftest h3-test
  (testing "Level 3 header tag."
    (is (= "\n### Hello dere!\n"
           (process {:tag :h3 :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `h4` element becomes a four-hash markdown header.
(deftest h4-test
  (testing "Level 4 header tag."
    (is (= "\n#### Hello dere!\n"
           (process {:tag :h4 :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `h5` element becomes a five-hash markdown header.
(deftest h5-test
  (testing "Level 5 header tag."
    (is (= "\n##### Hello dere!\n"
           (process {:tag :h5 :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `h6` element becomes a six-hash markdown header.
(deftest h6-test
  (testing "Level 6 header tag."
    (is (= "\n###### Hello dere!\n"
           (process {:tag :h6 :content ["Hello dere!"]}
                    markdown-dispatcher)))))
|
||||
|
||||
;; An `img` element becomes a markdown image built from its `alt` and `src`
;; attributes.
(deftest img-test
  (testing "Image tag."
    (let [expected "![Hello dere!](http://foo.bar/image.png)"
          actual   (process
                    {:tag   :img
                     :attrs {:src "http://foo.bar/image.png"
                             :alt "Hello dere!"}}
                    markdown-dispatcher)]
      (is (= expected actual)))))
|
||||
|
||||
;; Ordered lists are numbered from 1 and unordered lists use `* ` bullets;
;; both are preceded by a newline and followed by two. The same three list
;; items are used for both cases.
(deftest list-test
  (let [items [{:tag :li :content ["foo"]}
               {:tag :li :content ["bar"]}
               {:tag :li :content ["ban"]}]]
    (testing "ordered list tag."
      (let [expected "\n\n1. foo\n2. bar\n3. ban\n\n"
            actual   (process {:tag :ol :content items} markdown-dispatcher)]
        (is (= expected actual))))
    (testing "unordered list tag."
      (let [expected "\n\n* foo\n* bar\n* ban\n\n"
            actual   (process {:tag :ul :content items} markdown-dispatcher)]
        (is (= expected actual))))))
|
||||
|
Loading…
Reference in a new issue