HTML to Markdown very largely working.
This commit is contained in:
parent
b406ef92c0
commit
7f50863d83
157
src/html_to_md/html_to_md.clj
Normal file
157
src/html_to_md/html_to_md.clj
Normal file
|
@ -0,0 +1,157 @@
|
||||||
|
(ns html-to-md.html-to-md
|
||||||
|
(:require
|
||||||
|
[clojure.string :as s]
|
||||||
|
[net.cgrand.enlive-html :as html]
|
||||||
|
[html-to-md.transformer :refer [process]]))
|
||||||
|
|
||||||
|
(defn markdown-a
  "Render the anchor element `e` as a markdown link of the form
  `[text](href)`. The link text is obtained by passing each child of `e`
  to `process` with dispatcher `d`; the target is the `:href` attribute
  of `e`."
  [e d]
  (let [label  (map #(process % d) (:content e))
        target (-> e :attrs :href)]
    ;; `flatten` splices the (possibly nested) processed children into the
    ;; surrounding punctuation before everything is concatenated.
    (->> ["[" label "](" target ")"]
         flatten
         (apply str))))
|
||||||
|
|
||||||
|
(defn markdown-strong
  "Render the strong-emphasis element `e` as markdown: the children of `e`
  are processed with dispatcher `d`, concatenated, trimmed of surrounding
  whitespace and wrapped in `**`."
  [e d]
  (let [inner (->> (:content e)
                   (map #(process % d))
                   (apply str)
                   s/trim)]
    (str "**" inner "**")))
|
||||||
|
|
||||||
|
(defn markdown-div
  "Render the division element `e` as markdown: its children, processed
  with dispatcher `d`, preceded and followed by a newline."
  [e d]
  (let [children (map #(process % d) (:content e))]
    (->> ["\n" children "\n"]
         flatten
         (apply str))))
|
||||||
|
|
||||||
|
(defn markdown-em
  "Render the emphasis element `e` as markdown: the children of `e` are
  processed with dispatcher `d`, concatenated, trimmed of surrounding
  whitespace and wrapped in `*`."
  [e d]
  (let [inner (->> (:content e)
                   (map #(process % d))
                   (apply str)
                   s/trim)]
    (str "*" inner "*")))
|
||||||
|
|
||||||
|
(defn markdown-header
  "Render the header element `e` as a markdown header of depth `level`:
  a newline, `level` hash characters, a space, the children of `e`
  processed with dispatcher `d`, and a closing newline."
  [e d level]
  (let [hashes (take level (repeat "#"))
        title  (map #(process % d) (:content e))]
    (->> ["\n" hashes " " title "\n"]
         flatten
         (apply str))))
|
||||||
|
|
||||||
|
(defn markdown-h1
  "Render the header element `e` as a level 1 markdown header, using
  dispatcher `d`. Delegates to `markdown-header`."
  [e d]
  (markdown-header e d 1))
|
||||||
|
|
||||||
|
(defn markdown-h2
  "Render the header element `e` as a level 2 markdown header, using
  dispatcher `d`. Delegates to `markdown-header`."
  [e d]
  (markdown-header e d 2))
|
||||||
|
|
||||||
|
(defn markdown-h3
  "Render the header element `e` as a level 3 markdown header, using
  dispatcher `d`. Delegates to `markdown-header`."
  [e d]
  (markdown-header e d 3))
|
||||||
|
|
||||||
|
(defn markdown-h4
  "Render the header element `e` as a level 4 markdown header, using
  dispatcher `d`. Delegates to `markdown-header`."
  [e d]
  (markdown-header e d 4))
|
||||||
|
|
||||||
|
(defn markdown-h5
  "Render the header element `e` as a level 5 markdown header, using
  dispatcher `d`. Delegates to `markdown-header`."
  [e d]
  (markdown-header e d 5))
|
||||||
|
|
||||||
|
(defn markdown-h6
  "Render the header element `e` as a level 6 markdown header, using
  dispatcher `d`. Delegates to `markdown-header`."
  [e d]
  (markdown-header e d 6))
|
||||||
|
|
||||||
|
(defn markdown-html
  "Render the HTML element `e` as markdown: select its `:body` with Enlive,
  hand the selection to `process` with dispatcher `d`, and concatenate the
  result into a single string."
  [e d]
  (let [body (html/select e [:body])]
    (apply str (process body d))))
|
||||||
|
|
||||||
|
(defn markdown-img
  "Render the image element `e` as a markdown image, i.e. an exclamation
  mark, the `:alt` attribute in square brackets, then the `:src` attribute
  in parentheses. The dispatcher `d` is accepted for signature
  compatibility with the other element renderers but is not used, since
  this function does not process any children of `e`. A missing attribute
  is rendered as an empty string."
  [e d]
  (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")"))
|
||||||
|
|
||||||
|
(defn markdown-ol
  "Render the ordered list element `e` as markdown. Each child of `e` is
  processed with dispatcher `d` and emitted on its own line, prefixed with
  its 1-based position and `. `; the whole list is preceded by one newline
  and followed by two."
  [e d]
  (let [render-item (fn [idx item]
                      ;; `idx` is zero-based, markdown numbering starts at 1
                      (->> ["\n" (inc idx) ". " (process item d)]
                           flatten
                           (apply str)))
        items       (map-indexed render-item (:content e))]
    (str "\n" (apply str items) "\n\n")))
|
||||||
|
|
||||||
|
(defn markdown-ul
  "Render the unordered list element `e` as markdown. Each child of `e` is
  processed with dispatcher `d` and emitted on its own line, prefixed with
  `* `; the whole list is preceded by one newline and followed by two."
  [e d]
  (let [render-item (fn [item]
                      (->> ["\n* " (process item d)]
                           flatten
                           (apply str)))
        items       (map render-item (:content e))]
    (str "\n" (apply str items) "\n\n")))
|
||||||
|
|
||||||
|
|
||||||
|
;; Map from HTML tag keyword to the two-argument function (element,
;; dispatcher) that renders that element as markdown. It is the dispatcher
;; the tests pass to `process`; how `process` handles tags that have no
;; entry here is defined in `html-to-md.transformer`.
(def markdown-dispatcher
  {;; document structure
   :html   markdown-html
   :div    markdown-div
   ;; headers
   :h1     markdown-h1
   :h2     markdown-h2
   :h3     markdown-h3
   :h4     markdown-h4
   :h5     markdown-h5
   :h6     markdown-h6
   ;; inline emphasis; `b`/`strong` and `i`/`em` share renderers
   :b      markdown-strong
   :strong markdown-strong
   :em     markdown-em
   :i      markdown-em
   ;; links and images
   :a      markdown-a
   :img    markdown-img
   ;; lists
   :ol     markdown-ol
   :ul     markdown-ul})
|
||||||
|
|
|
@ -1,86 +1,8 @@
|
||||||
(ns html-to-md.transformer
|
(ns html-to-md.transformer
|
||||||
(:require
|
(:require
|
||||||
[clojure.string :as s]
|
|
||||||
[net.cgrand.enlive-html :as html]
|
[net.cgrand.enlive-html :as html]
|
||||||
[net.cgrand.tagsoup :as tagsoup]))
|
[net.cgrand.tagsoup :as tagsoup]))
|
||||||
|
|
||||||
(declare process)
|
|
||||||
|
|
||||||
(defn markdown-a
|
|
||||||
"Process the anchor element `e` into markdown"
|
|
||||||
[e d]
|
|
||||||
(apply
|
|
||||||
str
|
|
||||||
(flatten
|
|
||||||
(list
|
|
||||||
"["
|
|
||||||
(map #(process % d) (:content e))
|
|
||||||
"]("
|
|
||||||
(-> e :attrs :href)
|
|
||||||
")"))))
|
|
||||||
|
|
||||||
(defn markdown-strong
|
|
||||||
[e d]
|
|
||||||
;; same as `:strong`, q.v.
|
|
||||||
(str
|
|
||||||
"**"
|
|
||||||
(s/trim (apply str (map #(process % d) (:content e))))
|
|
||||||
"**"))
|
|
||||||
|
|
||||||
(defn markdown-div
|
|
||||||
[e d]
|
|
||||||
(apply
|
|
||||||
str
|
|
||||||
(flatten
|
|
||||||
(list "\n" (map #(process % d) (:content e)) "\n"))))
|
|
||||||
|
|
||||||
|
|
||||||
(def markdown-dispatcher
|
|
||||||
{:a markdown-a
|
|
||||||
:b markdown-strong
|
|
||||||
:div markdown-div
|
|
||||||
:em (fn [e d]
|
|
||||||
;; same as `:i`, q.v.
|
|
||||||
(str
|
|
||||||
"*"
|
|
||||||
(s/trim (apply str (map #(process % d) (:content e))))
|
|
||||||
"*"))
|
|
||||||
:h1 (fn [e d]
|
|
||||||
(apply
|
|
||||||
str
|
|
||||||
(flatten
|
|
||||||
(list "\n# " (map #(process % d) (:content e)) "\n"))))
|
|
||||||
:h2 (fn [e d]
|
|
||||||
(apply
|
|
||||||
str
|
|
||||||
(flatten
|
|
||||||
(list "\n## " (map #(process % d) (:content e)) "\n"))))
|
|
||||||
:h3 (fn [e d]
|
|
||||||
(apply
|
|
||||||
str
|
|
||||||
(flatten
|
|
||||||
(list "\n### " (map #(process % d) (:content e)) "\n"))))
|
|
||||||
:h4 (fn [e d]
|
|
||||||
(apply
|
|
||||||
str
|
|
||||||
(flatten
|
|
||||||
(list
|
|
||||||
"\n#### "
|
|
||||||
(map #(process % d) (:content e))
|
|
||||||
"\n"))))
|
|
||||||
:h5 (fn [e d]
|
|
||||||
(apply
|
|
||||||
str (flatten (list "\n##### " (map #(process % d) (:content e)) "\n"))))
|
|
||||||
:h6 (fn [e d] (apply str (flatten (list "\n###### " (map #(process % d) (:content e)) "\n"))))
|
|
||||||
:html (fn [e d] (apply str (process (html/select e [:body]) d) ))
|
|
||||||
:i (fn [e d] (str "*" (s/trim (apply str (map #(process % d) (:content e)))) "*"))
|
|
||||||
:img (fn [e d] (str " ")"))
|
|
||||||
:strong (fn [e d]
|
|
||||||
(str
|
|
||||||
"**"
|
|
||||||
(s/trim (apply str (map #(process % d) (:content e))))
|
|
||||||
"**"))
|
|
||||||
})
|
|
||||||
|
|
||||||
(defn process
|
(defn process
|
||||||
"Process this `element`, assumed to be a [HT|SG|X]ML element in Enlive
|
"Process this `element`, assumed to be a [HT|SG|X]ML element in Enlive
|
||||||
|
@ -109,26 +31,21 @@
|
||||||
(defmulti transform
  "Transform the `obj` which is my first argument using the `dispatcher`
  which is my second argument.

  Dispatch is on the vector `[(class obj) (class dispatcher)]`, so methods
  are registered against pairs of classes such as `[String Object]`. Any
  pair with no matching method falls through to the `:default` method."
  ;; The dispatch function must be callable with both arguments and return
  ;; the pair of classes; a literal vector `[class class]` is not such a
  ;; function (a vector invoked with two arguments throws).
  (fn [obj dispatcher] [(class obj) (class dispatcher)])
  :default :default)
|
||||||
|
|
||||||
;; Fallback: anything without a more specific method is handed straight
;; to `process`.
(defmethod transform :default
  [obj dispatcher]
  (process obj dispatcher))
|
||||||
|
|
||||||
;; A URI is loaded as an Enlive HTML resource and the result is processed.
(defmethod transform [java.net.URI Object]
  [uri dispatcher]
  (-> uri
      html/html-resource
      (process dispatcher)))
|
||||||
|
|
||||||
;; A URL is converted to a URI and re-dispatched through `transform`.
(defmethod transform [java.net.URL Object]
  [url dispatcher]
  (-> url
      .toURI
      (transform dispatcher)))
|
||||||
|
|
||||||
;; A string is first tried as a URL; if it parses as one it is
;; re-dispatched through `transform`. Otherwise it is treated as an HTML
;; fragment, parsed with tagsoup and processed.
(defmethod transform [String Object]
  [s dispatcher]
  ;; Failure to construct a URL is deliberately swallowed: it just means
  ;; `s` is not a URL.
  (if-let [url (try (java.net.URL. s) (catch Exception _ nil))]
    (transform url dispatcher)
    ;; `StringReader.` (with the trailing dot) invokes the constructor;
    ;; without it the class object itself would be called as a function.
    (process (tagsoup/parser (java.io.StringReader. s)) dispatcher)))
|
||||||
|
|
||||||
(process {:tag :h1 :content ["Hello dere!"]} markdown-dispatcher)
|
|
||||||
|
|
||||||
|
|
||||||
(transform "<h1>Hello dere!</h1>" markdown-despatcher)
|
|
||||||
|
|
|
@ -2,6 +2,6 @@
|
||||||
(:require [clojure.test :refer :all]
|
(:require [clojure.test :refer :all]
|
||||||
[html-to-md.core :refer :all]))
|
[html-to-md.core :refer :all]))
|
||||||
|
|
||||||
(deftest a-test
|
;; (deftest a-test
|
||||||
(testing "FIXME, I fail."
|
;; (testing "FIXME, I fail."
|
||||||
(is (= 0 1))))
|
;; (is (= 0 1))))
|
||||||
|
|
105
test/html_to_md/html_to_md_test.clj
Normal file
105
test/html_to_md/html_to_md_test.clj
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
(ns html-to-md.html-to-md-test
|
||||||
|
(:require [clojure.test :refer :all]
|
||||||
|
[html-to-md.transformer :refer [process]]
|
||||||
|
[html-to-md.html-to-md :refer :all]))
|
||||||
|
|
||||||
|
(deftest a-test
  ;; An anchor renders as [text](href).
  (testing "Anchor tag."
    (let [element  {:tag :a
                    :attrs {:href "http://foo.bar"}
                    :content ["Hello dere!"]}
          rendered (process element markdown-dispatcher)]
      (is (= "[Hello dere!](http://foo.bar)" rendered)))))
|
||||||
|
|
||||||
|
(deftest b-test
  ;; Both <b> and <strong> render as **text**.
  (testing "Bold tag."
    (let [rendered (process {:tag :b :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "**Hello dere!**" rendered))))
  (testing "STRONG emphasis tag."
    (let [rendered (process {:tag :strong :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "**Hello dere!**" rendered)))))
|
||||||
|
|
||||||
|
(deftest div-test
  ;; A div renders its content between two newlines.
  (testing "DIVision tag."
    (let [rendered (process {:tag :div :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\nHello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest em-test
  ;; Both <em> and <i> render as *text*.
  (testing "EMphasis tag."
    (let [rendered (process {:tag :em :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "*Hello dere!*" rendered))))
  (testing "Italics tag"
    (let [rendered (process {:tag :i :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "*Hello dere!*" rendered)))))
|
||||||
|
|
||||||
|
(deftest h1-test
  ;; <h1> renders with a single leading hash.
  (testing "Level 1 header tag."
    (let [rendered (process {:tag :h1 :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\n# Hello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest h2-test
  ;; <h2> renders with two leading hashes.
  (testing "Level 2 header tag."
    (let [rendered (process {:tag :h2 :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\n## Hello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest h3-test
  ;; <h3> renders with three leading hashes.
  (testing "Level 3 header tag."
    (let [rendered (process {:tag :h3 :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\n### Hello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest h4-test
  ;; <h4> renders with four leading hashes.
  (testing "Level 4 header tag."
    (let [rendered (process {:tag :h4 :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\n#### Hello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest h5-test
  ;; <h5> renders with five leading hashes.
  (testing "Level 5 header tag."
    (let [rendered (process {:tag :h5 :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\n##### Hello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest h6-test
  ;; <h6> renders with six leading hashes.
  (testing "Level 6 header tag."
    (let [rendered (process {:tag :h6 :content ["Hello dere!"]}
                            markdown-dispatcher)]
      (is (= "\n###### Hello dere!\n" rendered)))))
|
||||||
|
|
||||||
|
(deftest img-test
  ;; An image renders as an exclamation mark, the alt text in square
  ;; brackets and the source URL in parentheses. An empty expected string
  ;; would not exercise the `:alt` and `:src` attributes supplied below.
  (testing "Image tag."
    (let [expected "![Hello dere!](http://foo.bar/image.png)"
          actual   (process
                     {:tag :img
                      :attrs {:src "http://foo.bar/image.png"
                              :alt "Hello dere!"}}
                     markdown-dispatcher)]
      (is (= expected actual)))))
|
||||||
|
|
||||||
|
(deftest list-test
  ;; The same three items are rendered once as a numbered list and once as
  ;; a bulleted list.
  (let [items [{:tag :li :content ["foo"]}
               {:tag :li :content ["bar"]}
               {:tag :li :content ["ban"]}]]
    (testing "ordered list tag."
      (let [rendered (process {:tag :ol :content items} markdown-dispatcher)]
        (is (= "\n\n1. foo\n2. bar\n3. ban\n\n" rendered))))
    (testing "umordered list tag."
      (let [rendered (process {:tag :ul :content items} markdown-dispatcher)]
        (is (= "\n\n* foo\n* bar\n* ban\n\n" rendered))))))
|
||||||
|
|
Loading…
Reference in a new issue