diff --git a/src/html_to_md/html_to_md.clj b/src/html_to_md/html_to_md.clj
new file mode 100644
index 0000000..3358bde
--- /dev/null
+++ b/src/html_to_md/html_to_md.clj
@@ -0,0 +1,157 @@
+(ns html-to-md.html-to-md
+  (:require
+    [clojure.string :as s]
+    [net.cgrand.enlive-html :as html]
+    [html-to-md.transformer :refer [process]]))
+
+(defn markdown-a
+  "Process the anchor element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (apply
+    str
+    (flatten
+      (list
+        "["
+        (map #(process % d) (:content e))
+        "]("
+        (-> e :attrs :href)
+        ")"))))
+
+(defn markdown-strong
+  "Process the strong emphasis element `e` into markdown, using dispatcher
+  `d`."
+  [e d]
+  (str
+    "**"
+    (s/trim (apply str (map #(process % d) (:content e))))
+    "**"))
+
+(defn markdown-div
+  "Process the division element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (apply
+    str
+    (flatten
+      (list "\n" (map #(process % d) (:content e)) "\n"))))
+
+(defn markdown-em
+  "Process the emphasis element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (str
+    "*"
+    (s/trim (apply str (map #(process % d) (:content e))))
+    "*"))
+
+(defn markdown-header
+  "Process the header element `e` into markdown, with level `level`,
+  using dispatcher `d`."
+  [e d level]
+  (apply
+    str
+    (flatten
+      (list
+        "\n"
+        (take level (repeat "#"))
+        " "
+        (map #(process % d) (:content e))
+        "\n"))))
+
+(defn markdown-h1
+  "Process the header element `e` into markdown, with level 1, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 1))
+
+(defn markdown-h2
+  "Process the header element `e` into markdown, with level 2, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 2))
+
+(defn markdown-h3
+  "Process the header element `e` into markdown, with level 3, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 3))
+
+(defn markdown-h4
+  "Process the header element `e` into markdown, with level 4, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 4))
+
+(defn markdown-h5
+  "Process the header element `e` into markdown, with level 5, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 5))
+
+(defn markdown-h6
+  "Process the header element `e` into markdown, with level 6, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 6))
+
+(defn markdown-html
+  "Process this HTML element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (apply str (process (html/select e [:body]) d) ))
+
+(defn markdown-img
+  "Process this image element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")"))
+
+(defn markdown-ol
+  "Process this ordered list element `e` into markdown, using dispatcher
+  `d`."
+  [e d]
+  (str
+    "\n"
+    (apply str
+           (doall
+             (map
+               #(apply
+                  str
+                  (flatten
+                    (list "\n" (inc %2) ". " (process %1 d))))
+               (:content e)
+               (range))))
+    "\n\n"))
+
+(defn markdown-ul
+  "Process this unordered list element `e` into markdown, using dispatcher
+  `d`."
+  [e d]
+  (str
+    "\n"
+    (apply str
+           (doall
+             (map
+               #(apply
+                  str
+                  (flatten
+                    (list "\n* " (process % d))))
+               (:content e))))
+    "\n\n"))
+
+
+(def markdown-dispatcher
+  {:a markdown-a
+   :b markdown-strong
+   :div markdown-div
+   :em markdown-em
+   :h1 markdown-h1
+   :h2 markdown-h2
+   :h3 markdown-h3
+   :h4 markdown-h4
+   :h5 markdown-h5
+   :h6 markdown-h6
+   :html markdown-html
+   :i markdown-em
+   :img markdown-img
+   :ol markdown-ol
+   :strong markdown-strong
+   :ul markdown-ul
+   })
+
diff --git a/src/html_to_md/transformer.clj b/src/html_to_md/transformer.clj
index 82465e1..6a42fc6 100644
--- a/src/html_to_md/transformer.clj
+++ b/src/html_to_md/transformer.clj
@@ -1,86 +1,8 @@
 (ns html-to-md.transformer
   (:require
-    [clojure.string :as s]
     [net.cgrand.enlive-html :as html]
     [net.cgrand.tagsoup :as tagsoup]))
 
-(declare process)
-
-(defn markdown-a
-  "Process the anchor element `e` into markdown"
-  [e d]
-  (apply
-    str
-    (flatten
-      (list
-        "["
-        (map #(process % d) (:content e))
-        "]("
-        (-> e :attrs :href)
-        ")"))))
-
-(defn markdown-strong
-  [e d]
-  ;; same as `:strong`, q.v.
-  (str
-    "**"
-    (s/trim (apply str (map #(process % d) (:content e))))
-    "**"))
-
-(defn markdown-div
-  [e d]
-  (apply
-    str
-    (flatten
-      (list "\n" (map #(process % d) (:content e)) "\n"))))
-
-
-(def markdown-dispatcher
-  {:a markdown-a
-   :b markdown-strong
-   :div markdown-div
-   :em (fn [e d]
-         ;; same as `:i`, q.v.
-         (str
-           "*"
-           (s/trim (apply str (map #(process % d) (:content e))))
-           "*"))
-   :h1 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list "\n# " (map #(process % d) (:content e)) "\n"))))
-   :h2 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list "\n## " (map #(process % d) (:content e)) "\n"))))
-   :h3 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list "\n### " (map #(process % d) (:content e)) "\n"))))
-   :h4 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list
-               "\n#### "
-               (map #(process % d) (:content e))
-               "\n"))))
-   :h5 (fn [e d]
-         (apply
-           str (flatten (list "\n##### " (map #(process % d) (:content e)) "\n"))))
-   :h6 (fn [e d] (apply str (flatten (list "\n###### " (map #(process % d) (:content e)) "\n"))))
-   :html (fn [e d] (apply str (process (html/select e [:body]) d) ))
-   :i (fn [e d] (str "*" (s/trim (apply str (map #(process % d) (:content e)))) "*"))
-   :img (fn [e d] (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")"))
-   :strong (fn [e d]
-             (str
-               "**"
-               (s/trim (apply str (map #(process % d) (:content e))))
-               "**"))
-   })
 
 (defn process
   "Process this `element`, assumed to be a [HT|SG|X]ML element in Enlive
@@ -109,26 +31,21 @@
 (defmulti transform
   "Transform the `obj` which is my first argument using the `dispatcher`
   which is my second argument."
-  (fn [obj dispatcher] (type obj)) :default :default)
+  (fn [obj dispatcher] [(class obj) (class dispatcher)]) :default :default)
 
 (defmethod transform :default [obj dispatcher]
   (process obj dispatcher))
 
-(defmethod transform java.net.URI [uri dispatcher]
+(defmethod transform [java.net.URI Object] [uri dispatcher]
   (process (html/html-resource uri) dispatcher))
 
-(defmethod transform java.net.URL [url dispatcher]
+(defmethod transform [java.net.URL Object] [url dispatcher]
   (transform (.toURI url) dispatcher))
 
-(defmethod transform String [s dispatcher]
+(defmethod transform [String Object] [s dispatcher]
   (let [url (try (java.net.URL. s) (catch Exception any))]
     (if url
       (transform url dispatcher)
       ;; otherwise, if s is not a URL, consider it as an HTML fragment,
       ;; parse and process it
-      (process (tagsoup/parser (java.io.StringReader s)) dispatcher) )))
+      (process (tagsoup/parser (java.io.StringReader. s)) dispatcher) )))
-
-(process {:tag :h1 :content ["Hello dere!"]} markdown-dispatcher)
-
-
-(transform "

Hello dere!

" markdown-despatcher)
diff --git a/test/html_to_md/core_test.clj b/test/html_to_md/core_test.clj
index 17b5a01..7a742d0 100644
--- a/test/html_to_md/core_test.clj
+++ b/test/html_to_md/core_test.clj
@@ -2,6 +2,6 @@
   (:require [clojure.test :refer :all]
             [html-to-md.core :refer :all]))
 
-(deftest a-test
-  (testing "FIXME, I fail."
-    (is (= 0 1))))
+;; (deftest a-test
+;;   (testing "FIXME, I fail."
+;;     (is (= 0 1))))
diff --git a/test/html_to_md/html_to_md_test.clj b/test/html_to_md/html_to_md_test.clj
new file mode 100644
index 0000000..c4f02b4
--- /dev/null
+++ b/test/html_to_md/html_to_md_test.clj
@@ -0,0 +1,105 @@
+(ns html-to-md.html-to-md-test
+  (:require [clojure.test :refer :all]
+            [html-to-md.transformer :refer [process]]
+            [html-to-md.html-to-md :refer :all]))
+
+(deftest a-test
+  (testing "Anchor tag."
+    (let [expected "[Hello dere!](http://foo.bar)"
+          actual (process {:tag :a :attrs {:href "http://foo.bar"} :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest b-test
+  (testing "Bold tag."
+    (let [expected "**Hello dere!**"
+          actual (process {:tag :b :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual))))
+  (testing "STRONG emphasis tag."
+    (let [expected "**Hello dere!**"
+          actual (process {:tag :strong :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest div-test
+  (testing "DIVision tag."
+    (let [expected "\nHello dere!\n"
+          actual (process {:tag :div :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest em-test
+  (testing "EMphasis tag."
+    (let [expected "*Hello dere!*"
+          actual (process {:tag :em :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual))))
+  (testing "Italics tag"
+    (let [expected "*Hello dere!*"
+          actual (process {:tag :i :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest h1-test
+  (testing "Level 1 header tag."
+    (let [expected "\n# Hello dere!\n"
+          actual (process {:tag :h1 :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest h2-test
+  (testing "Level 2 header tag."
+    (let [expected "\n## Hello dere!\n"
+          actual (process {:tag :h2 :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest h3-test
+  (testing "Level 3 header tag."
+    (let [expected "\n### Hello dere!\n"
+          actual (process {:tag :h3 :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest h4-test
+  (testing "Level 4 header tag."
+    (let [expected "\n#### Hello dere!\n"
+          actual (process {:tag :h4 :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest h5-test
+  (testing "Level 5 header tag."
+    (let [expected "\n##### Hello dere!\n"
+          actual (process {:tag :h5 :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest h6-test
+  (testing "Level 6 header tag."
+    (let [expected "\n###### Hello dere!\n"
+          actual (process {:tag :h6 :content ["Hello dere!"]} markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest img-test
+  (testing "Image tag."
+    (let [expected "![Hello dere!](http://foo.bar/image.png)"
+          actual (process
+                   {:tag :img
+                    :attrs {:src "http://foo.bar/image.png"
+                            :alt "Hello dere!"}}
+                   markdown-dispatcher)]
+      (is (= expected actual)))))
+
+(deftest list-test
+  (testing "ordered list tag."
+    (let [expected "\n\n1. foo\n2. bar\n3. ban\n\n"
+          actual (process
+                   {:tag :ol
+                    :content
+                    [{:tag :li :content ["foo"]}
+                     {:tag :li :content ["bar"]}
+                     {:tag :li :content ["ban"]}]}
+                   markdown-dispatcher)]
+      (is (= expected actual))))
+  (testing "umordered list tag."
+    (let [expected "\n\n* foo\n* bar\n* ban\n\n"
+          actual (process
+                   {:tag :ul
+                    :content
+                    [{:tag :li :content ["foo"]}
+                     {:tag :li :content ["bar"]}
+                     {:tag :li :content ["ban"]}]}
+                   markdown-dispatcher)]
+      (is (= expected actual)))))
+