diff --git a/src/html_to_md/html_to_md.clj b/src/html_to_md/html_to_md.clj
new file mode 100644
index 0000000..3358bde
--- /dev/null
+++ b/src/html_to_md/html_to_md.clj
@@ -0,0 +1,157 @@
+(ns html-to-md.html-to-md
+  (:require
+    [clojure.string :as s]
+    [net.cgrand.enlive-html :as html]
+    [html-to-md.transformer :refer [process]]))
+
+(defn markdown-a
+  "Process the anchor element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (apply
+    str
+    (flatten
+      (list
+        "["
+        (map #(process % d) (:content e))
+        "]("
+        (-> e :attrs :href)
+        ")"))))
+
+(defn markdown-strong
+  "Process the strong emphasis element `e` into markdown, using dispatcher
+  `d`."
+  [e d]
+  (str
+    "**"
+    (s/trim (apply str (map #(process % d) (:content e))))
+    "**"))
+
+(defn markdown-div
+  "Process the division element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (apply
+    str
+    (flatten
+      (list "\n" (map #(process % d) (:content e)) "\n"))))
+
+(defn markdown-em
+  "Process the emphasis element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (str
+    "*"
+    (s/trim (apply str (map #(process % d) (:content e))))
+    "*"))
+
+(defn markdown-header
+  "Process the header element `e` into markdown, with level `level`,
+  using dispatcher `d`."
+  [e d level]
+  (apply
+    str
+    (flatten
+      (list
+        "\n"
+        (repeat level "#")
+        " "
+        (map #(process % d) (:content e))
+        "\n"))))
+
+(defn markdown-h1
+  "Process the header element `e` into markdown, with level 1, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 1))
+
+(defn markdown-h2
+  "Process the header element `e` into markdown, with level 2, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 2))
+
+(defn markdown-h3
+  "Process the header element `e` into markdown, with level 3, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 3))
+
+(defn markdown-h4
+  "Process the header element `e` into markdown, with level 4, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 4))
+
+(defn markdown-h5
+  "Process the header element `e` into markdown, with level 5, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 5))
+
+(defn markdown-h6
+  "Process the header element `e` into markdown, with level 6, using
+  dispatcher `d`."
+  [e d]
+  (markdown-header e d 6))
+
+(defn markdown-html
+  "Process this HTML element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (apply str (process (html/select e [:body]) d) ))
+
+(defn markdown-img
+  "Process this image element `e` into markdown, using dispatcher `d`."
+  [e d]
+  (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")"))
+
+(defn markdown-ol
+  "Process this ordered list element `e` into markdown, using dispatcher
+  `d`."
+  [e d]
+  (str
+    "\n"
+    (apply str
+           (doall
+             (map
+               #(apply
+                  str
+                  (flatten
+                    (list "\n" (inc %2) ". " (process %1 d))))
+               (:content e)
+               (range))))
+    "\n\n"))
+
+(defn markdown-ul
+  "Process this unordered list element `e` into markdown, using dispatcher
+  `d`."
+  [e d]
+  (str
+    "\n"
+    (apply str
+           (doall
+             (map
+               #(apply
+                  str
+                  (flatten
+                    (list "\n* " (process % d))))
+               (:content e))))
+    "\n\n"))
+
+
+(def markdown-dispatcher
+  {:a markdown-a
+   :b markdown-strong
+   :div markdown-div
+   :em markdown-em
+   :h1 markdown-h1
+   :h2 markdown-h2
+   :h3 markdown-h3
+   :h4 markdown-h4
+   :h5 markdown-h5
+   :h6 markdown-h6
+   :html markdown-html
+   :i markdown-em
+   :img markdown-img
+   :ol markdown-ol
+   :strong markdown-strong
+   :ul markdown-ul
+   })
+
diff --git a/src/html_to_md/transformer.clj b/src/html_to_md/transformer.clj
index 82465e1..6a42fc6 100644
--- a/src/html_to_md/transformer.clj
+++ b/src/html_to_md/transformer.clj
@@ -1,86 +1,8 @@
 (ns html-to-md.transformer
   (:require
-    [clojure.string :as s]
     [net.cgrand.enlive-html :as html]
     [net.cgrand.tagsoup :as tagsoup]))
 
-(declare process)
-
-(defn markdown-a
-  "Process the anchor element `e` into markdown"
-  [e d]
-  (apply
-    str
-    (flatten
-      (list
-        "["
-        (map #(process % d) (:content e))
-        "]("
-        (-> e :attrs :href)
-        ")"))))
-
-(defn markdown-strong
-  [e d]
-  ;; same as `:strong`, q.v.
-  (str
-    "**"
-    (s/trim (apply str (map #(process % d) (:content e))))
-    "**"))
-
-(defn markdown-div
-  [e d]
-  (apply
-    str
-    (flatten
-      (list "\n" (map #(process % d) (:content e)) "\n"))))
-
-
-(def markdown-dispatcher
-  {:a markdown-a
-   :b markdown-strong
-   :div markdown-div
-   :em (fn [e d]
-         ;; same as `:i`, q.v.
-         (str
-           "*"
-           (s/trim (apply str (map #(process % d) (:content e))))
-           "*"))
-   :h1 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list "\n# " (map #(process % d) (:content e)) "\n"))))
-   :h2 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list "\n## " (map #(process % d) (:content e)) "\n"))))
-   :h3 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list "\n### " (map #(process % d) (:content e)) "\n"))))
-   :h4 (fn [e d]
-         (apply
-           str
-           (flatten
-             (list
-               "\n#### "
-               (map #(process % d) (:content e))
-               "\n"))))
-   :h5 (fn [e d]
-         (apply
-           str (flatten (list "\n##### " (map #(process % d) (:content e)) "\n"))))
-   :h6 (fn [e d] (apply str (flatten (list "\n###### " (map #(process % d) (:content e)) "\n"))))
-   :html (fn [e d] (apply str (process (html/select e [:body]) d) ))
-   :i (fn [e d] (str "*" (s/trim (apply str (map #(process % d) (:content e)))) "*"))
-   :img (fn [e d] (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")"))
-   :strong (fn [e d]
-             (str
-               "**"
-               (s/trim (apply str (map #(process % d) (:content e))))
-               "**"))
-   })
 
 (defn process
   "Process this `element`, assumed to be a [HT|SG|X]ML element in Enlive
@@ -109,26 +31,21 @@
 (defmulti transform
   "Transform the `obj` which is my first argument using the `dispatcher`
   which is my second argument."
-  (fn [obj dispatcher] (type obj)) :default :default)
+  (fn [obj dispatcher] [(class obj) (class dispatcher)]) :default :default)
 
 (defmethod transform :default [obj dispatcher]
   (process obj dispatcher))
 
-(defmethod transform java.net.URI [uri dispatcher]
+(defmethod transform [java.net.URI Object] [uri dispatcher]
   (process (html/html-resource uri) dispatcher))
 
-(defmethod transform java.net.URL [url dispatcher]
+(defmethod transform [java.net.URL Object] [url dispatcher]
   (transform (.toURI url) dispatcher))
 
-(defmethod transform String [s dispatcher]
+(defmethod transform [String Object] [s dispatcher]
   (let [url (try (java.net.URL. s) (catch Exception any))]
     (if url
       (transform url dispatcher)
       ;; otherwise, if s is not a URL, consider it as an HTML fragment,
       ;; parse and process it
-      (process (tagsoup/parser (java.io.StringReader s)) dispatcher) )))
+      (process (tagsoup/parser (java.io.StringReader. s)) dispatcher) )))
-
-(process {:tag :h1 :content ["Hello dere!"]} markdown-dispatcher)
-
-
-(transform "