From 81a7337eb3d9866f85fef30465466027b4bd7ce5 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Tue, 30 Apr 2019 18:35:34 +0100 Subject: [PATCH] Lots of improvements from running against real live tag soup --- project.clj | 3 +- src/html_to_md/html_to_md.clj | 50 +++++++++++++++++------------ src/html_to_md/transformer.clj | 2 +- test/html_to_md/html_to_md_test.clj | 2 +- 4 files changed, 33 insertions(+), 24 deletions(-) diff --git a/project.clj b/project.clj index 654fd11..4edbf3a 100644 --- a/project.clj +++ b/project.clj @@ -5,6 +5,7 @@ :url "http://www.eclipse.org/legal/epl-v10.html"} :dependencies [[org.clojure/clojure "1.8.0"] [enlive "1.1.6"]] - :plugins [[lein-codox "0.10.3"]] + :plugins [[lein-codox "0.10.3"] + [lein-release "1.0.5"]] :lein-release {:deploy-via :clojars} :signing {:gpg-key "Simon Brooke (Stultus in monte) "}) diff --git a/src/html_to_md/html_to_md.clj b/src/html_to_md/html_to_md.clj index 5bc9716..662a23d 100644 --- a/src/html_to_md/html_to_md.clj +++ b/src/html_to_md/html_to_md.clj @@ -7,15 +7,18 @@ (defn markdown-a "Process the anchor element `e` into markdown, using dispatcher `d`." [e d] - (apply - str - (flatten - (list - "[" - (map #(process % d) (:content e)) - "](" - (-> e :attrs :href) - ")")))) + (str + "[" + (s/trim (apply str (process (:content e) d))) + "](" + (-> e :attrs :href) + ")")) + +(defn markdown-br + "Process the line-break element `e`, so beloved of tag-soupers, into + markdown" + [e d] + "\n\n") (defn markdown-code "Process the code or samp `e` into markdown, using dispatcher `d`." @@ -51,15 +54,12 @@ "Process the header element `e` into markdown, with level `level`, using dispatcher `d`." [e d level] - (apply - str - (flatten - (list - "\n" - (take level (repeat "#")) - " " - (map #(process % d) (:content e)) - "\n")))) + (str + "\n" + (apply str (take level (repeat "#"))) + " " + (s/trim (apply str (process (:content e) d))) + "\n")) (defn markdown-h1 "Process the header element `e` into markdown, with level 1, using @@ -105,7 +105,7 @@ (defn markdown-img "Process this image element `e` into markdown, using dispatcher `d`." [e d] - (str "![" (-> e :attrs :alt) "](" (-> e :attrs :src) ")")) + (str "![image: " (-> e :attrs :alt) "](" (-> e :attrs :src) ")")) (defn markdown-ol "Process this ordered list element `e` into markdown, using dispatcher @@ -120,10 +120,15 @@ str (flatten (list "\n" (inc %2) ". " (process %1 d)))) - (:content e) + (html/select e [:li]) (range)))) "\n\n")) +(defn markdown-omit + "Don't process the element `e` into markdown, but return `nil`." + [e d] + nil) + (defn markdown-pre "Process the preformatted emphasis element `e` into markdown, using dispatcher `d`." @@ -155,13 +160,14 @@ str (flatten (list "\n* " (process % d)))) - (:content e)))) + (html/select e [:li])))) "\n\n")) (def markdown-dispatcher {:a markdown-a :b markdown-strong + :br markdown-br :code markdown-code :body markdown-default :div markdown-div @@ -179,8 +185,10 @@ :p markdown-div :pre markdown-pre :samp markdown-code + :script markdown-omit :span markdown-default :strong markdown-strong + :style markdown-omit :ul markdown-ul }) diff --git a/src/html_to_md/transformer.clj b/src/html_to_md/transformer.clj index b08ef9d..931343c 100644 --- a/src/html_to_md/transformer.clj +++ b/src/html_to_md/transformer.clj @@ -29,7 +29,7 @@ (string? element) element (or (seq? element) (vector? element)) - (map #(process % dispatcher) element))) + (doall (map #(process % dispatcher) element)))) (defn- transformer-dispatch [a _] diff --git a/test/html_to_md/html_to_md_test.clj b/test/html_to_md/html_to_md_test.clj index 0778ec7..b328976 100644 --- a/test/html_to_md/html_to_md_test.clj +++ b/test/html_to_md/html_to_md_test.clj @@ -73,7 +73,7 @@ (deftest img-test (testing "Image tag." - (let [expected "![Hello dere!](http://foo.bar/image.png)" + (let [expected "![image: Hello dere!](http://foo.bar/image.png)" actual (process {:tag :img :attrs {:src "http://foo.bar/image.png"