Added the blogger scraper.
This commit is contained in:
parent
81a7337eb3
commit
cb801b193f
5 changed files with 140 additions and 20 deletions
|
|
@ -1,6 +1,11 @@
|
|||
(ns html-to-md.core)
|
||||
(ns html-to-md.core
|
||||
(:require [html-to-md.transformer :refer [transform process]]
|
||||
[html-to-md.html-to-md :refer [markdown-dispatcher]]))
|
||||
|
||||
(defn foo
  "Placeholder function: prints `x` and the text \"Hello, World!\" with a
  single `println` call."
  [x]
  (println x "Hello, World!"))
|
||||
(defn html-to-md
  "Convert the HTML document referenced by `url` to Markdown.

  `(html-to-md url)` runs `transform` over `url` with `markdown-dispatcher`
  and concatenates the resulting pieces into one string, which is returned.

  `(html-to-md url output)` produces the same string and hands it to `spit`
  to be written to `output`; the value returned is whatever `spit` returns."
  ([url]
   ;; `transform` yields a sequence of fragments; glue them into one string.
   (->> markdown-dispatcher
        (transform url)
        (apply str)))
  ([url output]
   (let [markdown (html-to-md url)]
     (spit output markdown))))
|
||||
|
|
|
|||
|
|
@ -165,6 +165,7 @@
|
|||
|
||||
|
||||
(def markdown-dispatcher
|
||||
"A despatcher for transforming (X)HTML into Markdown."
|
||||
{:a markdown-a
|
||||
:b markdown-strong
|
||||
:br markdown-br
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@
|
|||
|
||||
(string? element) element
|
||||
(or (seq? element) (vector? element))
|
||||
(doall (map #(process % dispatcher) element))))
|
||||
(remove nil? (map #(process % dispatcher) element))))
|
||||
|
||||
(defn- transformer-dispatch
|
||||
[a _]
|
||||
|
|
@ -45,7 +45,7 @@
|
|||
(process obj dispatcher))
|
||||
|
||||
(defmethod transform java.net.URI [uri dispatcher]
|
||||
(process (html/html-resource uri) dispatcher))
|
||||
(remove nil? (process (html/html-resource uri) dispatcher)))
|
||||
|
||||
;; java.net.URL: convert to a java.net.URI with `.toURI` and call `transform`
;; again on the result, passing `dispatcher` through unchanged.
(defmethod transform java.net.URL [url dispatcher]
  (-> url
      .toURI
      (transform dispatcher)))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue