diff --git a/.gitignore b/.gitignore index 4a06faa..5acfe7c 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ target/classes/META-INF/maven/squirrel-parse/squirrel-parse/pom\.properties target/ \.lein-failures + +*.dump diff --git a/README.md b/README.md index 8b7588f..ad6450e 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,41 @@ # squirrel-parse -A Clojure library designed to parse SQL files into usable Clojure data structures, for automatic generation of things like [HugSQL](https://www.hugsql.org/) and [SQL Korma](http://sqlkorma.com/) (etc) boilerplate code. +A Clojure library designed to parse SQL files into usable Clojure data +structures, for automatic generation of things like +[HugSQL](https://www.hugsql.org/) and [SQL Korma](http://sqlkorma.com/) +(etc) boilerplate code. ## Usage -You can't use this yet. It doesn't work. When it does work it's likely that it will initially support only the bits of Postgres SQL that I habitually use; however I hope that it will be sufficiently adaptable that you can tailor it for your preferred variety of SQL. +This is not production ready code as yet. Nevertheless if you want a +sensible entry point, look at the two example functions in `squirrel-parse.core`. + + + +## Status + +What is here at present is proof-of-concept code. It does sort-of +work, for a limited subset of Postgres SQL. But it's pretty fragile +and the main issue is that slurping a whole SQL dump at a time tends +to make `instaparse` crash out of memory - even for seriously large +amounts of memory. + +Line-by-line parsing won't work because SQL statements tend to span +multiple lines. So what's needed is + +1. Clear the input buffer; +2. If the input stream is at end of stream, terminate; +3. Read a line from the stream and append it to the input buffer; +4. Attempt to parse a statement; +5. If successful, append the parsed statement to the current list of +parsed statements and go to 1; else go to 2. 
+ It may also be desirable to split the grammar into modules, each of +which is capable of parsing one sort of statement. This would limit +the memory cost of a parse operation, at the expense of requiring many +more parse operations. + +Obviously all this is doable but I'm not there yet! ## License diff --git a/src/squirrel_parse/core.clj b/src/squirrel_parse/core.clj index 0c94fea..ad09901 100644 --- a/src/squirrel_parse/core.clj +++ b/src/squirrel_parse/core.clj @@ -1,6 +1,16 @@ -(ns squirrel-parse.core) +(ns squirrel-parse.core + (:require [squirrel-parse.parser :refer [parse]] + [squirrel-parse.simplify :refer [simplify]] + [squirrel-parse.to-adl :refer [table-definitions-to-entities to-adl]])) -(defn foo - "I don't do a whole lot." - [x] - (println x "Hello, World!")) +;;; This is get-you-started code. + +(defn parsed-statements-from-file + "Parses the file of SQL commands indicated by `filename`, and returns a sequence of parsed statements." + [filename] + (simplify (parse (slurp filename)))) + +(defn mappy-structure-from-file + "Parses the file of SQL commands indicated by `filename`, and returns a more useful map of maps." + [filename] + (table-definitions-to-entities (parsed-statements-from-file filename))) diff --git a/src/squirrel_parse/to_adl.clj b/src/squirrel_parse/to_adl.clj index bcdbf54..9e556b5 100644 --- a/src/squirrel_parse/to_adl.clj +++ b/src/squirrel_parse/to_adl.clj @@ -6,7 +6,7 @@ [clj-time.format :refer [formatters unparse]] [squirrel-parse.parser :refer [parse]] [squirrel-parse.simplify :refer [simplify]] -;; [squirrel-parse.utils :refer [deep-merge]] + [squirrel-parse.utils :refer [deep-merge]] )) @@ -288,6 +288,8 @@ (defn to-adl-xml + "Turn `object`, a fragment of the mappy sort of structure created + by `table-definitions-to-entities`, into serialisable XML" [object] (cond (keyword? object)