diff --git a/.gitignore b/.gitignore index 83ebb80..4a06faa 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ Leiningen.gitignore target/classes/META-INF/maven/squirrel-parse/squirrel-parse/pom\.properties target/ + +\.lein-failures diff --git a/src/squirrel_parse/parser.clj b/src/squirrel_parse/parser.clj index dfbdce3..764faa8 100644 --- a/src/squirrel_parse/parser.clj +++ b/src/squirrel_parse/parser.clj @@ -103,7 +103,7 @@ There will be some differences from ANSI 92. Rules are generally expressed as single strings. Order is not very significant, but instaparse treats the first rule as special." (list - "STATEMENTS := OPT-SPACE STATEMENT | OPT-SPACE STATEMENT OPT-SPACE STATEMENTS ;" + "STATEMENTS := OPT-SPACE STATEMENT + ;" "STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;" "ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;" @@ -112,7 +112,7 @@ ;; taken from https://www.postgresql.org/docs/10/static/sql-createsequence.html "SEQUENCE-DECL := KW-CREATE PERMANENCE KW-SEQUENCE EXISTENCE NAME SEQ-SPEC-ELEMENTS TERMINATOR ;" - "SEQ-SPEC-ELEMENTS := SEQ-SPEC-ELEMENT | SEQ-SPEC-ELEMENT SEQ-SPEC-ELEMENTS ;" + "SEQ-SPEC-ELEMENTS := SEQ-SPEC-ELEMENT + ;" "SEQ-SPEC-ELEMENT := SEQ-SPEC-AS | SEQ-SPEC-INCREMENT | SEQ-SPEC-MIN | SEQ-SPEC-MAX | SEQ-SPEC-START | SEQ-SPEC-CACHE | SEQ-SPEC-CYCLE | SEQ-SPEC-OWNER ;" "SEQ-SPEC-AS := KW-AS DATATYPE ;" "SEQ-SPEC-INCREMENT := INT-VAL | KW-INCREMENT INT-VAL | KW-INCREMENT KW-BY INT-VAL ;" @@ -126,7 +126,7 @@ ;; from https://www.postgresql.org/docs/10/static/sql-altersequence.html "ALTER-SEQUENCE := KW-ALTER KW-SEQUENCE EXISTENCE NAME ALTER-SEQ-ELEMENTS TERMINATOR ;" - "ALTER-SEQ-ELEMENTS := ALTER-SEQ-ELEMENT | ALTER-SEQ-ELEMENT ALTER-SEQ-ELEMENTS ;" + "ALTER-SEQ-ELEMENTS := ALTER-SEQ-ELEMENT + ;" "ALTER-SEQ-ELEMENT := SEQ-SPEC-ELEMENT | OWNER-TO | SEQ-RENAME | SEQ-SET-SCHEMA ;" "OWNER-TO := KW-OWNER KW-TO NAME ;" "SEQ-RENAME := KW-RENAME KW-TO NAME ;" @@ -138,11 +138,11 @@ ;; taken from https://www.postgresql.org/docs/10/static/sql-createtable.html ;; but by no means all of that is implemented. "TABLE-DECL := KW-CREATE PERMANENCE KW-TABLE EXISTENCE NAME LPAR TABLE-SPEC-ELEMENTS RPAR TERMINATOR ;" - "TABLE-SPEC-ELEMENTS := TABLE-SPEC-ELEMENT | TABLE-SPEC-ELEMENT COMMA TABLE-SPEC-ELEMENTS ; " + "TABLE-SPEC-ELEMENTS := TABLE-SPEC-ELEMENT +; " "TABLE-SPEC-ELEMENT := COLUMN-SPEC | TABLE-CONSTRAINT ;" - "COLUMN-SPEC := NAME SPACE DATATYPE | COLUMN-SPEC OPT-SPACE COLUMN-CONSTRAINTS ;" - "TABLE-CONSTRAINT := 'TODO' ;" - "COLUMN-CONSTRAINTS := COLUMN-CONSTRAINT | COLUMN-CONSTRAINT SPACE COLUMN-CONSTRAINT ;" + "COLUMN-SPEC := NAME SPACE DATATYPE COLUMN-CONSTRAINTS OPT-COMMA;" + "TABLE-CONSTRAINT := 'TODO' OPT-COMMA ;" + "COLUMN-CONSTRAINTS := COLUMN-CONSTRAINT * ;" "COLUMN-CONSTRAINT := KW-CONSTRAINT NAME COLUMN-CONSTRAINT | NOT-NULL-CC | NULL-CC | DEFAULT-CC | UNIQUE-CC | PRIMARY-CC | REFERENCES-CC ;" "NOT-NULL-CC := KW-NOT KW-NULL ;" "NULL-CC := KW-NULL ;" @@ -150,7 +150,7 @@ "UNIQUE-CC := KW-UNIQUE INDEX-PARAMS ;" "PRIMARY-CC := KW-PRIMARY KW-KEY INDEX-PARAMS ;" "REFERENCES-CC := KW-REFERENCES NAME LPAR NAMES RPAR REF-DIRECTIVES | KW-FOREIGN KW-KEY LPAR NAMES RPAR REFERENCES-CC;" - "REF-DIRECTIVES := '' | REF-DIRECTIVE REF-DIRECTIVES ;" + "REF-DIRECTIVES := REF-DIRECTIVE * ;" "REF-DIRECTIVE := REF-MATCH | REF-ON-UPDATE | REF-ON-DELETE ;" "REF-MATCH := KW-MATCH MATCH-TYPE ;" "REF-ON-DELETE := KW-ON KW-DELETE REF-ACTION ;" @@ -163,7 +163,7 @@ "ONLY := KW-ONLY | '' ;" "ALTER-TABLE := KW-ALTER KW-TABLE EXISTENCE ONLY QUAL-NAME OPT-SPACE ALTER-TABLE-ELEMENTS TERMINATOR ;" - "ALTER-TABLE-ELEMENTS := ALTER-TABLE-ELEMENT | ALTER-TABLE-ELEMENT OPT-SPACE ALTER-TABLE-ELEMENTS ;" + "ALTER-TABLE-ELEMENTS := ALTER-TABLE-ELEMENT + ;" "ALTER-TABLE-ELEMENT := OWNER-TO | ALTER-COLUMN | ADD-CONSTRAINT;" "ALTER-COLUMN := KW-ALTER KW-COLUMN NAME ALTER-COL-SPEC ;" "ALTER-COL-SPEC := ALTER-COL-TYPE | ALTER-COL-DFLT;" @@ -171,13 +171,12 @@ "ALTER-COL-DFLT := KW-SET KW-DEFAULT EXPRESSION ;" "ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;" "OPT-KW-DATA := KW-DATA | '' ;" -;; "TABLE-ADD-COL := "PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;" "REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;" "GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;" - "PERMISSIONS := PERMISSION | PERMISSION COMMA PERMISSIONS ;" + "PERMISSIONS := PERMISSION COMMA * ;" "PERMISSION := KW-ALL | KW-SELECT | KW-INSERT | KW-UPDATE | KW-DELETE ;" "OPT-KW-SCHEMA := KW-SCHEMA | '' ;" @@ -187,7 +186,7 @@ "EXPRESSION := VALUE | KEYWORD | FUNCTION | LPAR EXPRESSION RPAR;" "FUNCTION := NAME LPAR VALUES RPAR | NAME LPAR RPAR ;" ;; TODO: same for values. - "VALUES := VALUE | VALUE COMMA VALUES ;" + "VALUES := VALUE COMMA *;" "VALUE := INT-VAL | CHAR-VAL | TRUTH-VAL | KW-NULL | NAMES | VALUE '::' NAME;" "INT-VAL := #'[0-9]+' ;" "CHAR-VAL := #'\\047.*\\047' ;" @@ -198,6 +197,7 @@ "LPAR := OPT-SPACE '(' OPT-SPACE ;" "RPAR := OPT-SPACE ')' OPT-SPACE ; " "EQUALS := OPT-SPACE '=' OPT-SPACE ;" + "OPT-COMMA := COMMA | '' ;" ;; OPT-SPACE is optional space - it's acceptable as OPT-SPACE if there are no whitespace characters. Comments are ;; also acceptable wherever OPT-SPACE is acceptable "OPT-SPACE := #'\\s*' | OPT-SPACE COMMENT OPT-SPACE;" diff --git a/src/squirrel_parse/simplify.clj b/src/squirrel_parse/simplify.clj index 871a683..b65cd27 100644 --- a/src/squirrel_parse/simplify.clj +++ b/src/squirrel_parse/simplify.clj @@ -32,7 +32,7 @@ "True if `x` is something which just clutters up the parse tree." [x] (and - (coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA} (first x)))) + (coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA :TERMINATOR} (first x)))) (defn remove-recursive @@ -46,25 +46,8 @@ %) (remove predicate collection))) -(defn flatten-statements - [parse-tree] - (if - (and (coll? parse-tree) (not (empty? parse-tree))) - (if - (= (first parse-tree) :STATEMENTS) - (cond - (>= (count parse-tree) 3) - (cons (nth parse-tree 1) (flatten-statements (nth parse-tree 2))) - (>= (count parse-tree) 2) - (list (nth parse-tree 1)) - true - ()) - parse-tree) - parse-tree)) - (defn simplify [parse-tree] - (flatten-statements - (remove-recursive ignorable? parse-tree))) + (remove-recursive ignorable? parse-tree)) diff --git a/src/squirrel_parse/to_adl.clj b/src/squirrel_parse/to_adl.clj new file mode 100644 index 0000000..4684a5c --- /dev/null +++ b/src/squirrel_parse/to_adl.clj @@ -0,0 +1,155 @@ +(ns ^{:doc "A parser for SQL: generate Application Description Language." + :author "Simon Brooke"} + squirrel-parse.to-adl + (:require [clojure.xml :refer [emit-element]] + [squirrel-parse.parser :refer [parse]] + [squirrel-parse.simplify :refer [simplify]])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;; +;;;; squirrel-parse.to-adl: generate Application Description Language. +;;;; +;;;; This program is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public License +;;;; as published by the Free Software Foundation; either version 2 +;;;; of the License, or (at your option) any later version. +;;;; +;;;; This program is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;;; GNU General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public License +;;;; along with this program; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, +;;;; USA. +;;;; +;;;; Copyright (C) 2018 Simon Brooke +;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +(def sql-datatype-to-adl-datatype + "Map to convert SQL datatypes to the nearest ADL equivalent." + {:DT-BIGINT :integer + :DT-BIGSERIAL :integer + :DT-BIT :integer + :DT-BOOLEAN :boolean + :DT-BYTEA :unsupported + :DT-DATE :date + :DT-DOUBLE-PRECISION :real + :DT-FLOAT :real + :DT-INTEGER :integer + :DT-MONEY :money + :DT-NUMERIC :real + :DT-REAL :real + :DT-SERIAL :integer + :DT-TEXT :string + :DT-CHAR :string + :DT-CHARACTER :string + :DT-CHARACTER-VARYING :string + :DT-VARCHAR :string + :DT-TIME :string + :DT-TIMESTAMP :timestamp + :DT-INTERVAL :unsupported + }) + +(defn is-subtree-of-type? + "Is this `subtree` a parser subtree of the specified `type`, expected to be a keyword?" + [subtree type] + (and (coll? subtree) (= (first subtree) type))) + +(defn is-create-table-statement? + "Is this statement a create table statement?" + [statement] + (and + (is-subtree-of-type? statement :STATEMENT) + (is-subtree-of-type? (second statement) :TABLE-DECL))) + +(defn get-children-of-type [subtree type] + (if + (coll? subtree) + (remove + nil? + (map + #(if + (and (coll? %) (= (first %) type)) + %) + subtree)))) + +(defn get-first-child-of-type [subtree type] + (first (get-children-of-type subtree type))) + +(defn get-name + "Return the value the first top-level :NAME element of this `subtree`." + [subtree] + (let [name-elt (make-property subtree :NAME)] + (if name-elt (second name-elt)))) + +(defn get-column-datatype + "Get the datatype of this column specification." + [column-spec] + (let [datatype-spec (get-first-child-of-type column-spec :DATATYPE) + sql-datatype (first (second datatype-spec))] + (sql-datatype-to-adl-datatype sql-datatype))) + +(defn make-property + "Make an ADL property representing this column specification." + [column-spec] + (if + (is-subtree-of-type? (second column-spec) :COLUMN-SPEC) + (make-property (second column-spec)) + {:tag :property + :attrs + { + :name (get-name column-spec) + :type (get-column-datatype column-spec) + }} + )) + +(defn make-entity [table-decl] + "Make an ADL entity representing this table declaration" + {:tag :entity + :name (get-name table-decl) + :content + (apply + vector + (map + make-property + (remove + nil? + (map + #(if + (and + (is-subtree-of-type? % :TABLE-SPEC-ELEMENT) + (is-subtree-of-type? (second %) :COLUMN-SPEC)) + (second %)) + (get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))}) + + +(defn table-definition-to-entity + "Return a map like this `map` with, if this `statement` is a table declaration, + an ADL entity representing that table added to it." + [entity-map statement] + (if + (is-create-table-statement? statement) + (let [table-decl (second statement) + table-name (get-name table-decl)] + (merge entity-map {table-name (make-entity table-decl)})) + entity-map)) + +(defn table-definitions-to-entities + "Extract table definitions from these `statements` as a map of ADL + entities indexed by name." + ([statements] + (reduce table-definition-to-entity {} statements))) + + +(defn to-adl [filename name] + (let [entities (table-definitions-to-entities (simplify (parse (slurp filename))))] + [{:tag :application + :name name + :content (vals entities)} + nil])) + diff --git a/test/squirrel_parse/to_adl_test.clj b/test/squirrel_parse/to_adl_test.clj new file mode 100644 index 0000000..0684021 --- /dev/null +++ b/test/squirrel_parse/to_adl_test.clj @@ -0,0 +1,15 @@ +(ns squirrel-parse.to-adl-test + (:require [clojure.test :refer :all] + [squirrel-parse.to-adl :refer :all])) + +(deftest test-get-column-datatype + (testing "Testing whether right datatype is returned for a valid column" + (let [column-spec '(:COLUMN-SPEC + (:NAME "longitude") + (:DATATYPE (:DT-REAL "real")) + (:COLUMN-CONSTRAINTS) + (:OPT-COMMA)) + actual (get-column-datatype column-spec) + expected :real] + + (is (= expected actual)))))