Simplified the parser slightly, and now generating a subset of ADL.

This commit is contained in:
Simon Brooke 2018-02-14 17:51:38 +00:00
parent ad252a0d78
commit 3dc84787b5
5 changed files with 186 additions and 31 deletions

2
.gitignore vendored
View file

@ -6,3 +6,5 @@ Leiningen.gitignore
target/classes/META-INF/maven/squirrel-parse/squirrel-parse/pom\.properties
target/
\.lein-failures

View file

@ -103,7 +103,7 @@
There will be some differences from ANSI 92. Rules are generally expressed as single
strings. Order is not very significant, but instaparse treats the first rule as special."
(list
"STATEMENTS := OPT-SPACE STATEMENT | OPT-SPACE STATEMENT OPT-SPACE STATEMENTS ;"
"STATEMENTS := OPT-SPACE STATEMENT + ;"
"STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;"
"ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;"
@ -112,7 +112,7 @@
;; taken from https://www.postgresql.org/docs/10/static/sql-createsequence.html
"SEQUENCE-DECL := KW-CREATE PERMANENCE KW-SEQUENCE EXISTENCE NAME SEQ-SPEC-ELEMENTS TERMINATOR ;"
"SEQ-SPEC-ELEMENTS := SEQ-SPEC-ELEMENT | SEQ-SPEC-ELEMENT SEQ-SPEC-ELEMENTS ;"
"SEQ-SPEC-ELEMENTS := SEQ-SPEC-ELEMENT + ;"
"SEQ-SPEC-ELEMENT := SEQ-SPEC-AS | SEQ-SPEC-INCREMENT | SEQ-SPEC-MIN | SEQ-SPEC-MAX | SEQ-SPEC-START | SEQ-SPEC-CACHE | SEQ-SPEC-CYCLE | SEQ-SPEC-OWNER ;"
"SEQ-SPEC-AS := KW-AS DATATYPE ;"
"SEQ-SPEC-INCREMENT := INT-VAL | KW-INCREMENT INT-VAL | KW-INCREMENT KW-BY INT-VAL ;"
@ -126,7 +126,7 @@
;; from https://www.postgresql.org/docs/10/static/sql-altersequence.html
"ALTER-SEQUENCE := KW-ALTER KW-SEQUENCE EXISTENCE NAME ALTER-SEQ-ELEMENTS TERMINATOR ;"
"ALTER-SEQ-ELEMENTS := ALTER-SEQ-ELEMENT | ALTER-SEQ-ELEMENT ALTER-SEQ-ELEMENTS ;"
"ALTER-SEQ-ELEMENTS := ALTER-SEQ-ELEMENT + ;"
"ALTER-SEQ-ELEMENT := SEQ-SPEC-ELEMENT | OWNER-TO | SEQ-RENAME | SEQ-SET-SCHEMA ;"
"OWNER-TO := KW-OWNER KW-TO NAME ;"
"SEQ-RENAME := KW-RENAME KW-TO NAME ;"
@ -138,11 +138,11 @@
;; taken from https://www.postgresql.org/docs/10/static/sql-createtable.html
;; but by no means all of that is implemented.
"TABLE-DECL := KW-CREATE PERMANENCE KW-TABLE EXISTENCE NAME LPAR TABLE-SPEC-ELEMENTS RPAR TERMINATOR ;"
"TABLE-SPEC-ELEMENTS := TABLE-SPEC-ELEMENT | TABLE-SPEC-ELEMENT COMMA TABLE-SPEC-ELEMENTS ; "
"TABLE-SPEC-ELEMENTS := TABLE-SPEC-ELEMENT +; "
"TABLE-SPEC-ELEMENT := COLUMN-SPEC | TABLE-CONSTRAINT ;"
"COLUMN-SPEC := NAME SPACE DATATYPE | COLUMN-SPEC OPT-SPACE COLUMN-CONSTRAINTS ;"
"TABLE-CONSTRAINT := 'TODO' ;"
"COLUMN-CONSTRAINTS := COLUMN-CONSTRAINT | COLUMN-CONSTRAINT SPACE COLUMN-CONSTRAINT ;"
"COLUMN-SPEC := NAME SPACE DATATYPE COLUMN-CONSTRAINTS OPT-COMMA;"
"TABLE-CONSTRAINT := 'TODO' OPT-COMMA ;"
"COLUMN-CONSTRAINTS := COLUMN-CONSTRAINT * ;"
"COLUMN-CONSTRAINT := KW-CONSTRAINT NAME COLUMN-CONSTRAINT | NOT-NULL-CC | NULL-CC | DEFAULT-CC | UNIQUE-CC | PRIMARY-CC | REFERENCES-CC ;"
"NOT-NULL-CC := KW-NOT KW-NULL ;"
"NULL-CC := KW-NULL ;"
@ -150,7 +150,7 @@
"UNIQUE-CC := KW-UNIQUE INDEX-PARAMS ;"
"PRIMARY-CC := KW-PRIMARY KW-KEY INDEX-PARAMS ;"
"REFERENCES-CC := KW-REFERENCES NAME LPAR NAMES RPAR REF-DIRECTIVES | KW-FOREIGN KW-KEY LPAR NAMES RPAR REFERENCES-CC;"
"REF-DIRECTIVES := '' | REF-DIRECTIVE REF-DIRECTIVES ;"
"REF-DIRECTIVES := REF-DIRECTIVE * ;"
"REF-DIRECTIVE := REF-MATCH | REF-ON-UPDATE | REF-ON-DELETE ;"
"REF-MATCH := KW-MATCH MATCH-TYPE ;"
"REF-ON-DELETE := KW-ON KW-DELETE REF-ACTION ;"
@ -163,7 +163,7 @@
"ONLY := KW-ONLY | '' ;"
"ALTER-TABLE := KW-ALTER KW-TABLE EXISTENCE ONLY QUAL-NAME OPT-SPACE ALTER-TABLE-ELEMENTS TERMINATOR ;"
"ALTER-TABLE-ELEMENTS := ALTER-TABLE-ELEMENT | ALTER-TABLE-ELEMENT OPT-SPACE ALTER-TABLE-ELEMENTS ;"
"ALTER-TABLE-ELEMENTS := ALTER-TABLE-ELEMENT + ;"
"ALTER-TABLE-ELEMENT := OWNER-TO | ALTER-COLUMN | ADD-CONSTRAINT;"
"ALTER-COLUMN := KW-ALTER KW-COLUMN NAME ALTER-COL-SPEC ;"
"ALTER-COL-SPEC := ALTER-COL-TYPE | ALTER-COL-DFLT;"
@ -171,13 +171,12 @@
"ALTER-COL-DFLT := KW-SET KW-DEFAULT EXPRESSION ;"
"ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;"
"OPT-KW-DATA := KW-DATA | '' ;"
;; "TABLE-ADD-COL :=
"PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;"
"REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;"
"GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;"
"PERMISSIONS := PERMISSION | PERMISSION COMMA PERMISSIONS ;"
"PERMISSIONS := PERMISSION COMMA * ;"
"PERMISSION := KW-ALL | KW-SELECT | KW-INSERT | KW-UPDATE | KW-DELETE ;"
"OPT-KW-SCHEMA := KW-SCHEMA | '' ;"
@ -187,7 +186,7 @@
"EXPRESSION := VALUE | KEYWORD | FUNCTION | LPAR EXPRESSION RPAR;"
"FUNCTION := NAME LPAR VALUES RPAR | NAME LPAR RPAR ;"
;; TODO: same for values.
"VALUES := VALUE | VALUE COMMA VALUES ;"
"VALUES := VALUE COMMA *;"
"VALUE := INT-VAL | CHAR-VAL | TRUTH-VAL | KW-NULL | NAMES | VALUE '::' NAME;"
"INT-VAL := #'[0-9]+' ;"
"CHAR-VAL := #'\\047.*\\047' ;"
@ -198,6 +197,7 @@
"LPAR := OPT-SPACE '(' OPT-SPACE ;"
"RPAR := OPT-SPACE ')' OPT-SPACE ; "
"EQUALS := OPT-SPACE '=' OPT-SPACE ;"
"OPT-COMMA := COMMA | '' ;"
;; OPT-SPACE is optional space - it's acceptable as OPT-SPACE if there are no whitespace characters. Comments are
;; also acceptable wherever OPT-SPACE is acceptable
"OPT-SPACE := #'\\s*' | OPT-SPACE COMMENT OPT-SPACE;"

View file

@ -32,7 +32,7 @@
"True if `x` is something which just clutters up the parse tree."
[x]
(and
(coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA} (first x))))
(coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA :TERMINATOR} (first x))))
(defn remove-recursive
@ -46,25 +46,8 @@
%)
(remove predicate collection)))
(defn flatten-statements
[parse-tree]
(if
(and (coll? parse-tree) (not (empty? parse-tree)))
(if
(= (first parse-tree) :STATEMENTS)
(cond
(>= (count parse-tree) 3)
(cons (nth parse-tree 1) (flatten-statements (nth parse-tree 2)))
(>= (count parse-tree) 2)
(list (nth parse-tree 1))
true
())
parse-tree)
parse-tree))
(defn simplify [parse-tree]
(flatten-statements
(remove-recursive ignorable? parse-tree)))
(remove-recursive ignorable? parse-tree))

View file

@ -0,0 +1,155 @@
(ns ^{:doc "A parser for SQL: generate Application Description Language."
:author "Simon Brooke"}
squirrel-parse.to-adl
(:require [clojure.xml :refer [emit-element]]
[squirrel-parse.parser :refer [parse]]
[squirrel-parse.simplify :refer [simplify]]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;
;;;; squirrel-parse.to-adl: generate Application Description Language.
;;;;
;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU General Public License
;;;; as published by the Free Software Foundation; either version 2
;;;; of the License, or (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this program; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
;;;; USA.
;;;;
;;;; Copyright (C) 2018 Simon Brooke
;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def sql-datatype-to-adl-datatype
"Map to convert SQL datatypes to the nearest ADL equivalent."
{:DT-BIGINT :integer
:DT-BIGSERIAL :integer
:DT-BIT :integer
:DT-BOOLEAN :boolean
:DT-BYTEA :unsupported
:DT-DATE :date
:DT-DOUBLE-PRECISION :real
:DT-FLOAT :real
:DT-INTEGER :integer
:DT-MONEY :money
:DT-NUMERIC :real
:DT-REAL :real
:DT-SERIAL :integer
:DT-TEXT :string
:DT-CHAR :string
:DT-CHARACTER :string
:DT-CHARACTER-VARYING :string
:DT-VARCHAR :string
:DT-TIME :string
:DT-TIMESTAMP :timestamp
:DT-INTERVAL :unsupported
})
(defn is-subtree-of-type?
"Is this `subtree` a parser subtree of the specified `type`, expected to be a keyword?"
[subtree type]
(and (coll? subtree) (= (first subtree) type)))
(defn is-create-table-statement?
"Is this statement a create table statement?"
[statement]
(and
(is-subtree-of-type? statement :STATEMENT)
(is-subtree-of-type? (second statement) :TABLE-DECL)))
(defn get-children-of-type [subtree type]
(if
(coll? subtree)
(remove
nil?
(map
#(if
(and (coll? %) (= (first %) type))
%)
subtree))))
(defn get-first-child-of-type [subtree type]
(first (get-children-of-type subtree type)))
(defn get-name
"Return the value the first top-level :NAME element of this `subtree`."
[subtree]
(let [name-elt (make-property subtree :NAME)]
(if name-elt (second name-elt))))
(defn get-column-datatype
"Get the datatype of this column specification."
[column-spec]
(let [datatype-spec (get-first-child-of-type column-spec :DATATYPE)
sql-datatype (first (second datatype-spec))]
(sql-datatype-to-adl-datatype sql-datatype)))
(defn make-property
"Make an ADL property representing this column specification."
[column-spec]
(if
(is-subtree-of-type? (second column-spec) :COLUMN-SPEC)
(make-property (second column-spec))
{:tag :property
:attrs
{
:name (get-name column-spec)
:type (get-column-datatype column-spec)
}}
))
(defn make-entity [table-decl]
"Make an ADL entity representing this table declaration"
{:tag :entity
:name (get-name table-decl)
:content
(apply
vector
(map
make-property
(remove
nil?
(map
#(if
(and
(is-subtree-of-type? % :TABLE-SPEC-ELEMENT)
(is-subtree-of-type? (second %) :COLUMN-SPEC))
(second %))
(get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))})
(defn table-definition-to-entity
"Return a map like this `map` with, if this `statement` is a table declaration,
an ADL entity representing that table added to it."
[entity-map statement]
(if
(is-create-table-statement? statement)
(let [table-decl (second statement)
table-name (get-name table-decl)]
(merge entity-map {table-name (make-entity table-decl)}))
entity-map))
(defn table-definitions-to-entities
"Extract table definitions from these `statements` as a map of ADL
entities indexed by name."
([statements]
(reduce table-definition-to-entity {} statements)))
(defn to-adl [filename name]
(let [entities (table-definitions-to-entities (simplify (parse (slurp filename))))]
[{:tag :application
:name name
:content (vals entities)}
nil]))

View file

@ -0,0 +1,15 @@
(ns squirrel-parse.to-adl-test
(:require [clojure.test :refer :all]
[squirrel-parse.to-adl :refer :all]))
(deftest test-get-column-datatype
(testing "Testing whether right datatype is returned for a valid column"
(let [column-spec '(:COLUMN-SPEC
(:NAME "longitude")
(:DATATYPE (:DT-REAL "real"))
(:COLUMN-CONSTRAINTS)
(:OPT-COMMA))
actual (get-column-datatype column-spec)
expected :real]
(is (= expected actual)))))