Simplified the parser slightly, and now generating a subset of ADL.

This commit is contained in:
Simon Brooke 2018-02-14 17:51:38 +00:00
parent ad252a0d78
commit 3dc84787b5
5 changed files with 186 additions and 31 deletions

2
.gitignore vendored
View file

@ -6,3 +6,5 @@ Leiningen.gitignore
target/classes/META-INF/maven/squirrel-parse/squirrel-parse/pom\.properties target/classes/META-INF/maven/squirrel-parse/squirrel-parse/pom\.properties
target/ target/
\.lein-failures

View file

@ -103,7 +103,7 @@
There will be some differences from ANSI 92. Rules are generally expressed as single There will be some differences from ANSI 92. Rules are generally expressed as single
strings. Order is not very significant, but instaparse treats the first rule as special." strings. Order is not very significant, but instaparse treats the first rule as special."
(list (list
"STATEMENTS := OPT-SPACE STATEMENT | OPT-SPACE STATEMENT OPT-SPACE STATEMENTS ;" "STATEMENTS := OPT-SPACE STATEMENT + ;"
"STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;" "STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;"
"ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;" "ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;"
@ -112,7 +112,7 @@
;; taken from https://www.postgresql.org/docs/10/static/sql-createsequence.html ;; taken from https://www.postgresql.org/docs/10/static/sql-createsequence.html
"SEQUENCE-DECL := KW-CREATE PERMANENCE KW-SEQUENCE EXISTENCE NAME SEQ-SPEC-ELEMENTS TERMINATOR ;" "SEQUENCE-DECL := KW-CREATE PERMANENCE KW-SEQUENCE EXISTENCE NAME SEQ-SPEC-ELEMENTS TERMINATOR ;"
"SEQ-SPEC-ELEMENTS := SEQ-SPEC-ELEMENT | SEQ-SPEC-ELEMENT SEQ-SPEC-ELEMENTS ;" "SEQ-SPEC-ELEMENTS := SEQ-SPEC-ELEMENT + ;"
"SEQ-SPEC-ELEMENT := SEQ-SPEC-AS | SEQ-SPEC-INCREMENT | SEQ-SPEC-MIN | SEQ-SPEC-MAX | SEQ-SPEC-START | SEQ-SPEC-CACHE | SEQ-SPEC-CYCLE | SEQ-SPEC-OWNER ;" "SEQ-SPEC-ELEMENT := SEQ-SPEC-AS | SEQ-SPEC-INCREMENT | SEQ-SPEC-MIN | SEQ-SPEC-MAX | SEQ-SPEC-START | SEQ-SPEC-CACHE | SEQ-SPEC-CYCLE | SEQ-SPEC-OWNER ;"
"SEQ-SPEC-AS := KW-AS DATATYPE ;" "SEQ-SPEC-AS := KW-AS DATATYPE ;"
"SEQ-SPEC-INCREMENT := INT-VAL | KW-INCREMENT INT-VAL | KW-INCREMENT KW-BY INT-VAL ;" "SEQ-SPEC-INCREMENT := INT-VAL | KW-INCREMENT INT-VAL | KW-INCREMENT KW-BY INT-VAL ;"
@ -126,7 +126,7 @@
;; from https://www.postgresql.org/docs/10/static/sql-altersequence.html ;; from https://www.postgresql.org/docs/10/static/sql-altersequence.html
"ALTER-SEQUENCE := KW-ALTER KW-SEQUENCE EXISTENCE NAME ALTER-SEQ-ELEMENTS TERMINATOR ;" "ALTER-SEQUENCE := KW-ALTER KW-SEQUENCE EXISTENCE NAME ALTER-SEQ-ELEMENTS TERMINATOR ;"
"ALTER-SEQ-ELEMENTS := ALTER-SEQ-ELEMENT | ALTER-SEQ-ELEMENT ALTER-SEQ-ELEMENTS ;" "ALTER-SEQ-ELEMENTS := ALTER-SEQ-ELEMENT + ;"
"ALTER-SEQ-ELEMENT := SEQ-SPEC-ELEMENT | OWNER-TO | SEQ-RENAME | SEQ-SET-SCHEMA ;" "ALTER-SEQ-ELEMENT := SEQ-SPEC-ELEMENT | OWNER-TO | SEQ-RENAME | SEQ-SET-SCHEMA ;"
"OWNER-TO := KW-OWNER KW-TO NAME ;" "OWNER-TO := KW-OWNER KW-TO NAME ;"
"SEQ-RENAME := KW-RENAME KW-TO NAME ;" "SEQ-RENAME := KW-RENAME KW-TO NAME ;"
@ -138,11 +138,11 @@
;; taken from https://www.postgresql.org/docs/10/static/sql-createtable.html ;; taken from https://www.postgresql.org/docs/10/static/sql-createtable.html
;; but by no means all of that is implemented. ;; but by no means all of that is implemented.
"TABLE-DECL := KW-CREATE PERMANENCE KW-TABLE EXISTENCE NAME LPAR TABLE-SPEC-ELEMENTS RPAR TERMINATOR ;" "TABLE-DECL := KW-CREATE PERMANENCE KW-TABLE EXISTENCE NAME LPAR TABLE-SPEC-ELEMENTS RPAR TERMINATOR ;"
"TABLE-SPEC-ELEMENTS := TABLE-SPEC-ELEMENT | TABLE-SPEC-ELEMENT COMMA TABLE-SPEC-ELEMENTS ; " "TABLE-SPEC-ELEMENTS := TABLE-SPEC-ELEMENT +; "
"TABLE-SPEC-ELEMENT := COLUMN-SPEC | TABLE-CONSTRAINT ;" "TABLE-SPEC-ELEMENT := COLUMN-SPEC | TABLE-CONSTRAINT ;"
"COLUMN-SPEC := NAME SPACE DATATYPE | COLUMN-SPEC OPT-SPACE COLUMN-CONSTRAINTS ;" "COLUMN-SPEC := NAME SPACE DATATYPE COLUMN-CONSTRAINTS OPT-COMMA;"
"TABLE-CONSTRAINT := 'TODO' ;" "TABLE-CONSTRAINT := 'TODO' OPT-COMMA ;"
"COLUMN-CONSTRAINTS := COLUMN-CONSTRAINT | COLUMN-CONSTRAINT SPACE COLUMN-CONSTRAINT ;" "COLUMN-CONSTRAINTS := COLUMN-CONSTRAINT * ;"
"COLUMN-CONSTRAINT := KW-CONSTRAINT NAME COLUMN-CONSTRAINT | NOT-NULL-CC | NULL-CC | DEFAULT-CC | UNIQUE-CC | PRIMARY-CC | REFERENCES-CC ;" "COLUMN-CONSTRAINT := KW-CONSTRAINT NAME COLUMN-CONSTRAINT | NOT-NULL-CC | NULL-CC | DEFAULT-CC | UNIQUE-CC | PRIMARY-CC | REFERENCES-CC ;"
"NOT-NULL-CC := KW-NOT KW-NULL ;" "NOT-NULL-CC := KW-NOT KW-NULL ;"
"NULL-CC := KW-NULL ;" "NULL-CC := KW-NULL ;"
@ -150,7 +150,7 @@
"UNIQUE-CC := KW-UNIQUE INDEX-PARAMS ;" "UNIQUE-CC := KW-UNIQUE INDEX-PARAMS ;"
"PRIMARY-CC := KW-PRIMARY KW-KEY INDEX-PARAMS ;" "PRIMARY-CC := KW-PRIMARY KW-KEY INDEX-PARAMS ;"
"REFERENCES-CC := KW-REFERENCES NAME LPAR NAMES RPAR REF-DIRECTIVES | KW-FOREIGN KW-KEY LPAR NAMES RPAR REFERENCES-CC;" "REFERENCES-CC := KW-REFERENCES NAME LPAR NAMES RPAR REF-DIRECTIVES | KW-FOREIGN KW-KEY LPAR NAMES RPAR REFERENCES-CC;"
"REF-DIRECTIVES := '' | REF-DIRECTIVE REF-DIRECTIVES ;" "REF-DIRECTIVES := REF-DIRECTIVE * ;"
"REF-DIRECTIVE := REF-MATCH | REF-ON-UPDATE | REF-ON-DELETE ;" "REF-DIRECTIVE := REF-MATCH | REF-ON-UPDATE | REF-ON-DELETE ;"
"REF-MATCH := KW-MATCH MATCH-TYPE ;" "REF-MATCH := KW-MATCH MATCH-TYPE ;"
"REF-ON-DELETE := KW-ON KW-DELETE REF-ACTION ;" "REF-ON-DELETE := KW-ON KW-DELETE REF-ACTION ;"
@ -163,7 +163,7 @@
"ONLY := KW-ONLY | '' ;" "ONLY := KW-ONLY | '' ;"
"ALTER-TABLE := KW-ALTER KW-TABLE EXISTENCE ONLY QUAL-NAME OPT-SPACE ALTER-TABLE-ELEMENTS TERMINATOR ;" "ALTER-TABLE := KW-ALTER KW-TABLE EXISTENCE ONLY QUAL-NAME OPT-SPACE ALTER-TABLE-ELEMENTS TERMINATOR ;"
"ALTER-TABLE-ELEMENTS := ALTER-TABLE-ELEMENT | ALTER-TABLE-ELEMENT OPT-SPACE ALTER-TABLE-ELEMENTS ;" "ALTER-TABLE-ELEMENTS := ALTER-TABLE-ELEMENT + ;"
"ALTER-TABLE-ELEMENT := OWNER-TO | ALTER-COLUMN | ADD-CONSTRAINT;" "ALTER-TABLE-ELEMENT := OWNER-TO | ALTER-COLUMN | ADD-CONSTRAINT;"
"ALTER-COLUMN := KW-ALTER KW-COLUMN NAME ALTER-COL-SPEC ;" "ALTER-COLUMN := KW-ALTER KW-COLUMN NAME ALTER-COL-SPEC ;"
"ALTER-COL-SPEC := ALTER-COL-TYPE | ALTER-COL-DFLT;" "ALTER-COL-SPEC := ALTER-COL-TYPE | ALTER-COL-DFLT;"
@ -171,13 +171,12 @@
"ALTER-COL-DFLT := KW-SET KW-DEFAULT EXPRESSION ;" "ALTER-COL-DFLT := KW-SET KW-DEFAULT EXPRESSION ;"
"ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;" "ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;"
"OPT-KW-DATA := KW-DATA | '' ;" "OPT-KW-DATA := KW-DATA | '' ;"
;; "TABLE-ADD-COL :=
"PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;" "PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;"
"REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;" "REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;"
"GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;" "GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;"
"PERMISSIONS := PERMISSION | PERMISSION COMMA PERMISSIONS ;" "PERMISSIONS := PERMISSION COMMA * ;"
"PERMISSION := KW-ALL | KW-SELECT | KW-INSERT | KW-UPDATE | KW-DELETE ;" "PERMISSION := KW-ALL | KW-SELECT | KW-INSERT | KW-UPDATE | KW-DELETE ;"
"OPT-KW-SCHEMA := KW-SCHEMA | '' ;" "OPT-KW-SCHEMA := KW-SCHEMA | '' ;"
@ -187,7 +186,7 @@
"EXPRESSION := VALUE | KEYWORD | FUNCTION | LPAR EXPRESSION RPAR;" "EXPRESSION := VALUE | KEYWORD | FUNCTION | LPAR EXPRESSION RPAR;"
"FUNCTION := NAME LPAR VALUES RPAR | NAME LPAR RPAR ;" "FUNCTION := NAME LPAR VALUES RPAR | NAME LPAR RPAR ;"
;; TODO: same for values. ;; TODO: same for values.
"VALUES := VALUE | VALUE COMMA VALUES ;" "VALUES := VALUE COMMA *;"
"VALUE := INT-VAL | CHAR-VAL | TRUTH-VAL | KW-NULL | NAMES | VALUE '::' NAME;" "VALUE := INT-VAL | CHAR-VAL | TRUTH-VAL | KW-NULL | NAMES | VALUE '::' NAME;"
"INT-VAL := #'[0-9]+' ;" "INT-VAL := #'[0-9]+' ;"
"CHAR-VAL := #'\\047.*\\047' ;" "CHAR-VAL := #'\\047.*\\047' ;"
@ -198,6 +197,7 @@
"LPAR := OPT-SPACE '(' OPT-SPACE ;" "LPAR := OPT-SPACE '(' OPT-SPACE ;"
"RPAR := OPT-SPACE ')' OPT-SPACE ; " "RPAR := OPT-SPACE ')' OPT-SPACE ; "
"EQUALS := OPT-SPACE '=' OPT-SPACE ;" "EQUALS := OPT-SPACE '=' OPT-SPACE ;"
"OPT-COMMA := COMMA | '' ;"
;; OPT-SPACE is optional space - it's acceptable as OPT-SPACE if there are no whitespace characters. Comments are ;; OPT-SPACE is optional space - it's acceptable as OPT-SPACE if there are no whitespace characters. Comments are
;; also acceptable wherever OPT-SPACE is acceptable ;; also acceptable wherever OPT-SPACE is acceptable
"OPT-SPACE := #'\\s*' | OPT-SPACE COMMENT OPT-SPACE;" "OPT-SPACE := #'\\s*' | OPT-SPACE COMMENT OPT-SPACE;"

View file

@ -32,7 +32,7 @@
"True if `x` is something which just clutters up the parse tree." "True if `x` is something which just clutters up the parse tree."
[x] [x]
(and (and
(coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA} (first x)))) (coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA :TERMINATOR} (first x))))
(defn remove-recursive (defn remove-recursive
@ -46,25 +46,8 @@
%) %)
(remove predicate collection))) (remove predicate collection)))
(defn flatten-statements
[parse-tree]
(if
(and (coll? parse-tree) (not (empty? parse-tree)))
(if
(= (first parse-tree) :STATEMENTS)
(cond
(>= (count parse-tree) 3)
(cons (nth parse-tree 1) (flatten-statements (nth parse-tree 2)))
(>= (count parse-tree) 2)
(list (nth parse-tree 1))
true
())
parse-tree)
parse-tree))
(defn simplify [parse-tree] (defn simplify [parse-tree]
(flatten-statements (remove-recursive ignorable? parse-tree))
(remove-recursive ignorable? parse-tree)))

View file

@ -0,0 +1,155 @@
(ns ^{:doc "A parser for SQL: generate Application Description Language."
:author "Simon Brooke"}
squirrel-parse.to-adl
(:require [clojure.xml :refer [emit-element]]
[squirrel-parse.parser :refer [parse]]
[squirrel-parse.simplify :refer [simplify]]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;
;;;; squirrel-parse.to-adl: generate Application Description Language.
;;;;
;;;; This program is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU General Public License
;;;; as published by the Free Software Foundation; either version 2
;;;; of the License, or (at your option) any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this program; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
;;;; USA.
;;;;
;;;; Copyright (C) 2018 Simon Brooke
;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def sql-datatype-to-adl-datatype
"Map to convert SQL datatypes to the nearest ADL equivalent."
{:DT-BIGINT :integer
:DT-BIGSERIAL :integer
:DT-BIT :integer
:DT-BOOLEAN :boolean
:DT-BYTEA :unsupported
:DT-DATE :date
:DT-DOUBLE-PRECISION :real
:DT-FLOAT :real
:DT-INTEGER :integer
:DT-MONEY :money
:DT-NUMERIC :real
:DT-REAL :real
:DT-SERIAL :integer
:DT-TEXT :string
:DT-CHAR :string
:DT-CHARACTER :string
:DT-CHARACTER-VARYING :string
:DT-VARCHAR :string
:DT-TIME :string
:DT-TIMESTAMP :timestamp
:DT-INTERVAL :unsupported
})
(defn is-subtree-of-type?
"Is this `subtree` a parser subtree of the specified `type`, expected to be a keyword?"
[subtree type]
(and (coll? subtree) (= (first subtree) type)))
(defn is-create-table-statement?
"Is this statement a create table statement?"
[statement]
(and
(is-subtree-of-type? statement :STATEMENT)
(is-subtree-of-type? (second statement) :TABLE-DECL)))
(defn get-children-of-type [subtree type]
(if
(coll? subtree)
(remove
nil?
(map
#(if
(and (coll? %) (= (first %) type))
%)
subtree))))
(defn get-first-child-of-type [subtree type]
(first (get-children-of-type subtree type)))
(defn get-name
"Return the value the first top-level :NAME element of this `subtree`."
[subtree]
(let [name-elt (make-property subtree :NAME)]
(if name-elt (second name-elt))))
(defn get-column-datatype
"Get the datatype of this column specification."
[column-spec]
(let [datatype-spec (get-first-child-of-type column-spec :DATATYPE)
sql-datatype (first (second datatype-spec))]
(sql-datatype-to-adl-datatype sql-datatype)))
(defn make-property
"Make an ADL property representing this column specification."
[column-spec]
(if
(is-subtree-of-type? (second column-spec) :COLUMN-SPEC)
(make-property (second column-spec))
{:tag :property
:attrs
{
:name (get-name column-spec)
:type (get-column-datatype column-spec)
}}
))
(defn make-entity [table-decl]
"Make an ADL entity representing this table declaration"
{:tag :entity
:name (get-name table-decl)
:content
(apply
vector
(map
make-property
(remove
nil?
(map
#(if
(and
(is-subtree-of-type? % :TABLE-SPEC-ELEMENT)
(is-subtree-of-type? (second %) :COLUMN-SPEC))
(second %))
(get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))})
(defn table-definition-to-entity
"Return a map like this `map` with, if this `statement` is a table declaration,
an ADL entity representing that table added to it."
[entity-map statement]
(if
(is-create-table-statement? statement)
(let [table-decl (second statement)
table-name (get-name table-decl)]
(merge entity-map {table-name (make-entity table-decl)}))
entity-map))
(defn table-definitions-to-entities
"Extract table definitions from these `statements` as a map of ADL
entities indexed by name."
([statements]
(reduce table-definition-to-entity {} statements)))
(defn to-adl [filename name]
(let [entities (table-definitions-to-entities (simplify (parse (slurp filename))))]
[{:tag :application
:name name
:content (vals entities)}
nil]))

View file

@ -0,0 +1,15 @@
(ns squirrel-parse.to-adl-test
(:require [clojure.test :refer :all]
[squirrel-parse.to-adl :refer :all]))
(deftest test-get-column-datatype
(testing "Testing whether right datatype is returned for a valid column"
(let [column-spec '(:COLUMN-SPEC
(:NAME "longitude")
(:DATATYPE (:DT-REAL "real"))
(:COLUMN-CONSTRAINTS)
(:OPT-COMMA))
actual (get-column-datatype column-spec)
expected :real]
(is (= expected actual)))))