Greatly improved the simplifier.
What comes out of the parser now contains very little which is redundent.
This commit is contained in:
parent
53aa0379ab
commit
1b990183b3
|
@ -33,13 +33,20 @@
|
||||||
"Keywords from the subset of Postgresql I have so far needed to support. Feel free to either
|
"Keywords from the subset of Postgresql I have so far needed to support. Feel free to either
|
||||||
1. Add further Postrgresql keywords, or
|
1. Add further Postrgresql keywords, or
|
||||||
2. Start a new def for non-postgresql keywords."
|
2. Start a new def for non-postgresql keywords."
|
||||||
["action" "add" "all" "alter" "as" "by" "cache" "cascade" "collate" "column" "comment" "constraint" "create"
|
["action" "add" "admin" "all" "alter" "as" "by" "cache" "cascade" "collate" "column" "comment" "connection" "constraint" "create"
|
||||||
"cycle" "data" "day" "default" "delete" "exists" "extension" "false" "foreign" "from" "full"
|
"cycle" "data" "day" "default" "delete" "encrypted" "exists" "extension" "false" "foreign" "from" "full"
|
||||||
"go" "grant" "hour" "if" "increment" "insert" "into" "key" "match" "maxvalue" "minute" "minvalue"
|
"go" "grant" "group" "hour" "if" "increment" "insert" "in" "into" "key" "limit" "match" "maxvalue" "minute" "minvalue"
|
||||||
"month" "no" "none" "not" "null" "off" "on" "only" "owned" "owner" "partial" "primary"
|
"month" "no" "none" "not" "null" "off" "on" "only" "owned" "owner" "partial" "password" "primary"
|
||||||
"references" "rename" "restrict" "revoke" "schema" "second" "select" "sequence" "set"
|
"references" "rename" "restrict" "revoke" "role" "schema" "second" "select" "sequence" "set"
|
||||||
"simple" "start" "table" "temp" "temporary" "time" "to" "true" "type" "unique"
|
"simple" "start" "sysid" "table" "temp" "temporary" "time" "to" "true" "type" "unique"
|
||||||
"update" "using" "values" "where" "with" "without" "year" "zone"])
|
"until" "update" "user" "using" "valid" "values" "where" "with" "without" "year" "zone"
|
||||||
|
|
||||||
|
;; the next group are all role options. I'm not sure whether or not they
|
||||||
|
;; really count as keywords. I'm also not sure whether 'NO' should be created
|
||||||
|
;; as a special case insensitive prefix, which would save half of these.
|
||||||
|
"bypassrls" "createdb" "createrole" "inherit" "login" "nobypassrls" "nocreatedb"
|
||||||
|
"nocreaterole" "noinherit" "nologin" "noreplication" "nosuperuser" "replication" "superuser"
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
(def keyword-rules
|
(def keyword-rules
|
||||||
|
@ -104,7 +111,7 @@
|
||||||
strings. Order is not very significant, but instaparse treats the first rule as special."
|
strings. Order is not very significant, but instaparse treats the first rule as special."
|
||||||
(list
|
(list
|
||||||
"STATEMENTS := OPT-SPACE STATEMENT + ;"
|
"STATEMENTS := OPT-SPACE STATEMENT + ;"
|
||||||
"STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;"
|
"STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | ROLE-DECL | INSERT-STMT | PERMISSIONS-STMT;"
|
||||||
"ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;"
|
"ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;"
|
||||||
|
|
||||||
"SET-STMT := KW-SET NAME EQUALS EXPRESSION TERMINATOR ;"
|
"SET-STMT := KW-SET NAME EQUALS EXPRESSION TERMINATOR ;"
|
||||||
|
@ -172,6 +179,30 @@
|
||||||
"ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;"
|
"ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;"
|
||||||
"OPT-KW-DATA := KW-DATA | '' ;"
|
"OPT-KW-DATA := KW-DATA | '' ;"
|
||||||
|
|
||||||
|
;; from https://www.postgresql.org/docs/current/static/sql-createrole.html
|
||||||
|
;; https://www.postgresql.org/docs/10/static/sql-createuser.html
|
||||||
|
"ROLE-DECL := KW-CREATE ROLE NAME ROLE-OPTIONS TERMINATOR;"
|
||||||
|
"ROLE := KW-GROUP | KW-ROLE | KW-USER ;"
|
||||||
|
"ROLE-OPTIONS := KW-WITH ROLE-OPTIONS | OPT-SPACE ROLE-OPTION *;"
|
||||||
|
"ROLE-OPTION := RO-SUPERUSER | RO-CREATEDB | RO-CREATEROLE | RO-INHERIT | RO-REPLIC | RO-BYPASSRLS | RO-LOGIN| RO-CONN-LIMIT | RO-PASSWORD | RO-TIMEOUT | RO-IN-ROLE | RO-ROLE | RO-ADMIN | RO-USER | RO-SYSID;"
|
||||||
|
"RO-SUPERUSER := KW-SUPERUSER | KW-NOSUPERUSER ;"
|
||||||
|
"RO-CREATEDB := KW-CREATEDB | KW-NOCREATEDB ;"
|
||||||
|
"RO-CREATEROLE := KW-CREATEROLE | KW-NOCREATEROLE ;"
|
||||||
|
"RO-INHERIT := KW-INHERIT | KW-NOINHERIT ;"
|
||||||
|
"RO-REPLIC := KW-REPLICATION | KW-NOREPLICATION ;"
|
||||||
|
"RO-BYPASSRLS := KW-BYPASSRLS | KW-NOBYPASSRLS ;"
|
||||||
|
"RO-LOGIN := KW-LOGIN | KW-NOLOGIN ;"
|
||||||
|
"RO-CONN-LIMIT := KW-CONNECTION KW-LIMIT INT-VAL ;"
|
||||||
|
"RO-PASSWORD := KW-PASSWORD CHAR-VAL | KW-ENCRYPTED KW-PASSWORD CHAR-VAL ;"
|
||||||
|
;; The value here is actually a date/time value, but that's a level of detail we don't need.
|
||||||
|
"RO-TIMEOUT := KW-VALID KW-UNTIL CHAR-VAL ;"
|
||||||
|
"RO-IN-ROLE := KW-IN KW-ROLE NAMES | KW-IN KW-GROUP NAMES ;"
|
||||||
|
"RO-ROLE := KW-ROLE NAMES ;"
|
||||||
|
"RO-ADMIN := KW-ADMIN NAMES ;"
|
||||||
|
"RO-USER := KW-USER NAMES ;"
|
||||||
|
"RO-SYSID := KW-SYSID CHAR-VAL ;"
|
||||||
|
|
||||||
|
|
||||||
"PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;"
|
"PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;"
|
||||||
"REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;"
|
"REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;"
|
||||||
"GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;"
|
"GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;"
|
||||||
|
@ -216,4 +247,4 @@
|
||||||
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
|
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
|
||||||
(insta/parser grammar))
|
(insta/parser grammar))
|
||||||
|
|
||||||
(def parse-comment (insta/parser "COMMENT := #'--[^\\n\\r]*' ;"))
|
|
||||||
|
|
|
@ -28,13 +28,38 @@
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
|
||||||
|
(declare in-simplify)
|
||||||
|
|
||||||
|
|
||||||
(defn ignorable?
|
(defn ignorable?
|
||||||
"True if `x` is something which just clutters up the parse tree."
|
"True if `x` is something which just clutters up the parse tree."
|
||||||
[x]
|
[x]
|
||||||
(and
|
(and
|
||||||
(coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA :TERMINATOR} (first x))))
|
(coll? x)(contains? #{:COMMENT
|
||||||
|
:SPACE
|
||||||
|
:OPT-KW-DATA
|
||||||
|
:OPT-SPACE
|
||||||
|
:TERMINATOR} (first x))))
|
||||||
|
|
||||||
|
|
||||||
|
(defn simplify-second-of-two
|
||||||
|
"There are a number of possible simplifications such that if the `tree` has
|
||||||
|
only two elements, the second is semantically sufficient."
|
||||||
|
[tree]
|
||||||
|
(if
|
||||||
|
(and (= (count tree) 2) (coll? (nth tree 1)))
|
||||||
|
(in-simplify (nth tree 1))
|
||||||
|
tree))
|
||||||
|
|
||||||
|
(defn simplify-second-if-not-empty
|
||||||
|
"Like `simplify-second-of-two`, but returns nil if there is no second element
|
||||||
|
of `tree`."
|
||||||
|
[tree]
|
||||||
|
(if
|
||||||
|
(= (count tree) 1)
|
||||||
|
nil
|
||||||
|
(simplify-second-of-two tree)))
|
||||||
|
|
||||||
(defn remove-recursive
|
(defn remove-recursive
|
||||||
"Return a collection like this `collection` from which items which are matched
|
"Return a collection like this `collection` from which items which are matched
|
||||||
by this `predicate` have been removed at all levels."
|
by this `predicate` have been removed at all levels."
|
||||||
|
@ -47,7 +72,49 @@
|
||||||
(remove predicate collection)))
|
(remove predicate collection)))
|
||||||
|
|
||||||
|
|
||||||
(defn simplify [parse-tree]
|
(defn- in-simplify
|
||||||
(remove-recursive ignorable? parse-tree))
|
"Simplify/canonicalise this `tree`, presumed already to have had ignorables
|
||||||
|
removed. Opportunistically replace complex fragments with
|
||||||
|
semantically identical simpler fragments"
|
||||||
|
[tree]
|
||||||
|
(if
|
||||||
|
(coll? tree)
|
||||||
|
(case (first tree)
|
||||||
|
(:STATEMENTS) (remove nil? (map in-simplify (rest tree)))
|
||||||
|
(:EXISTENCE
|
||||||
|
:PERMANENCE) (simplify-second-if-not-empty tree)
|
||||||
|
(:ALTER-COL-SPEC
|
||||||
|
:ALTER-SEQ-ELEMENT
|
||||||
|
:ALTER-STMT
|
||||||
|
:ALTER-TABLE-ELEMENT
|
||||||
|
:MATCH-TYPE
|
||||||
|
:ONLY
|
||||||
|
:OPT-KW-SCHEMA
|
||||||
|
:PERMISSION
|
||||||
|
:PERMISSIONS
|
||||||
|
:PERMISSIONS-STMT
|
||||||
|
:REF-DIRECTIVE
|
||||||
|
:RO-BYPASSRLS
|
||||||
|
:RO-CREATEDB
|
||||||
|
:RO-CREATEROLE
|
||||||
|
:RO-INHERIT
|
||||||
|
:RO-LOGIN
|
||||||
|
:RO-REPLIC
|
||||||
|
:RO-SUPERUSER
|
||||||
|
:ROLE-OPTION
|
||||||
|
:SEQ-SPEC-ELEMENT
|
||||||
|
:STATEMENT
|
||||||
|
:TABLE-SPEC-ELEMENT
|
||||||
|
:VALUE) (simplify-second-of-two tree)
|
||||||
|
(:ROLE) (first tree)
|
||||||
|
(remove nil? (map in-simplify tree)))
|
||||||
|
tree))
|
||||||
|
|
||||||
|
|
||||||
|
(defn simplify
|
||||||
|
"Simplify/canonicalise this `tree`. Opportunistically replace complex fragments with
|
||||||
|
semantically identical simpler fragments"
|
||||||
|
[parse-tree]
|
||||||
|
(in-simplify (remove-recursive ignorable? parse-tree)))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,7 @@
|
||||||
:DT-NUMERIC :real
|
:DT-NUMERIC :real
|
||||||
:DT-REAL :real
|
:DT-REAL :real
|
||||||
:DT-SERIAL :integer
|
:DT-SERIAL :integer
|
||||||
:DT-TEXT :string
|
:DT-TEXT :text
|
||||||
:DT-CHAR :string
|
:DT-CHAR :string
|
||||||
:DT-CHARACTER :string
|
:DT-CHARACTER :string
|
||||||
:DT-CHARACTER-VARYING :string
|
:DT-CHARACTER-VARYING :string
|
||||||
|
@ -71,9 +71,7 @@
|
||||||
(defn is-create-table-statement?
|
(defn is-create-table-statement?
|
||||||
"Is this statement a create table statement?"
|
"Is this statement a create table statement?"
|
||||||
[statement]
|
[statement]
|
||||||
(and
|
(is-subtree-of-type? statement :TABLE-DECL))
|
||||||
(is-subtree-of-type? statement :STATEMENT)
|
|
||||||
(is-subtree-of-type? (second statement) :TABLE-DECL)))
|
|
||||||
|
|
||||||
(defn get-children-of-type [subtree type]
|
(defn get-children-of-type [subtree type]
|
||||||
(if
|
(if
|
||||||
|
@ -129,10 +127,8 @@
|
||||||
nil?
|
nil?
|
||||||
(map
|
(map
|
||||||
#(if
|
#(if
|
||||||
(and
|
(is-subtree-of-type? % :COLUMN-SPEC)
|
||||||
(is-subtree-of-type? % :TABLE-SPEC-ELEMENT)
|
%)
|
||||||
(is-subtree-of-type? (second %) :COLUMN-SPEC))
|
|
||||||
(second %))
|
|
||||||
(get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))})
|
(get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))})
|
||||||
|
|
||||||
|
|
||||||
|
@ -142,9 +138,8 @@
|
||||||
[entity-map statement]
|
[entity-map statement]
|
||||||
(if
|
(if
|
||||||
(is-create-table-statement? statement)
|
(is-create-table-statement? statement)
|
||||||
(let [table-decl (second statement)
|
(let [table-name (get-name statement)]
|
||||||
table-name (get-name table-decl)]
|
(merge entity-map {table-name (make-entity statement)}))
|
||||||
(merge entity-map {table-name (make-entity table-decl)}))
|
|
||||||
entity-map))
|
entity-map))
|
||||||
|
|
||||||
(defn table-definitions-to-entities
|
(defn table-definitions-to-entities
|
||||||
|
@ -153,6 +148,10 @@
|
||||||
([statements]
|
([statements]
|
||||||
(reduce table-definition-to-entity {} statements)))
|
(reduce table-definition-to-entity {} statements)))
|
||||||
|
|
||||||
|
(defn extract-security-groups-from-statements
|
||||||
|
[statements]
|
||||||
|
nil)
|
||||||
|
|
||||||
|
|
||||||
(defn to-adl
|
(defn to-adl
|
||||||
"Take this `input` (filename, url, whatever) assumed to contain a stream of SQL
|
"Take this `input` (filename, url, whatever) assumed to contain a stream of SQL
|
||||||
|
|
Loading…
Reference in a new issue