Greatly improved the simplifier.

What comes out of the parser now contains very little which is redundent.
This commit is contained in:
Simon Brooke 2018-02-15 18:20:57 +00:00
parent 53aa0379ab
commit 1b990183b3
3 changed files with 120 additions and 23 deletions

View file

@ -33,13 +33,20 @@
"Keywords from the subset of Postgresql I have so far needed to support. Feel free to either
1. Add further Postrgresql keywords, or
2. Start a new def for non-postgresql keywords."
["action" "add" "all" "alter" "as" "by" "cache" "cascade" "collate" "column" "comment" "constraint" "create"
"cycle" "data" "day" "default" "delete" "exists" "extension" "false" "foreign" "from" "full"
"go" "grant" "hour" "if" "increment" "insert" "into" "key" "match" "maxvalue" "minute" "minvalue"
"month" "no" "none" "not" "null" "off" "on" "only" "owned" "owner" "partial" "primary"
"references" "rename" "restrict" "revoke" "schema" "second" "select" "sequence" "set"
"simple" "start" "table" "temp" "temporary" "time" "to" "true" "type" "unique"
"update" "using" "values" "where" "with" "without" "year" "zone"])
["action" "add" "admin" "all" "alter" "as" "by" "cache" "cascade" "collate" "column" "comment" "connection" "constraint" "create"
"cycle" "data" "day" "default" "delete" "encrypted" "exists" "extension" "false" "foreign" "from" "full"
"go" "grant" "group" "hour" "if" "increment" "insert" "in" "into" "key" "limit" "match" "maxvalue" "minute" "minvalue"
"month" "no" "none" "not" "null" "off" "on" "only" "owned" "owner" "partial" "password" "primary"
"references" "rename" "restrict" "revoke" "role" "schema" "second" "select" "sequence" "set"
"simple" "start" "sysid" "table" "temp" "temporary" "time" "to" "true" "type" "unique"
"until" "update" "user" "using" "valid" "values" "where" "with" "without" "year" "zone"
;; the next group are all role options. I'm not sure whether or not they
;; really count as keywords. I'm also not sure whether 'NO' should be created
;; as a special case insensitive prefix, which would save half of these.
"bypassrls" "createdb" "createrole" "inherit" "login" "nobypassrls" "nocreatedb"
"nocreaterole" "noinherit" "nologin" "noreplication" "nosuperuser" "replication" "superuser"
])
(def keyword-rules
@ -104,7 +111,7 @@
strings. Order is not very significant, but instaparse treats the first rule as special."
(list
"STATEMENTS := OPT-SPACE STATEMENT + ;"
"STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;"
"STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | ROLE-DECL | INSERT-STMT | PERMISSIONS-STMT;"
"ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;"
"SET-STMT := KW-SET NAME EQUALS EXPRESSION TERMINATOR ;"
@ -172,6 +179,30 @@
"ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;"
"OPT-KW-DATA := KW-DATA | '' ;"
;; from https://www.postgresql.org/docs/current/static/sql-createrole.html
;; https://www.postgresql.org/docs/10/static/sql-createuser.html
"ROLE-DECL := KW-CREATE ROLE NAME ROLE-OPTIONS TERMINATOR;"
"ROLE := KW-GROUP | KW-ROLE | KW-USER ;"
"ROLE-OPTIONS := KW-WITH ROLE-OPTIONS | OPT-SPACE ROLE-OPTION *;"
"ROLE-OPTION := RO-SUPERUSER | RO-CREATEDB | RO-CREATEROLE | RO-INHERIT | RO-REPLIC | RO-BYPASSRLS | RO-LOGIN| RO-CONN-LIMIT | RO-PASSWORD | RO-TIMEOUT | RO-IN-ROLE | RO-ROLE | RO-ADMIN | RO-USER | RO-SYSID;"
"RO-SUPERUSER := KW-SUPERUSER | KW-NOSUPERUSER ;"
"RO-CREATEDB := KW-CREATEDB | KW-NOCREATEDB ;"
"RO-CREATEROLE := KW-CREATEROLE | KW-NOCREATEROLE ;"
"RO-INHERIT := KW-INHERIT | KW-NOINHERIT ;"
"RO-REPLIC := KW-REPLICATION | KW-NOREPLICATION ;"
"RO-BYPASSRLS := KW-BYPASSRLS | KW-NOBYPASSRLS ;"
"RO-LOGIN := KW-LOGIN | KW-NOLOGIN ;"
"RO-CONN-LIMIT := KW-CONNECTION KW-LIMIT INT-VAL ;"
"RO-PASSWORD := KW-PASSWORD CHAR-VAL | KW-ENCRYPTED KW-PASSWORD CHAR-VAL ;"
;; The value here is actually a date/time value, but that's a level of detail we don't need.
"RO-TIMEOUT := KW-VALID KW-UNTIL CHAR-VAL ;"
"RO-IN-ROLE := KW-IN KW-ROLE NAMES | KW-IN KW-GROUP NAMES ;"
"RO-ROLE := KW-ROLE NAMES ;"
"RO-ADMIN := KW-ADMIN NAMES ;"
"RO-USER := KW-USER NAMES ;"
"RO-SYSID := KW-SYSID CHAR-VAL ;"
"PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;"
"REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;"
"GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;"
@ -216,4 +247,4 @@
"Parse the argument, assumed to be a string in the correct syntax, and return a parse tree."
(insta/parser grammar))
(def parse-comment (insta/parser "COMMENT := #'--[^\\n\\r]*' ;"))

View file

@ -28,13 +28,38 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(declare in-simplify)
(defn ignorable?
"True if `x` is something which just clutters up the parse tree."
[x]
(and
(coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA :TERMINATOR} (first x))))
(coll? x)(contains? #{:COMMENT
:SPACE
:OPT-KW-DATA
:OPT-SPACE
:TERMINATOR} (first x))))
(defn simplify-second-of-two
"There are a number of possible simplifications such that if the `tree` has
only two elements, the second is semantically sufficient."
[tree]
(if
(and (= (count tree) 2) (coll? (nth tree 1)))
(in-simplify (nth tree 1))
tree))
(defn simplify-second-if-not-empty
"Like `simplify-second-of-two`, but returns nil if there is no second element
of `tree`."
[tree]
(if
(= (count tree) 1)
nil
(simplify-second-of-two tree)))
(defn remove-recursive
"Return a collection like this `collection` from which items which are matched
by this `predicate` have been removed at all levels."
@ -47,7 +72,49 @@
(remove predicate collection)))
(defn simplify [parse-tree]
(remove-recursive ignorable? parse-tree))
(defn- in-simplify
"Simplify/canonicalise this `tree`, presumed already to have had ignorables
removed. Opportunistically replace complex fragments with
semantically identical simpler fragments"
[tree]
(if
(coll? tree)
(case (first tree)
(:STATEMENTS) (remove nil? (map in-simplify (rest tree)))
(:EXISTENCE
:PERMANENCE) (simplify-second-if-not-empty tree)
(:ALTER-COL-SPEC
:ALTER-SEQ-ELEMENT
:ALTER-STMT
:ALTER-TABLE-ELEMENT
:MATCH-TYPE
:ONLY
:OPT-KW-SCHEMA
:PERMISSION
:PERMISSIONS
:PERMISSIONS-STMT
:REF-DIRECTIVE
:RO-BYPASSRLS
:RO-CREATEDB
:RO-CREATEROLE
:RO-INHERIT
:RO-LOGIN
:RO-REPLIC
:RO-SUPERUSER
:ROLE-OPTION
:SEQ-SPEC-ELEMENT
:STATEMENT
:TABLE-SPEC-ELEMENT
:VALUE) (simplify-second-of-two tree)
(:ROLE) (first tree)
(remove nil? (map in-simplify tree)))
tree))
(defn simplify
"Simplify/canonicalise this `tree`. Opportunistically replace complex fragments with
semantically identical simpler fragments"
[parse-tree]
(in-simplify (remove-recursive ignorable? parse-tree)))

View file

@ -53,7 +53,7 @@
:DT-NUMERIC :real
:DT-REAL :real
:DT-SERIAL :integer
:DT-TEXT :string
:DT-TEXT :text
:DT-CHAR :string
:DT-CHARACTER :string
:DT-CHARACTER-VARYING :string
@ -71,9 +71,7 @@
(defn is-create-table-statement?
"Is this statement a create table statement?"
[statement]
(and
(is-subtree-of-type? statement :STATEMENT)
(is-subtree-of-type? (second statement) :TABLE-DECL)))
(is-subtree-of-type? statement :TABLE-DECL))
(defn get-children-of-type [subtree type]
(if
@ -129,10 +127,8 @@
nil?
(map
#(if
(and
(is-subtree-of-type? % :TABLE-SPEC-ELEMENT)
(is-subtree-of-type? (second %) :COLUMN-SPEC))
(second %))
(is-subtree-of-type? % :COLUMN-SPEC)
%)
(get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))})
@ -142,9 +138,8 @@
[entity-map statement]
(if
(is-create-table-statement? statement)
(let [table-decl (second statement)
table-name (get-name table-decl)]
(merge entity-map {table-name (make-entity table-decl)}))
(let [table-name (get-name statement)]
(merge entity-map {table-name (make-entity statement)}))
entity-map))
(defn table-definitions-to-entities
@ -153,6 +148,10 @@
([statements]
(reduce table-definition-to-entity {} statements)))
(defn extract-security-groups-from-statements
[statements]
nil)
(defn to-adl
"Take this `input` (filename, url, whatever) assumed to contain a stream of SQL