From 1b990183b3073aa81d273a641769ded478a7d585 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Thu, 15 Feb 2018 18:20:57 +0000 Subject: [PATCH] Greatly improved the simplifier. What comes out of the parser now contains very little which is redundent. --- src/squirrel_parse/parser.clj | 49 ++++++++++++++++++---- src/squirrel_parse/simplify.clj | 73 +++++++++++++++++++++++++++++++-- src/squirrel_parse/to_adl.clj | 21 +++++----- 3 files changed, 120 insertions(+), 23 deletions(-) diff --git a/src/squirrel_parse/parser.clj b/src/squirrel_parse/parser.clj index 764faa8..5afd6e5 100644 --- a/src/squirrel_parse/parser.clj +++ b/src/squirrel_parse/parser.clj @@ -33,13 +33,20 @@ "Keywords from the subset of Postgresql I have so far needed to support. Feel free to either 1. Add further Postrgresql keywords, or 2. Start a new def for non-postgresql keywords." - ["action" "add" "all" "alter" "as" "by" "cache" "cascade" "collate" "column" "comment" "constraint" "create" - "cycle" "data" "day" "default" "delete" "exists" "extension" "false" "foreign" "from" "full" - "go" "grant" "hour" "if" "increment" "insert" "into" "key" "match" "maxvalue" "minute" "minvalue" - "month" "no" "none" "not" "null" "off" "on" "only" "owned" "owner" "partial" "primary" - "references" "rename" "restrict" "revoke" "schema" "second" "select" "sequence" "set" - "simple" "start" "table" "temp" "temporary" "time" "to" "true" "type" "unique" - "update" "using" "values" "where" "with" "without" "year" "zone"]) + ["action" "add" "admin" "all" "alter" "as" "by" "cache" "cascade" "collate" "column" "comment" "connection" "constraint" "create" + "cycle" "data" "day" "default" "delete" "encrypted" "exists" "extension" "false" "foreign" "from" "full" + "go" "grant" "group" "hour" "if" "increment" "insert" "in" "into" "key" "limit" "match" "maxvalue" "minute" "minvalue" + "month" "no" "none" "not" "null" "off" "on" "only" "owned" "owner" "partial" "password" "primary" + "references" "rename" "restrict" "revoke" "role" "schema" "second" "select" "sequence" "set" + "simple" "start" "sysid" "table" "temp" "temporary" "time" "to" "true" "type" "unique" + "until" "update" "user" "using" "valid" "values" "where" "with" "without" "year" "zone" + + ;; the next group are all role options. I'm not sure whether or not they + ;; really count as keywords. I'm also not sure whether 'NO' should be created + ;; as a special case insensitive prefix, which would save half of these. + "bypassrls" "createdb" "createrole" "inherit" "login" "nobypassrls" "nocreatedb" + "nocreaterole" "noinherit" "nologin" "noreplication" "nosuperuser" "replication" "superuser" + ]) (def keyword-rules @@ -104,7 +111,7 @@ strings. Order is not very significant, but instaparse treats the first rule as special." (list "STATEMENTS := OPT-SPACE STATEMENT + ;" - "STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | INSERT-STMT | PERMISSIONS-STMT;" + "STATEMENT := TABLE-DECL | ALTER-STMT | SET-STMT | COMMENT | EXTENSION-DECL | SEQUENCE-DECL | ROLE-DECL | INSERT-STMT | PERMISSIONS-STMT;" "ALTER-STMT := ALTER-TABLE | ALTER-SEQUENCE ;" "SET-STMT := KW-SET NAME EQUALS EXPRESSION TERMINATOR ;" @@ -172,6 +179,30 @@ "ADD-CONSTRAINT := KW-ADD COLUMN-CONSTRAINT ;" "OPT-KW-DATA := KW-DATA | '' ;" + ;; from https://www.postgresql.org/docs/current/static/sql-createrole.html + ;; https://www.postgresql.org/docs/10/static/sql-createuser.html + "ROLE-DECL := KW-CREATE ROLE NAME ROLE-OPTIONS TERMINATOR;" + "ROLE := KW-GROUP | KW-ROLE | KW-USER ;" + "ROLE-OPTIONS := KW-WITH ROLE-OPTIONS | OPT-SPACE ROLE-OPTION *;" + "ROLE-OPTION := RO-SUPERUSER | RO-CREATEDB | RO-CREATEROLE | RO-INHERIT | RO-REPLIC | RO-BYPASSRLS | RO-LOGIN| RO-CONN-LIMIT | RO-PASSWORD | RO-TIMEOUT | RO-IN-ROLE | RO-ROLE | RO-ADMIN | RO-USER | RO-SYSID;" + "RO-SUPERUSER := KW-SUPERUSER | KW-NOSUPERUSER ;" + "RO-CREATEDB := KW-CREATEDB | KW-NOCREATEDB ;" + "RO-CREATEROLE := KW-CREATEROLE | KW-NOCREATEROLE ;" + "RO-INHERIT := KW-INHERIT | KW-NOINHERIT ;" + "RO-REPLIC := KW-REPLICATION | KW-NOREPLICATION ;" + "RO-BYPASSRLS := KW-BYPASSRLS | KW-NOBYPASSRLS ;" + "RO-LOGIN := KW-LOGIN | KW-NOLOGIN ;" + "RO-CONN-LIMIT := KW-CONNECTION KW-LIMIT INT-VAL ;" + "RO-PASSWORD := KW-PASSWORD CHAR-VAL | KW-ENCRYPTED KW-PASSWORD CHAR-VAL ;" + ;; The value here is actually a date/time value, but that's a level of detail we don't need. + "RO-TIMEOUT := KW-VALID KW-UNTIL CHAR-VAL ;" + "RO-IN-ROLE := KW-IN KW-ROLE NAMES | KW-IN KW-GROUP NAMES ;" + "RO-ROLE := KW-ROLE NAMES ;" + "RO-ADMIN := KW-ADMIN NAMES ;" + "RO-USER := KW-USER NAMES ;" + "RO-SYSID := KW-SYSID CHAR-VAL ;" + + "PERMISSIONS-STMT := REVOKE-STMT | GRANT-STMT;" "REVOKE-STMT := KW-REVOKE PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-FROM NAMES TERMINATOR;" "GRANT-STMT := KW-GRANT PERMISSIONS KW-ON OPT-KW-SCHEMA QUAL-NAME KW-TO NAMES TERMINATOR;" @@ -216,4 +247,4 @@ "Parse the argument, assumed to be a string in the correct syntax, and return a parse tree." (insta/parser grammar)) -(def parse-comment (insta/parser "COMMENT := #'--[^\\n\\r]*' ;")) + diff --git a/src/squirrel_parse/simplify.clj b/src/squirrel_parse/simplify.clj index b65cd27..3d96727 100644 --- a/src/squirrel_parse/simplify.clj +++ b/src/squirrel_parse/simplify.clj @@ -28,13 +28,38 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(declare in-simplify) + + (defn ignorable? "True if `x` is something which just clutters up the parse tree." [x] (and - (coll? x)(contains? #{:SPACE :OPT-SPACE :COMMENT :OPT-KW-DATA :TERMINATOR} (first x)))) + (coll? x)(contains? #{:COMMENT + :SPACE + :OPT-KW-DATA + :OPT-SPACE + :TERMINATOR} (first x)))) +(defn simplify-second-of-two + "There are a number of possible simplifications such that if the `tree` has + only two elements, the second is semantically sufficient." + [tree] + (if + (and (= (count tree) 2) (coll? (nth tree 1))) + (in-simplify (nth tree 1)) + tree)) + +(defn simplify-second-if-not-empty + "Like `simplify-second-of-two`, but returns nil if there is no second element + of `tree`." + [tree] + (if + (= (count tree) 1) + nil + (simplify-second-of-two tree))) + (defn remove-recursive "Return a collection like this `collection` from which items which are matched by this `predicate` have been removed at all levels." @@ -47,7 +72,49 @@ (remove predicate collection))) -(defn simplify [parse-tree] - (remove-recursive ignorable? parse-tree)) +(defn- in-simplify + "Simplify/canonicalise this `tree`, presumed already to have had ignorables + removed. Opportunistically replace complex fragments with + semantically identical simpler fragments" + [tree] + (if + (coll? tree) + (case (first tree) + (:STATEMENTS) (remove nil? (map in-simplify (rest tree))) + (:EXISTENCE + :PERMANENCE) (simplify-second-if-not-empty tree) + (:ALTER-COL-SPEC + :ALTER-SEQ-ELEMENT + :ALTER-STMT + :ALTER-TABLE-ELEMENT + :MATCH-TYPE + :ONLY + :OPT-KW-SCHEMA + :PERMISSION + :PERMISSIONS + :PERMISSIONS-STMT + :REF-DIRECTIVE + :RO-BYPASSRLS + :RO-CREATEDB + :RO-CREATEROLE + :RO-INHERIT + :RO-LOGIN + :RO-REPLIC + :RO-SUPERUSER + :ROLE-OPTION + :SEQ-SPEC-ELEMENT + :STATEMENT + :TABLE-SPEC-ELEMENT + :VALUE) (simplify-second-of-two tree) + (:ROLE) (first tree) + (remove nil? (map in-simplify tree))) + tree)) + + +(defn simplify + "Simplify/canonicalise this `tree`. Opportunistically replace complex fragments with + semantically identical simpler fragments" + [parse-tree] + (in-simplify (remove-recursive ignorable? parse-tree))) diff --git a/src/squirrel_parse/to_adl.clj b/src/squirrel_parse/to_adl.clj index 18c66e2..bb0be02 100644 --- a/src/squirrel_parse/to_adl.clj +++ b/src/squirrel_parse/to_adl.clj @@ -53,7 +53,7 @@ :DT-NUMERIC :real :DT-REAL :real :DT-SERIAL :integer - :DT-TEXT :string + :DT-TEXT :text :DT-CHAR :string :DT-CHARACTER :string :DT-CHARACTER-VARYING :string @@ -71,9 +71,7 @@ (defn is-create-table-statement? "Is this statement a create table statement?" [statement] - (and - (is-subtree-of-type? statement :STATEMENT) - (is-subtree-of-type? (second statement) :TABLE-DECL))) + (is-subtree-of-type? statement :TABLE-DECL)) (defn get-children-of-type [subtree type] (if @@ -129,10 +127,8 @@ nil? (map #(if - (and - (is-subtree-of-type? % :TABLE-SPEC-ELEMENT) - (is-subtree-of-type? (second %) :COLUMN-SPEC)) - (second %)) + (is-subtree-of-type? % :COLUMN-SPEC) + %) (get-first-child-of-type table-decl :TABLE-SPEC-ELEMENTS)))))}) @@ -142,9 +138,8 @@ [entity-map statement] (if (is-create-table-statement? statement) - (let [table-decl (second statement) - table-name (get-name table-decl)] - (merge entity-map {table-name (make-entity table-decl)})) + (let [table-name (get-name statement)] + (merge entity-map {table-name (make-entity statement)})) entity-map)) (defn table-definitions-to-entities @@ -153,6 +148,10 @@ ([statements] (reduce table-definition-to-entity {} statements))) +(defn extract-security-groups-from-statements + [statements] + nil) + (defn to-adl "Take this `input` (filename, url, whatever) assumed to contain a stream of SQL