Huge amounts of (unfinished, not working) work on picky validator.

This commit is contained in:
Simon Brooke 2022-12-22 22:35:50 +00:00
parent 21220970a8
commit 3f35c5e293
9 changed files with 399 additions and 122 deletions

View file

@ -1,5 +1,21 @@
(ns dog-and-duck.quack.fault-messages)
;;; Copyright (C) Simon Brooke, 2022
;;; This program is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU General Public License
;;; as published by the Free Software Foundation; either version 2
;;; of the License, or (at your option) any later version.
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;; You should have received a copy of the GNU General Public License
;;; along with this program; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
(def messages
"Actual fault messages to which fault codes resolve."
{:id-not-https "Publicly facing content SHOULD use HTTPS URIs"

View file

@ -11,7 +11,7 @@
document specifying this vocabulary;
2. `:type` whose value shall be `Fault`;
3. `:severity` whose value shall be one of
`minor`, `should`, `must` or `critical`;
`info`, `minor`, `should`, `must` or `critical`;
4. `:fault` whose value shall be a unique token
representing the particular fault type;
5. `:narrative` whose value shall be a natural
@ -27,13 +27,73 @@
possible to serialise a fault report as a
document which in its own right conforms to the
ActivityStreams spec."
(:require [dog-and-duck.quack.fault-messages :refer [messages]]
[dog-and-duck.utils.process :refer [pid]])
(:require [clojure.set :refer [intersection]]
[dog-and-duck.quack.fault-messages :refer [messages]]
[dog-and-duck.utils.process :refer [get-hostname get-pid]]
[taoensso.timbre :as timbre
;; Optional, just refer what you like:
:refer [warn]])
(:import [java.net URI URISyntaxException]))
;;; Copyright (C) Simon Brooke, 2022
;;; This program is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU General Public License
;;; as published by the Free Software Foundation; either version 2
;;; of the License, or (at your option) any later version.
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;; You should have received a copy of the GNU General Public License
;;; along with this program; if not, write to the Free Software
;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
;; ERRATA
(def ^:dynamic *reify-refs*
"If `true`, references to objects in fields will be reified and validated.
If `false`, they won't, but an `:info` level fault report will be generated.
There are several things in the spec which, in a document, may correctly be
either
1. a fully fleshed out object, or
2. a URI pointing to such an object.
Obviously to fully validate a document we ought to reify all the refs and
check that they are themselves valid, but
a. in some of the published test documents the URIs do not reference a
valid document;
b. there will be performance costs to reifying all the refs;
c. in perverse cases, reifying refs might result in runaway recursion.
TODO: I think that in production this should default to `true`."
false)
(def ^:dynamic *reject-severity*
"The severity at which the binary validator will return `false`.
In practice documents seen in the wild do not typically appear to be
fully valid, and this does not matter. This allows the sensitivity of
the binary validator (`dog-and-duck.quack.quack`) to be tuned. It's in
this (`dog-and-duck.quack.picky`) namespace, not that one, because this
namespace is where concerns about severity are handled."
:must)
(def ^:const context-key
"The Clojure reader barfs on `:@context`, although it is in principle a valid
keyword. So we'll make it once, here, to make the code more performant and
easier to read."
(keyword "@context"))
(def ^:const severity
"Severity of faults found, as follows:
0. `:info` not actually a fault, but an issue noted during validation;
1. `:minor` things which I consider to be faults, but which
don't actually breach the spec;
2. `:should` instances where the spec says something SHOULD
@ -42,28 +102,47 @@
be done, which isn't;
4. `:critical` instances where I believe the fault means that
the object cannot be meaningfully processed."
#{:minor :should :must :critical})
#{:info :minor :should :must :critical})
(def ^:const severity-filters
"Hack for implementing a severity hierarchy"
{:all #{}
:minor #{:minor}
:should #{:minor :should}
:must #{:minor :should :must}
:info #{}
:minor #{:info}
:should #{:info :minor}
:must #{:info :minor :should}
:critical severity})
(defn truthy?
"Return `true` if `x` is truthy, else `false`."
[x]
(if x true false))
(defn has-type?
"Return `true` if object `x` has type `type`, else `false`.
The values of `type` fields of ActivityStreams objects may be lists; they
are considered to have a type if the type token is a member of the list."
[x type]
(assert (map? x) (string? type))
(let [tv (:type x)]
(cond
(coll? tv) (truthy? (not-empty (filter #(= % type) tv)))
:else (= tv type))))
(defn filter-severity
"Return a list of reports taken from these `reports` where the severity
of the report is greater than this `severity`."
of the report is greater than this or equal to this `severity`."
[reports severity]
(assert
(and
(coll? reports)
(every? map? reports)
(every? :severity reports)))
(remove
#((severity-filters severity) (:severity %))
reports))
(cond (nil? reports) nil
(and
(coll? reports)
(every? map? reports)
(every? :severity reports)) (remove
#((severity-filters severity) (:severity %))
reports)
:else reports)) ;; TODO this actually shouldn't happen and we should
;; error if it does
(def ^:const activitystreams-context-uri
"The URI of the context of an ActivityStreams object is expected to be this
@ -93,7 +172,7 @@
(defmacro has-context?
"True if `x` is an ActivityStreams object with a valid context, else `false`."
[x]
`(context? ((keyword "@context") ~x)))
`(context? (context-key ~x)))
(defn make-fault-object
"Return a fault object with these `severity`, `fault` and `narrative` values.
@ -105,28 +184,52 @@
;; to look up the narrative in a resource file.
[severity fault]
(assoc {}
(keyword "@context") validation-fault-context-uri
context-key validation-fault-context-uri
:id (str "https://"
(.. java.net.InetAddress getLocalHost getHostName)
(get-hostname)
"/fault/"
pid
(get-pid)
":"
(inst-ms (java.util.Date.)))
:type "Fault"
:severity severity
:fault fault
:narrative (messages fault)))
:narrative (or (messages fault)
(do
(warn "No narrative provided for fault token " fault)
(str fault)))))
(defmacro nil-if-empty
"if `x` is an empty collection, return `nil`; else return `x`."
[x]
`(if (coll? ~x)
(if (empty? ~x) nil ~x)
(nil-if-empty ~x)
~x))
(defn has-type-or-fault
"If object `x` has a `:type` value which is `acceptable`, return `nil`;
else return a fault object with this `severity` and `token`.
`acceptable` may be passed as either a string, or as a set of strings."
[x acceptable severity token]
(let [tv (:type x)]
(when-not
(cond
(and (string? tv) (string? acceptable)) (= tv acceptable)
(and (string? tv) (set? acceptable)) (acceptable tv)
(and (coll? tv) (string? acceptable)) ((set tv) acceptable)
(and (coll? tv) (set? acceptable)) (not-empty
(intersection (set tv) acceptable))
;; TODO else should error
)
(make-fault-object severity token))))
(defn object-faults
"Return a list of faults found in object `x`, or `nil` if none are."
[x]
"Return a list of faults found in object `x`, or `nil` if none are.
If `expected-type` is also passed, verify that `x` has `expected-type`.
`expected-type` may be passed as a string or as a set of strings."
([x]
(nil-if-empty
(remove empty?
(list
@ -139,6 +242,27 @@
(make-fault-object :minor :no-type))
(when-not (and (map? x) (contains? x :id))
(make-fault-object :minor :no-id-transient))))))
([x expected-type]
(nil-if-empty
(remove empty?
(concat
(object-faults x)
(list
;; TODO: should resolve the correct `-faults`function for the
;; `expected-type` and call that; but that's for later.
(has-type-or-fault x expected-type :critical :unexpected-type)))))))
(defn uri-or-fault
"If `u` is not a valid URI, return a fault object with this `severity` and
`if-invalid-token`. If it's `nil`, return a fault object with this `severity`
and `if-missing-token`. Otherwise return nil."
[u severity if-missing-token if-invalid-token]
(try
(uri? (URI. u))
(catch URISyntaxException _
(make-fault-object severity if-invalid-token))
(catch NullPointerException _
(make-fault-object severity if-missing-token))))
(defn persistent-object-faults
"Return a list of faults found in persistent object `x`, or `nil` if none are."
@ -158,3 +282,131 @@
(make-fault-object :must :null-id-persistent)))
(make-fault-object :must :no-id-persistent)))))))
(def ^:const actor-types
"The set of types we will accept as actors.
There's an [explicit set of allowed actor types]
(https://www.w3.org/TR/activitystreams-vocabulary/#actor-types)."
#{"Application"
"Group"
"Organization"
"Person"
"Service"})
(defmacro actor-type?
"Return `true` if the `x` is a recognised actor type, else `false`."
[^String x]
`(if (actor-types ~x) true false))
(defn has-actor-type?
"Return `true` if the object `x` has a type which is an actor type, else
`false`."
[x]
(let [tv (:type x)]
(cond
(coll? tv) (truthy? (not-empty (filter actor-type? tv)))
:else (actor-type? tv))))
(defn actor-faults
"Return a list of faults found in actor `x`, or `nil` if none are."
[x]
(nil-if-empty
(remove empty?
(concat (persistent-object-faults x)
(list
(when-not (has-actor-type? x)
(make-fault-object :must :not-actor-type))
(uri-or-fault
(:inbox x) :must :no-inbox :invalid-inbox-uri)
(uri-or-fault
(:outbox x) :must :no-outbox :invalid-outbox-uri))))))
(def ^:const verb-types
"The set of types we will accept as verbs.
There's an [explicit set of allowed verb types]
(https://www.w3.org/TR/activitystreams-vocabulary/#activity-types)."
#{"Accept" "Add" "Announce" "Arrive" "Block" "Create" "Delete" "Dislike"
"Flag" "Follow" "Ignore" "Invite" "Join" "Leave" "Like" "Listen" "Move"
"Offer" "Question" "Reject" "Read" "Remove" "TentativeAccept"
"TentativeReject" "Travel" "Undo" "Update" "View"})
(defmacro verb-type?
"`true` if `x`, a string, represents a recognised ActivityStreams activity
type."
[^String x]
`(if (verb-types ~x) true false))
(defn has-activity-type?
"Return `true` if the object `x` has a type which is an activity type, else
`false`."
[x]
(let [tv (:type x)]
(cond
(coll? tv) (truthy? (not-empty (filter verb-type? tv)))
:else (actor-type? tv))))
(defn string-or-fault
"If this `value` is not a string, return a fault object with this `severity`
and `token`, else `nil`. If `pattern` is also passed, it is expected to be
a Regex, and the fault object will be returned unless `value` matches the
`pattern`."
([value severity token]
(when-not (string? value) (make-fault-object severity token)))
([value severity token pattern]
(when not (and (string? value) (re-matches pattern value))
(make-fault-object severity token))) )
(defn link-faults
"A link object is required to have an `href` property. It may have all of
`rel` | `mediaType` | `name` | `hreflang` | `height` | `width` | `preview`
but I *think* they're all optional."
[x]
(list
(uri-or-fault
(:href x) :must :no-href-uri :invalid-href-uri)
(string-or-fault (:mediaType x) :minor :no-media-type #"\w+\/[-+.\w]+")))
(defn object-reference-or-fault
"If this `value` is either
1. an object of `expected-type`;
2. a URI; or
3. a link object
then return `nil`; else return a fault object with this `severity` and `token`.
As with `has-type-or-fault` (q.v.), `expected-type` may be passed as a
string or as a set of strings.
**NOTE THAT** TODO if `*reify-refs*` is `true` and `value` is either a URI or
a link, the linked object should be checked and validated as an object of
`expected-type`."
[value expected-type severity token]
(cond TODO continue here in the morning))
(defn activity-type-faults
"Some specific activity types have specific requirements which are not
requirements."
([x]
(if (coll? (:type x))
(map #(activity-type-faults x %) (:type x))
(activity-type-faults x (:type x))))
([x token]
(case token
"Link" (link-faults x)
)))
(defn activity-faults
[x]
(nil-if-empty
(remove empty?
(concat (persistent-object-faults x)
(activity-type-faults x)
(list
(when-not
(has-activity-type? x)
(make-fault-object :must :not-activity-type))
(when-not (string? (:summary x)) (make-fault-object :should :no-summary))
)))))

View file

@ -12,10 +12,11 @@
a hazy relationship to the spec, so this is difficult. I suspect that
I may have to implement a `*strict*` dynamic variable, so that users can
toggle some checks off."
;;(:require [clojure.spec.alpha as s])
(:require [dog-and-duck.quack.picky :refer [filter-severity has-context?
object-faults]])
(:require [dog-and-duck.quack.picky :refer [*reject-severity* actor-faults
filter-severity
has-context?
object-faults persistent-object-faults]])
(:import [java.net URI URISyntaxException]))
;;; Copyright (C) Simon Brooke, 2022
@ -54,7 +55,7 @@
but in samples found in the wild they typically don't."
([x]
(and (map? x) (:type x) true))
(object? x *reject-severity*))
([x severity]
(empty? (filter-severity (object-faults x) severity))))
@ -63,69 +64,16 @@
Transient objects in ActivityPub are not required to have an `id` key, but persistent
ones must have a key, and it must be an IRI (but normally a URI)."
[x]
(try
(and (object? x) (uri? (URI. (:id x))))
(catch URISyntaxException _ false)))
;; (persistent-object? {:type "test" :id "https://mastodon.scot/@barfilfarm"})
(def ^:const actor-types
"The set of types we will accept as actors.
There's an [explicit set of allowed actor types]
(https://www.w3.org/TR/activitystreams-vocabulary/#actor-types)."
#{"Application"
"Group"
"Organization"
"Person"
"Service"})
(defmacro actor-type?
"Return `true` iff the `x` is a recognised actor type, else `false`."
[^String x]
`(if (actor-types ~x) true false))
;; (actor-type? "Group")
(def ^:const verb-types
"The set of types we will accept as verbs.
There's an [explicit set of allowed verb types]
(https://www.w3.org/TR/activitystreams-vocabulary/#activity-types)."
#{"Accept" "Add" "Announce" "Arrive" "Block" "Create" "Delete" "Dislike"
"Flag" "Follow" "Ignore" "Invite" "Join" "Leave" "Like" "Listen" "Move"
"Offer" "Question" "Reject" "Read" "Remove" "TentativeAccept"
"TentativeReject" "Travel" "Undo" "Update" "View"})
(defmacro verb-type?
;; TODO: better as a macro
[^String x]
`(if (verb-types ~x) true false))
([x]
(persistent-object? x *reject-severity*))
([x severity]
(empty? (filter-severity (persistent-object-faults x) severity))))
(defn actor?
"Returns `true` if `x` quacks like an actor, else false.
**NOTE THAT** [Section 4.1 of the spec]
(https://www.w3.org/TR/activitypub/#actor-objects) says explicitly that
> Actor objects MUST have, in addition to the properties mandated by 3.1 Object Identifiers, the following properties:
>
> inbox
> A reference to an [ActivityStreams] OrderedCollection comprised of all the messages received by the actor; see 5.2 Inbox.
> outbox
> An [ActivityStreams] OrderedCollection comprised of all the messages produced by the actor; see 5.1 Outbox.
However, none of the provided examples in the [activitystreams-test-documents repository]() does in fact have these properties"
[x]
(and
(object? x)
(has-context? x)
(uri? (URI. (:inbox x)))
(uri? (URI. (:outbox x)))
(actor-type? (:type x))
true))
"Returns `true` if `x` quacks like an actor, else false."
([x] (actor? x *reject-severity*))
([x severity]
(empty? (filter-severity (actor-faults x) severity))))
(defn actor-or-uri?
"`true` if `x` is either a URI or an actor.
@ -135,10 +83,10 @@
*must be* to an actor object, but before, may only be to a URI pointing to
one."
[x]
(and
(and
(cond (string? x) (uri? (URI. x))
:else (actor? x))
true))
:else (actor? x))
true))
(defn activity?
"`true` iff `x` quacks like an activity, else false."
@ -184,7 +132,7 @@
(:items x) (nil? (:orderedItems x))
(:orderedItems x) (nil? (:items x)) ;; can't have both properties
(integer? (:totalItems x)) true ;; can have neither, provided it has totalItems.
:else false)
:else false)
(object? x)
(= (:type x) object-type)
(if items