Much work on error reporting architecture for validation

This commit is contained in:
Simon Brooke 2022-12-21 17:30:26 +00:00
parent 5593bb22d1
commit 25795cf364
43 changed files with 2024 additions and 1067 deletions

View file

@ -0,0 +1,143 @@
(ns dog-and-duck.quack.picky "Fault-finder for ActivityPub documents.
Generally, each `-faults` function will return:
1. `nil` if no faults were found;
2. a sequence of fault objects if faults were found.
Each fault object shall have the properties:
1. `:@context` whose value shall be the URL of a
document specifying this vocabulary;
2. `:type` whose value shall be `Fault`;
3. `:severity` whose value shall be one of
`minor`, `should`, `must` or `critical`;
4. `:fault` whose value shall be a unique token
representing the particular fault type;
5. `:narrative` whose value shall be a natural
language description of the fault type.
Note that the reason for the `:fault` property is
to be able to have a well known place, linked to
from the @context URL, which allows narratives
for each fault type to be served in as many
natural languages as possible.
The idea further is that it should ultimately be
possible to serialise a fault report as a
document which in its own right conforms to the
ActivityStreams spec."
(:require [dog-and-duck.utils.process :refer [pid]]))
;; The set of severity keywords a fault report may carry. A set literal has
;; no ordering of its own: the ranking from least to most severe is expressed
;; by the cumulative sets in `severity-filters`, which also reuses this whole
;; set as the value of its `:critical` entry.
(def ^:const severity
"Severity of faults found, as follows:
1. `:minor` things which I consider to be faults, but which
don't actually breach the spec;
2. `:should` instances where the spec says something SHOULD
be done, which isn't;
3. `:must` instances where the spec says something MUST
be done, which isn't;
4. `:critical` instances where I believe the fault means that
the object cannot be meaningfully processed."
#{:minor :should :must :critical})
;; Maps a threshold keyword to the set of severities that `filter-severity`
;; REMOVES when that threshold is passed to it. Each set is cumulative: it
;; contains the threshold itself and everything ranked below it.
(def ^:const severity-filters
"Hack for implementing a severity hierarchy"
;; `:all` removes nothing, so every report is kept.
{:all #{}
:minor #{:minor}
:should #{:minor :should}
:must #{:minor :should :must}
;; `:critical` maps to the complete `severity` set, so every report is removed.
:critical severity})
(defn filter-severity
  "Return a lazy sequence of those of these `reports` whose `:severity` is
  strictly greater than this `severity` threshold.

  `reports` must be a collection of maps, each with a truthy `:severity`
  value. `severity` must be one of the keys of `severity-filters`, namely
  `:all` (keep every report), `:minor`, `:should`, `:must` or `:critical`
  (keep nothing, since no severity ranks above it).

  Throws an `AssertionError` if either argument is malformed. The threshold
  is checked eagerly because an unknown threshold would otherwise only fail,
  with an uninformative `NullPointerException`, when the lazy result was
  eventually realised."
  [reports severity]
  (assert
   (and
    (coll? reports)
    (every? map? reports)
    (every? :severity reports)))
  (assert
   (contains? severity-filters severity)
   (str "Unknown severity threshold: " (pr-str severity)))
  ;; Look the exclusion set up once rather than once per report.
  (let [excluded (severity-filters severity)]
    (remove #(excluded (:severity %)) reports)))
;; `context?` compares string contexts against this value with `=`, so only
;; an exact match is accepted.
(def ^:const activitystreams-context-uri
"The URI of the context of an ActivityStreams object is expected to be this
literal string."
"https://www.w3.org/ns/activitystreams")
;; `make-fault-object` stores this value under the `@context` key of every
;; fault object it builds.
(def ^:const validation-fault-context-uri
"The URI of the context of a validation fault report object shall be this
literal string."
"https://simon-brooke.github.io/dog-and-duck/codox/Validation_Faults.html")
(defn context?
  "Predicate: does `x` look like an ActivityStreams context?

  Answers `true` when `x` is either

  1. a string equal to `activitystreams-context-uri`; or
  2. a collection of exactly two elements whose first non-map element is
     itself a context (checked recursively).

  Answers `false` for everything else, including `nil`."
  [x]
  (if (string? x)
    (= x activitystreams-context-uri)
    ;; `nil` and other non-collections fail the `coll?` guard and yield false.
    (and (coll? x)
         (context? (first (remove map? x)))
         (= (count x) 2))))
(defmacro has-context?
  "Expands to a test of whether the value held under the `@context` key of
  `x` satisfies `context?`. The key is built with `keyword` at run time
  rather than written as a literal."
  [x]
  `(context? (get ~x (keyword "@context"))))
(defn make-fault-object
  "Return a fault object (a map) with these `severity`, `fault` and
  `narrative` values, plus:

  * `@context`: `validation-fault-context-uri`;
  * `:type`: the string `Fault`;
  * `:id`: a URI-shaped string built from the local host name, the process
    id, the current time in milliseconds and a random UUID.

  An ActivityPub object MUST have a globally unique ID. Whether this is
  meaningful depends on whether we persist fault report objects and serve
  them, which at present there are no plans to do. The random UUID is
  included because host, pid and millisecond timestamp alone are identical
  for faults created in quick succession by the same process.

  Looking up the local host may throw `java.net.UnknownHostException` if
  the host name cannot be resolved."
  [severity fault narrative]
  {(keyword "@context") validation-fault-context-uri
   :id (str "https://"
            (.. java.net.InetAddress getLocalHost getHostName)
            "/fault/"
            pid
            ":"
            (inst-ms (java.util.Date.))
            ;; disambiguates faults minted within the same millisecond
            ":"
            (java.util.UUID/randomUUID))
   :type "Fault"
   :severity severity
   :fault fault
   :narrative narrative})
(defn object-faults
  "Return a sequence of fault objects describing the faults found in `x`
  considered as an ActivityPub object; the sequence is empty if no faults
  were found. Three independent checks are made:

  1. `:no-context` (severity `:should`) if `x` has no valid context;
  2. `:no-type` (severity `:minor`) if `x` has no truthy `:type`;
  3. `:no-id-transient` (severity `:minor`) if `x` has no `:id` key at all
     (an explicit `nil` id is accepted).

  `x` is expected to be a map."
  [x]
  ;; Each check yields either a fault object or nil; the three checks are
  ;; siblings, so every one of them is evaluated and reported independently.
  (remove
   nil?
   (list
    (when-not (has-context? x)
      (make-fault-object
       :should
       :no-context
       "Section 3 of the ActivityPub specification states
`Implementers SHOULD include the ActivityPub context in
their object definitions`."))
    (when-not (:type x)
      (make-fault-object
       :minor
       :no-type
       "The ActivityPub specification states that the `type` field is
optional, but it is hard to process objects with no known type."))
    (when-not (contains? x :id)
      (make-fault-object
       :minor
       :no-id-transient
       "The ActivityPub specification allows objects without `id` fields
only if they are intentionally transient; even so it is preferred
that the object should have an explicit null id.")))))

View file

@ -1,6 +1,21 @@
(ns dog-and-duck.quack.quack
"Validator for ActivityPub objects: if it walks like a duck, and it quacks like a duck..."
"Validator for ActivityPub objects: if it walks like a duck, and it quacks
like a duck...
**NOTE THAT the ActivityPub spec
[says](https://www.w3.org/TR/activitypub/#obj)
> Servers SHOULD validate the content they receive to avoid content
> spoofing attacks
but in practice ActivityPub content collected in the wild bears only
a hazy relationship to the spec, so this is difficult. I suspect that
I may have to implement a `*strict*` dynamic variable, so that users can
toggle some checks off."
;;(:require [clojure.spec.alpha as s])
(:require [dog-and-duck.quack.picky :refer [filter-severity has-context?
object-faults]])
(:import [java.net URI URISyntaxException]))
;;; Copyright (C) Simon Brooke, 2022
@ -30,9 +45,18 @@
But we are *just not having that*, because otherwise we're flying blind.
We *shall* reject objects lacking at least `:type`. Missing `:id` keys are
tolerable because they represent transient objects, which we expect to
handle."
[x]
handle.
**NOTE THAT** The ActivityPub spec [says](https://www.w3.org/TR/activitypub/#obj)
> Implementers SHOULD include the ActivityPub context in their object
> definitions
but in samples found in the wild they typically don't."
([x]
(and (map? x) (:type x) true))
([x severity]
(empty? (filter-severity (object-faults x) severity))))
(defn persistent-object?
"`true` iff `x` is a persistent object.
@ -44,7 +68,7 @@
(and (object? x) (uri? (URI. (:id x))))
(catch URISyntaxException _ false)))
(persistent-object? {:type "test" :id "https://mastodon.scot/@barfilfarm"})
;; (persistent-object? {:type "test" :id "https://mastodon.scot/@barfilfarm"})
(def ^:const actor-types
"The set of types we will accept as actors.
@ -57,10 +81,12 @@
"Person"
"Service"})
(defn actor-type?
;; TODO: better as a macro
[x]
(if (actor-types x) true false))
(defmacro actor-type?
  "Expands to an expression which is `true` when `x` is a member of
  `actor-types`, and `false` otherwise."
  [^String x]
  `(boolean (actor-types ~x)))
;; (actor-type? "Group")
(def ^:const verb-types
"The set of types we will accept as verbs.
@ -72,47 +98,14 @@
"Offer" "Question" "Reject" "Read" "Remove" "TentativeAccept"
"TentativeReject" "Travel" "Undo" "Update" "View"})
(defn verb-type?
(defmacro verb-type?
;; TODO: better as a macro
[x]
(if (verb-types x) true false))
[^String x]
`(if (verb-types ~x) true false))
(def ^:const activitystreams-context-uri
"The URI of the context of an ActivityStreams object is expected to be this
literal string."
"https://www.w3.org/ns/activitystreams")
(defn context?
"Returns `true` iff `x` quacks like an ActivityStreams context, else false.
A context is either
1. the URI (actually an IRI) `activitystreams-context-uri`, or
2. a collection comprising that URI and a map."
[x]
(cond
(nil? x) false
(string? x) (and (= x activitystreams-context-uri) true)
(coll? x) (and (context? (first (remove map? x)))
(= (count x) 2)
true)
:else false))
(defmacro has-context? [x]
`(context? ((keyword "@context") ~x)))
(defn actor?
"Returns `true` if `x` quacks like an actor, else false."
[x]
(and
(object? x)
(has-context? x)
(uri? (URI. (:inbox x)))
(uri? (URI. (:outbox x)))
(actor-type? (:type x))
true))
(defn activity?
"`true` iff `x` quacks like an activity, else false.
"Returns `true` if `x` quacks like an actor, else false.
**NOTE THAT** [Section 4.1 of the spec]
(https://www.w3.org/TR/activitypub/#actor-objects) says explicitly that
@ -126,11 +119,35 @@
However, none of the provided examples in the [activitystreams-test-documents repository]() does in fact have these properties"
[x]
(and
(object? x)
(has-context? x)
(uri? (URI. (:inbox x)))
(uri? (URI. (:outbox x)))
(actor-type? (:type x))
true))
(defn actor-or-uri?
  "`true` if `x` is either a string which parses as a URI, or an actor;
  else `false`.

  A string which is not a syntactically valid URI yields `false` rather
  than an exception, matching `persistent-object?`. A `URISyntaxException`
  raised while checking a non-string `x` as an actor is treated the same
  way. The result is always a boolean, never `nil`.

  **TODO**: I need to decide about whether to reify referenced objects
  before validation or after. After reification, every reference to an actor
  *must be* to an actor object, but before, may only be to a URI pointing to
  one."
  [x]
  (try
    (boolean
     (if (string? x)
       (uri? (URI. x))
       (actor? x)))
    (catch URISyntaxException _ false)))
(defn activity?
"`true` iff `x` quacks like an activity, else false."
[x]
(try
(and (object? x)
(has-context? x)
(string? (:summary x))
(actor? (:actor x))
(actor-or-uri? (:actor x))
(verb-type? (:type x))
(or (object? (:object x)) (uri? (URI. (:object x))))
true)
@ -156,35 +173,46 @@
true))
(defn collection?
"`true` iff `x` quacks like a collection of type `type`, else `false`.
"`true` iff `x` quacks like a collection of type `object-type`, else `false`.
With one argument, will recognise plain collections and ordered collections,
but (currently) not collection pages."
([x type]
([x ^String object-type]
(let [items (or (:items x) (:orderedItems x))]
(and
(cond
(:items x) (nil? (:orderedItems x))
(:orderedItems x) (nil? (:items x))) ;; can't have both properties
(:orderedItems x) (nil? (:items x)) ;; can't have both properties
(integer? (:totalItems x)) true ;; can have neither, provided it has totalItems.
:else false)
(object? x)
(= (:type x) type)
(coll? items)
(every? object? items)
(integer? (:totalItems x))
true)))
(= (:type x) object-type)
(if items
(and (coll? items)
(every? object? items) ;; if there are items, they must form a
;; collection of objects.
true)
true) ;; but it's OK if there aren't.
true)
;; test for totalItems not done here, because collection pages don't
;; have it.
))
([x]
(or (collection? x "Collection")
(collection? x "OrderedCollection"))))
(and
(or (collection? x "Collection")
(collection? x "OrderedCollection"))
(integer? (:totalItems x))
true)))
(defn unordered-collection?
"`true` iff `x` quacks like an unordered collection, else `false`."
[x]
(collection? x "Collection"))
(and (collection? x "Collection") (integer? (:totalItems x)) true))
(defn ordered-collection?
"`true` iff `x` quacks like an ordered collection, else `false`."
[x]
(collection? x "OrderedCollection"))
(and (collection? x "OrderedCollection") (integer? (:totalItems x)) true))
(defn collection-page?
"`true` iff `x` quacks like a page in a paged collection, else `false`."