Much work on error reporting architecture for validation
This commit is contained in:
parent
5593bb22d1
commit
25795cf364
43 changed files with 2024 additions and 1067 deletions
143
src/dog_and_duck/quack/picky.clj
Normal file
143
src/dog_and_duck/quack/picky.clj
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
(ns dog-and-duck.quack.picky "Fault-finder for ActivityPub documents.
|
||||
|
||||
Generally, each `-faults` function will return:
|
||||
1. `nil` if no faults were found;
|
||||
2. a sequence of fault objects if faults were found.
|
||||
|
||||
Each fault object shall have the properties:
|
||||
1. `:@context` whose value shall be the URL of a
|
||||
document specifying this vocabulary;
|
||||
2. `:type` whose value shall be `Fault`;
|
||||
3. `:severity` whose value shall be one of
|
||||
`minor`, `should`, `must` or `critical`;
|
||||
4. `:fault` whose value shall be a unique token
|
||||
representing the particular fault type;
|
||||
5. `:narrative` whose value shall be a natural
|
||||
language description of the fault type.
|
||||
|
||||
Note that the reason for the `:fault` property is
|
||||
to be able to have a well known place, linked to
|
||||
from the @context URL, which allows narratives
|
||||
for each fault type to be served in as many
|
||||
natural languages as possible.
|
||||
|
||||
The idea further is that it should ultimately be
|
||||
possible to serialise a fault report as a
|
||||
document which in its own right conforms to the
|
||||
ActivityStreams spec."
|
||||
(:require [dog-and-duck.utils.process :refer [pid]]))
|
||||
|
||||
(def ^:const severity
  "The set of recognised fault severities. In increasing order of gravity:

  1. `:minor`    - something the author considers a fault, although it does
                   not actually breach the spec;
  2. `:should`   - the spec says something SHOULD be done, and it has not
                   been;
  3. `:must`     - the spec says something MUST be done, and it has not
                   been;
  4. `:critical` - the author believes the fault means the object cannot be
                   meaningfully processed."
  #{:minor :should :must :critical})
|
||||
|
||||
(def ^:const severity-filters
  "Hack for implementing a severity hierarchy.

  Maps each filter level to the set of report severities which
  `filter-severity` removes when called with that level. `:all` removes
  nothing; every other level removes itself and everything less severe, so
  `:critical` removes every member of `severity`."
  {:all      #{}
   :minor    #{:minor}
   :should   #{:minor :should}
   :must     #{:minor :should :must}
   :critical severity})
|
||||
|
||||
(defn filter-severity
  "Return a lazy sequence of those of these `reports` whose severity is
  strictly greater than this `severity`.

  `reports` must be a collection of maps, each with a truthy `:severity`
  value. `severity` must be one of the keys of `severity-filters`; passing
  `:all` removes nothing. A report is removed when its `:severity` is a
  member of the set which `severity-filters` holds for `severity`, so a
  report whose severity *equals* `severity` is dropped, and `:critical`
  drops every report.

  Throws an `AssertionError` if either argument is unacceptable."
  [reports severity]
  (assert
   (and
    (coll? reports)
    (every? map? reports)
    (every? :severity reports))
   "`reports` must be a collection of maps, each having a `:severity`")
  ;; An unrecognised severity would otherwise look up `nil`, and invoking
  ;; `nil` as the predicate fails with an unhelpful NullPointerException -
  ;; and only later, when the lazy result is realised.
  (assert
   (contains? severity-filters severity)
   (str "Unrecognised severity: " (pr-str severity)))
  ;; Look the set up once rather than once per report.
  (let [unwanted (severity-filters severity)]
    (remove #(unwanted (:severity %)) reports)))
|
||||
|
||||
(def ^:const activitystreams-context-uri
  "The literal string which the context URI of an ActivityStreams object is
  expected to equal."
  "https://www.w3.org/ns/activitystreams")
|
||||
|
||||
(def ^:const validation-fault-context-uri
  "The literal string used as the context URI of every validation fault
  report object."
  "https://simon-brooke.github.io/dog-and-duck/codox/Validation_Faults.html")
|
||||
|
||||
(defn context?
  "Returns `true` iff `x` quacks like an ActivityStreams context, else `false`.

  A context is either

  1. the URI (actually an IRI) `activitystreams-context-uri`, or
  2. a collection comprising exactly that URI and exactly one map, in
     either order.

  Anything else - `nil`, another string, a map, a collection of any other
  shape - is not a context."
  [x]
  (cond
    (string? x)
    (= x activitystreams-context-uri)

    ;; A map satisfies `coll?` too, but a bare map is not a context, and
    ;; iterating its entries would test the wrong thing.
    (and (coll? x) (not (map? x)))
    (let [{maps true others false} (group-by map? x)]
      ;; Exactly one map and exactly one non-map, which must be the URI.
      ;; Checking only the first non-map element and the overall count
      ;; would let `[uri \"anything\"]` through.
      (and (= 1 (count maps))
           (= [activitystreams-context-uri] others)))

    :else false))
|
||||
|
||||
(defmacro has-context?
  "Expands to a test of whether the value which `x` holds under the keyword
  `@context` satisfies `context?`; so true if `x` is an ActivityStreams
  object with a valid context, else `false`."
  [x]
  `(context? (get ~x (keyword "@context"))))
|
||||
|
||||
|
||||
|
||||
(defn make-fault-object
  "Return a fault object with these `severity`, `fault` and `narrative` values.

  The result is a map with the keys `@context` (always
  `validation-fault-context-uri`), `:id`, `:type` (always `\"Fault\"`),
  `:severity`, `:fault` and `:narrative`.

  An ActivityPub object MUST have a globally unique ID. Whether this is
  meaningful depends on whether we persist fault report objects and serve
  them, which at present I have no plans to do. The id is built from the
  local host name, `pid`, the current time in milliseconds and a random
  UUID; the UUID is what keeps apart faults created within the same
  millisecond by the same process."
  [severity fault narrative]
  {(keyword "@context") validation-fault-context-uri
   :id (str "https://"
            (.. java.net.InetAddress getLocalHost getHostName)
            "/fault/"
            pid
            ":"
            (inst-ms (java.util.Date.))
            ;; Several faults are routinely created back to back, well
            ;; within one millisecond of each other; the timestamp alone
            ;; therefore does not make the id unique.
            ":"
            (java.util.UUID/randomUUID))
   :type "Fault"
   :severity severity
   :fault fault
   :narrative narrative})
|
||||
|
||||
(defn object-faults
  "Return a lazy sequence of fault objects describing what is wrong with `x`
  considered as an ActivityPub object. Up to three faults are reported:

  1. `:no-context` (severity `:should`) if `x` does not have a valid context;
  2. `:no-type` (severity `:minor`) if `x` has no truthy `:type`;
  3. `:no-id-transient` (severity `:minor`) if `x` has no `:id` key at all -
     an explicit `nil` id is not reported.

  When nothing is wrong the sequence is empty rather than `nil`."
  [x]
  (let [no-context (when-not (has-context? x)
                     (make-fault-object
                      :should
                      :no-context
                      "Section 3 of the ActivityPub specification states
`Implementers SHOULD include the ActivityPub context in
their object definitions`."))
        no-type (when-not (:type x)
                  (make-fault-object
                   :minor
                   :no-type
                   "The ActivityPub specification states that the `type` field is
optional, but it is hard to process objects with no known type."))
        no-id (when-not (contains? x :id)
                (make-fault-object
                 :minor
                 :no-id-transient
                 "The ActivityPub specification allows objects without `id` fields
only if they are intentionally transient; even so it is preferred
that the object should have an explicit null id."))]
    ;; Each check yields either a fault map or nil; keep only the faults.
    (remove nil? [no-context no-type no-id])))
|
||||
|
|
@ -1,6 +1,21 @@
|
|||
(ns dog-and-duck.quack.quack
|
||||
"Validator for ActivityPub objects: if it walks like a duck, and it quacks like a duck..."
|
||||
"Validator for ActivityPub objects: if it walks like a duck, and it quacks
|
||||
like a duck...
|
||||
|
||||
**NOTE THAT the ActivityPub spec
|
||||
[says](https://www.w3.org/TR/activitypub/#obj)
|
||||
|
||||
> Servers SHOULD validate the content they receive to avoid content
|
||||
> spoofing attacks
|
||||
|
||||
but in practice ActivityPub content collected in the wild bears only
|
||||
a hazy relationship to the spec, so this is difficult. I suspect that
|
||||
I may have to implement a `*strict*` dynamic variable, so that users can
|
||||
toggle some checks off."
|
||||
|
||||
;;(:require [clojure.spec.alpha as s])
|
||||
(:require [dog-and-duck.quack.picky :refer [filter-severity has-context?
|
||||
object-faults]])
|
||||
(:import [java.net URI URISyntaxException]))
|
||||
|
||||
;;; Copyright (C) Simon Brooke, 2022
|
||||
|
|
@ -30,9 +45,18 @@
|
|||
But we are *just not having that*, because otherwise we're flying blind.
|
||||
We *shall* reject objects lacking at least `:type`. Missing `:id` keys are
|
||||
tolerable because they represent transient objects, which we expect to
|
||||
handle."
|
||||
[x]
|
||||
handle.
|
||||
|
||||
**NOTE THAT** The ActivityPub spec [says](https://www.w3.org/TR/activitypub/#obj)
|
||||
|
||||
> Implementers SHOULD include the ActivityPub context in their object
|
||||
> definitions
|
||||
|
||||
but in samples found in the wild they typically don't."
|
||||
([x]
|
||||
(and (map? x) (:type x) true))
|
||||
([x severity]
|
||||
(empty? (filter-severity (object-faults x) severity))))
|
||||
|
||||
(defn persistent-object?
|
||||
"`true` iff `x` is a persistent object.
|
||||
|
|
@ -44,7 +68,7 @@
|
|||
(and (object? x) (uri? (URI. (:id x))))
|
||||
(catch URISyntaxException _ false)))
|
||||
|
||||
(persistent-object? {:type "test" :id "https://mastodon.scot/@barfilfarm"})
|
||||
;; (persistent-object? {:type "test" :id "https://mastodon.scot/@barfilfarm"})
|
||||
|
||||
(def ^:const actor-types
|
||||
"The set of types we will accept as actors.
|
||||
|
|
@ -57,10 +81,12 @@
|
|||
"Person"
|
||||
"Service"})
|
||||
|
||||
(defn actor-type?
|
||||
;; TODO: better as a macro
|
||||
[x]
|
||||
(if (actor-types x) true false))
|
||||
(defmacro actor-type?
|
||||
"Return `true` iff the `x` is a recognised actor type, else `false`."
|
||||
[^String x]
|
||||
`(if (actor-types ~x) true false))
|
||||
|
||||
;; (actor-type? "Group")
|
||||
|
||||
(def ^:const verb-types
|
||||
"The set of types we will accept as verbs.
|
||||
|
|
@ -72,47 +98,14 @@
|
|||
"Offer" "Question" "Reject" "Read" "Remove" "TentativeAccept"
|
||||
"TentativeReject" "Travel" "Undo" "Update" "View"})
|
||||
|
||||
(defn verb-type?
|
||||
(defmacro verb-type?
|
||||
;; TODO: better as a macro
|
||||
[x]
|
||||
(if (verb-types x) true false))
|
||||
[^String x]
|
||||
`(if (verb-types ~x) true false))
|
||||
|
||||
(def ^:const activitystreams-context-uri
|
||||
"The URI of the context of an ActivityStreams object is expected to be this
|
||||
literal string."
|
||||
"https://www.w3.org/ns/activitystreams")
|
||||
|
||||
(defn context?
|
||||
"Returns `true` iff `x` quacks like an ActivityStreams context, else false.
|
||||
|
||||
A context is either
|
||||
1. the URI (actually an IRI) `activitystreams-context-uri`, or
|
||||
2. a collection comprising that URI and a map."
|
||||
[x]
|
||||
(cond
|
||||
(nil? x) false
|
||||
(string? x) (and (= x activitystreams-context-uri) true)
|
||||
(coll? x) (and (context? (first (remove map? x)))
|
||||
(= (count x) 2)
|
||||
true)
|
||||
:else false))
|
||||
|
||||
(defmacro has-context? [x]
|
||||
`(context? ((keyword "@context") ~x)))
|
||||
|
||||
(defn actor?
|
||||
"Returns `true` if `x` quacks like an actor, else false."
|
||||
[x]
|
||||
(and
|
||||
(object? x)
|
||||
(has-context? x)
|
||||
(uri? (URI. (:inbox x)))
|
||||
(uri? (URI. (:outbox x)))
|
||||
(actor-type? (:type x))
|
||||
true))
|
||||
|
||||
(defn activity?
|
||||
"`true` iff `x` quacks like an activity, else false.
|
||||
"Returns `true` if `x` quacks like an actor, else false.
|
||||
|
||||
**NOTE THAT** [Section 4.1 of the spec]
|
||||
(https://www.w3.org/TR/activitypub/#actor-objects) says explicitly that
|
||||
|
|
@ -126,11 +119,35 @@
|
|||
|
||||
However, none of the provided examples in the [activitystreams-test-documents repository]() does in fact have these properties"
|
||||
[x]
|
||||
(and
|
||||
(object? x)
|
||||
(has-context? x)
|
||||
(uri? (URI. (:inbox x)))
|
||||
(uri? (URI. (:outbox x)))
|
||||
(actor-type? (:type x))
|
||||
true))
|
||||
|
||||
(defn actor-or-uri?
  "`true` if `x` is either a URI or an actor.

  A string counts as a URI if `java.net.URI` can parse it; a string which
  cannot be parsed yields `false` rather than an exception. Anything which
  is not a string is tested with `actor?`.

  **TODO**: I need to decide about whether to reify referenced objects
  before validation or after. After reification, every reference to an actor
  *must be* to an actor object, but before, may only be to a URI pointing to
  one."
  [x]
  (if (string? x)
    (try
      (uri? (URI. x))
      ;; A malformed string is simply not a URI; a predicate should say so
      ;; rather than throw.
      (catch URISyntaxException _ false))
    (and (actor? x) true)))
|
||||
|
||||
(defn activity?
|
||||
"`true` iff `x` quacks like an activity, else false."
|
||||
[x]
|
||||
(try
|
||||
(and (object? x)
|
||||
(has-context? x)
|
||||
(string? (:summary x))
|
||||
(actor? (:actor x))
|
||||
(actor-or-uri? (:actor x))
|
||||
(verb-type? (:type x))
|
||||
(or (object? (:object x)) (uri? (URI. (:object x))))
|
||||
true)
|
||||
|
|
@ -156,35 +173,46 @@
|
|||
true))
|
||||
|
||||
(defn collection?
|
||||
"`true` iff `x` quacks like a collection of type `type`, else `false`.
|
||||
"`true` iff `x` quacks like a collection of type `object-type`, else `false`.
|
||||
|
||||
With one argument, will recognise plain collections and ordered collections,
|
||||
but (currently) not collection pages."
|
||||
([x type]
|
||||
([x ^String object-type]
|
||||
(let [items (or (:items x) (:orderedItems x))]
|
||||
(and
|
||||
(cond
|
||||
(:items x) (nil? (:orderedItems x))
|
||||
(:orderedItems x) (nil? (:items x))) ;; can't have both properties
|
||||
(:orderedItems x) (nil? (:items x)) ;; can't have both properties
|
||||
(integer? (:totalItems x)) true ;; can have neither, provided it has totalItems.
|
||||
:else false)
|
||||
(object? x)
|
||||
(= (:type x) type)
|
||||
(coll? items)
|
||||
(every? object? items)
|
||||
(integer? (:totalItems x))
|
||||
true)))
|
||||
(= (:type x) object-type)
|
||||
(if items
|
||||
(and (coll? items)
|
||||
(every? object? items) ;; if there are items, they must form a
|
||||
;; collection of objects.
|
||||
true)
|
||||
true) ;; but it's OK if there aren't.
|
||||
true)
|
||||
;; test for totalItems not done here, because collection pages don't
|
||||
;; have it.
|
||||
))
|
||||
([x]
|
||||
(or (collection? x "Collection")
|
||||
(collection? x "OrderedCollection"))))
|
||||
(and
|
||||
(or (collection? x "Collection")
|
||||
(collection? x "OrderedCollection"))
|
||||
(integer? (:totalItems x))
|
||||
true)))
|
||||
|
||||
(defn unordered-collection?
|
||||
"`true` iff `x` quacks like an unordered collection, else `false`."
|
||||
[x]
|
||||
(collection? x "Collection"))
|
||||
(and (collection? x "Collection") (integer? (:totalItems x)) true))
|
||||
|
||||
(defn ordered-collection?
|
||||
"`true` iff `x` quacks like an ordered collection, else `false`."
|
||||
[x]
|
||||
(collection? x "OrderedCollection"))
|
||||
(and (collection? x "OrderedCollection") (integer? (:totalItems x)) true))
|
||||
|
||||
(defn collection-page?
|
||||
"`true` iff `x` quacks like a page in a paged collection, else `false`."
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue