001 (ns dog-and-duck.quack.picky "Fault-finder for ActivityPub documents.
002
003 Generally, each `-faults` function will return:
004
005 1. `nil` if no faults were found;
006 2. a sequence of fault objects if faults were found.
007
008 Each fault object shall have the properties:
009
010 1. `:@context` whose value shall be the URL of a
011 document specifying this vocabulary;
012 2. `:type` whose value shall be `Fault`;
013 3. `:severity` whose value shall be one of
014 `info`, `minor`, `should`, `must` or `critical`;
015 4. `:fault` whose value shall be a unique token
016 representing the particular fault type;
017 5. `:narrative` whose value shall be a natural
018 language description of the fault type.
019
020 Note that the reason for the `:fault` property is
021 to be able to have a well known place, linked to
022 from the @context URL, which allows narratives
023 for each fault type to be served in as many
024 natural languages as possible.
025
026 The idea further is that it should ultimately be
027 possible to serialise a fault report as a
028 document which in its own right conforms to the
029 ActivityStreams spec."
030 (:require [clojure.set :refer [intersection]]
031 [dog-and-duck.quack.fault-messages :refer [messages]]
032 [dog-and-duck.utils.process :refer [get-hostname get-pid]]
033 [taoensso.timbre :as timbre
034 ;; Optional, just refer what you like:
035 :refer [warn]]
036 [clojure.data.json :as json])
037 (:import [java.net URI URISyntaxException]))
038
039 ;;; Copyright (C) Simon Brooke, 2022
040
041 ;;; This program is free software; you can redistribute it and/or
042 ;;; modify it under the terms of the GNU General Public License
043 ;;; as published by the Free Software Foundation; either version 2
044 ;;; of the License, or (at your option) any later version.
045
046 ;;; This program is distributed in the hope that it will be useful,
047 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
048 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
049 ;;; GNU General Public License for more details.
050
051 ;;; You should have received a copy of the GNU General Public License
052 ;;; along with this program; if not, write to the Free Software
053 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
054
055 ;; ERRATA
056
057 (def ^:dynamic *reify-refs*
058 "If `true`, references to objects in fields will be reified and validated.
059 If `false`, they won't, but an `:info` level fault report will be generated.
060
061 There are several things in the spec which, in a document, may correctly be
062 either
063
064 1. a fully fleshed out object, or
065 2. a URI pointing to such an object.
066
067 Obviously to fully validate a document we ought to reify all the refs and
068 check that they are themselves valid, but
069
070 a. in some of the published test documents the URIs do not reference a
071 valid document;
072 b. there will be performance costs to reifying all the refs;
073 c. in perverse cases, reifying refs might result in runaway recursion.
074
075 TODO: I think that in production this should default to `true`."
076 false)
077
078 (def ^:dynamic *reject-severity*
079 "The severity at which the binary validator will return `false`.
080
081 In practice documents seen in the wild do not typically appear to be
082 fully valid, and this does not matter. This allows the sensitivity of
083 the binary validator (`dog-and-duck.quack.quack`) to be tuned. It's in
084 this (`dog-and-duck.quack.picky`) namespace, not that one, because this
085 namespace is where concerns about severity are handled."
086 :must)
087
088 (def ^:const context-key
089 "The Clojure reader barfs on `:@context`, although it is in principle a valid
090 keyword. So we'll make it once, here, to make the code more performant and
091 easier to read."
092 (keyword "@context"))
093
094 (def ^:const severity
095 "Severity of faults found, as follows:
096
097 0. `:info` not actually a fault, but an issue noted during validation;
098 1. `:minor` things which I consider to be faults, but which
099 don't actually breach the spec;
100 2. `:should` instances where the spec says something SHOULD
101 be done, which isn't;
102 3. `:must` instances where the spec says something MUST
103 be done, which isn't;
104 4. `:critical` instances where I believe the fault means that
105 the object cannot be meaningfully processed."
106 #{:info :minor :should :must :critical})
107
108 (def ^:const severity-filters
109 "Hack for implementing a severity hierarchy"
110 {:all #{}
111 :info #{}
112 :minor #{:info}
113 :should #{:info :minor}
114 :must #{:info :minor :should}
115 :critical severity})
116
117 (defn truthy?
118 "Return `true` if `x` is truthy, else `false`."
119 [x]
120 (if x true false))
121
122 (defn has-type?
123 "Return `true` if object `x` has type `type`, else `false`.
124
125 The values of `type` fields of ActivityStreams objects may be lists; they
126 are considered to have a type if the type token is a member of the list."
127 [x type]
128 (assert (map? x) (string? type))
129 (let [tv (:type x)]
130 (cond
131 (coll? tv) (truthy? (not-empty (filter #(= % type) tv)))
132 :else (= tv type))))
133
134 (defn filter-severity
135 "Return a list of reports taken from these `reports` where the severity
136 of the report is greater than this or equal to this `severity`."
137 [reports severity]
138 (cond (nil? reports) nil
139 (and
140 (coll? reports)
141 (every? map? reports)
142 (every? :severity reports)) (remove
143 #((severity-filters severity) (:severity %))
144 reports)
145 :else
146 (throw
147 (ex-info
148 "Argument `reports` was not a collection of fault reports"
149 {:arguments {:reports reports
150 :severity severity}}))))
151
152 (def ^:const activitystreams-context-uri
153 "The URI of the context of an ActivityStreams object is expected to be this
154 literal string."
155 "https://www.w3.org/ns/activitystreams")
156
157 (def ^:const validation-fault-context-uri
158 "The URI of the context of a validation fault report object shall be this
159 literal string."
160 "https://simon-brooke.github.io/dog-and-duck/codox/Validation_Faults.html")
161
162 (defn context?
163 "Returns `true` iff `x` quacks like an ActivityStreams context, else false.
164
165 A context is either
166 1. the URI (actually an IRI) `activitystreams-context-uri`, or
167 2. a collection comprising that URI and a map."
168 [x]
169 (cond
170 (nil? x) false
171 (string? x) (and (= x activitystreams-context-uri) true)
172 (coll? x) (and (context? (first (remove map? x)))
173 (= (count x) 2)
174 true)
175 :else false))
176
177 (defmacro has-context?
178 "True if `x` is an ActivityStreams object with a valid context, else `false`."
179 [x]
180 `(context? (context-key ~x)))
181
182 (defn make-fault-object
183 "Return a fault object with these `severity`, `fault` and `narrative` values.
184
185 An ActivityPub object MUST have a globally unique ID. Whether this is
186 meaningful depends on whether we persist fault report objects and serve
187 them, which at present I have no plans to do."
188 ;; TODO: should not pass in the narrative; instead should use the :fault value
189 ;; to look up the narrative in a resource file.
190 [severity fault]
191 (assoc {}
192 context-key validation-fault-context-uri
193 :id (str "https://"
194 (get-hostname)
195 "/fault/"
196 (get-pid)
197 ":"
198 (inst-ms (java.util.Date.)))
199 :type "Fault"
200 :severity severity
201 :fault fault
202 :narrative (or (messages fault)
203 (do
204 (warn "No narrative provided for fault token " fault)
205 (str fault)))))
206
207 (defmacro nil-if-empty
208 "if `x` is an empty collection, return `nil`; else return `x`."
209 [x]
210 `(if (and (coll? ~x) (empty? ~x)) nil
211 ~x))
212
213 (defn has-type-or-fault
214 "If object `x` has a `:type` value which is `acceptable`, return `nil`;
215 else return a fault object with this `severity` and `token`.
216
217 `acceptable` may be passed as either nil, a string, or a set of strings.
218 If `acceptable` is `nil`, no type specific tests will be performed."
219 [x acceptable severity token]
220 (when acceptable
221 (let [tv (:type x)]
222 (when-not
223 (cond
224 (and (string? tv) (string? acceptable)) (= tv acceptable)
225 (and (string? tv) (set? acceptable)) (acceptable tv)
226 (and (coll? tv) (string? acceptable)) ((set tv) acceptable)
227 (and (coll? tv) (set? acceptable)) (not-empty
228 (intersection (set tv) acceptable))
229 :else
230 (throw (ex-info "Type value or `acceptable` argument not as expected."
231 {:arguments {:x x
232 :acceptable acceptable
233 :severity severity
234 :token token}})))
235 (make-fault-object severity token)))))
236
237 (defn object-faults
238 "Return a list of faults found in object `x`, or `nil` if none are.
239
240 If `expected-type` is also passed, verify that `x` has `expected-type`.
241 `expected-type` may be passed as a string or as a set of strings."
242 ([x]
243 (nil-if-empty
244 (remove empty?
245 (list
246 (when-not (map? x)
247 (make-fault-object :critical :not-an-object))
248 (when-not
249 (has-context? x)
250 (make-fault-object :should :no-context))
251 (when-not (:type x)
252 (make-fault-object :minor :no-type))
253 (when-not (and (map? x) (contains? x :id))
254 (make-fault-object :minor :no-id-transient))))))
255 ([x expected-type]
256 (nil-if-empty
257 (remove empty?
258 (concat
259 (object-faults x)
260 (list
261 ;; TODO: should resolve the correct `-faults`function for the
262 ;; `expected-type` and call that; but that's for later.
263 (has-type-or-fault x expected-type :critical :unexpected-type)))))))
264
265 (defn uri-or-fault
266 "If `u` is not a valid URI, return a fault object with this `severity` and
267 `if-invalid-token`. If it's `nil`, return a fault object with this `severity`
268 and `if-missing-token`. Otherwise return nil."
269 ([u severity if-missing-token]
270 (uri-or-fault u severity if-missing-token if-missing-token))
271 ([u severity if-missing-token if-invalid-token]
272 (try
273 (if (uri? (URI. u))
274 nil
275 (make-fault-object severity if-invalid-token))
276 (catch URISyntaxException _
277 (make-fault-object severity if-invalid-token))
278 (catch NullPointerException _
279 (make-fault-object severity if-missing-token)))))
280
281 (defn persistent-object-faults
282 "Return a list of faults found in persistent object `x`, or `nil` if none are."
283 [x]
284 (nil-if-empty
285 (remove empty?
286 (concat
287 (object-faults x)
288 (list
289 (if (contains? x :id)
290 (try (let [id (URI. (:id x))]
291 (when-not (= (.getScheme id) "https")
292 (make-fault-object :should :id-not-https)))
293 (catch URISyntaxException _
294 (make-fault-object :must :id-not-uri))
295 (catch NullPointerException _
296 (make-fault-object :must :null-id-persistent)))
297 (make-fault-object :must :no-id-persistent)))))))
298
299 (def ^:const actor-types
300 "The set of types we will accept as actors.
301
302 There's an [explicit set of allowed actor types]
303 (https://www.w3.org/TR/activitystreams-vocabulary/#actor-types)."
304 #{"Application"
305 "Group"
306 "Organization"
307 "Person"
308 "Service"})
309
310 (defn actor-type?
311 "Return `true` if the `x` is a recognised actor type, else `false`."
312 [^String x]
313 (if (actor-types x) true false))
314
315 (defn has-actor-type?
316 "Return `true` if the object `x` has a type which is an actor type, else
317 `false`."
318 [x]
319 (let [tv (:type x)]
320 (cond
321 (coll? tv) (truthy? (not-empty (filter actor-type? tv)))
322 :else (actor-type? tv))))
323
324 (defn actor-faults
325 "Return a list of faults found in actor `x`, or `nil` if none are."
326 [x]
327 (nil-if-empty
328 (remove empty?
329 (concat (persistent-object-faults x)
330 (list
331 (when-not (has-actor-type? x)
332 (make-fault-object :must :not-actor-type))
333 (uri-or-fault
334 (:inbox x) :must :no-inbox :invalid-inbox-uri)
335 (uri-or-fault
336 (:outbox x) :must :no-outbox :invalid-outbox-uri))))))
337
338 (def ^:const verb-types
339 "The set of types we will accept as verbs.
340
341 There's an [explicit set of allowed verb types]
342 (https://www.w3.org/TR/activitystreams-vocabulary/#activity-types)."
343 #{"Accept" "Add" "Announce" "Arrive" "Block" "Create" "Delete" "Dislike"
344 "Flag" "Follow" "Ignore" "Invite" "Join" "Leave" "Like" "Listen" "Move"
345 "Offer" "Question" "Reject" "Read" "Remove" "TentativeAccept"
346 "TentativeReject" "Travel" "Undo" "Update" "View"})
347
348 (defn verb-type?
349 "`true` if `x`, a string, represents a recognised ActivityStreams activity
350 type."
351 [^String x]
352 (if (verb-types x) true false))
353
354 (defn has-activity-type?
355 "Return `true` if the object `x` has a type which is an activity type, else
356 `false`."
357 [x]
358 (let [tv (:type x)]
359 (cond
360 (coll? tv) (truthy? (not-empty (filter verb-type? tv)))
361 :else (actor-type? tv))))
362
363 (defn string-or-fault
364 "If this `value` is not a string, return a fault object with this `severity`
365 and `token`, else `nil`. If `pattern` is also passed, it is expected to be
366 a Regex, and the fault object will be returned unless `value` matches the
367 `pattern`."
368 ([value severity token]
369 (when-not (string? value) (make-fault-object severity token)))
370 ([value severity token pattern]
371 (when not (and (string? value) (re-matches pattern value))
372 (make-fault-object severity token))))
373
374 (defn link-faults
375 "A link object is required to have an `href` property. It may have all of
376 `rel` | `mediaType` | `name` | `hreflang` | `height` | `width` | `preview`
377 but I *think* they're all optional."
378 [x]
379 (list
380 (uri-or-fault
381 (:href x) :must :no-href-uri :invalid-href-uri)
382 (string-or-fault (:mediaType x) :minor :no-media-type #"\w+\/[-+.\w]+")
383 ;; TODO: possibly more here. Audit against the specs
384 ))
385
386 (defn object-reference-or-faults
387 "If this `value` is either
388
389 1. an object of `expected-type`;
390 2. a URI referencing an object of `expected-type`; or
391 3. a link object referencing an object of `expected-type`
392
393 and no faults are returned from validating the linked object, then return
394 `nil`; else return a sequence comprising a fault object with this `severity`
395 and `token`, prepended to the faults returned.
396
397 As with `has-type-or-fault` (q.v.), `expected-type` may be passed as a
398 string or as a set of strings.
399
400 **NOTE THAT** if `*reify-refs*` is `false`, referenced objects will not
401 actually be checked."
402 [value expected-type severity token]
403 (let [faults (cond
404 (string? value) (try (let [uri (URI. value)
405 object (when *reify-refs*
406 (json/read-str (slurp uri)))]
407 (when object
408 (object-faults object expected-type)))
409 (catch URISyntaxException _
410 (make-fault-object severity token)))
411 (map? value) (if (has-type? value "Link")
412 (cond
413 ;; if we were looking for a link and we've
414 ;; found a link, that's OK.
415 (= expected-type "Link") nil
416 (and (set? expected-type) (expected-type "Link")) nil
417 :else
418 (object-reference-or-faults
419 (:href value) expected-type severity token))
420 (object-faults value expected-type))
421 :else (throw
422 (ex-info
423 "Argument `value` was not an object or a link to an object"
424 {:arguments {:value value}
425 :expected-type expected-type
426 :severity severity
427 :token token})))]
428 (when faults (cons (make-fault-object severity token) faults))))
429
430 (defn link-faults
431 "Return a list of faults found in the link `x`, or `nil` if none are found."
432 [x]
433 (object-reference-or-faults x "Link" :critical :expected-link))
434
435 (defn coll-object-reference-or-fault
436 "As object-reference-or-fault, except `value` argument may also be a list of
437 objects and/or object references."
438 [value expected-type severity token]
439 (cond
440 (map? value) (object-reference-or-faults value expected-type severity token)
441 (coll? value) (nil-if-empty
442 (remove nil?
443 (reduce concat
444 (map
445 #(object-reference-or-faults
446 % expected-type severity token)
447 value))))
448 :else (throw
449 (ex-info
450 "Argument `value` was not an object, a link to an object, nor a list of these."
451 {:arguments {:value value}
452 :expected-type expected-type
453 :severity severity
454 :token token}))))
455
456 (def ^:const base-activity-required-properties
457 "Properties most activities should have. Values are validating functions, each.
458
459 See https://www.w3.org/TR/activitystreams-vocabulary/#dfn-activity"
460 {:summary (fn [v] (when-not (string? v)
461 (list (make-fault-object :should :no-summary))))
462 :actor (fn [v] (object-reference-or-faults v actor-types :must :no-actor))
463 :object (fn [v] (object-reference-or-faults v nil :must :no-object))})
464
465 (def ^:const intransitive-activity-required-properties
466 "Properties intransitive activities should have.
467
468 See https://www.w3.org/TR/activitystreams-vocabulary/#dfn-intransitiveactivity"
469 (dissoc base-activity-required-properties :object))
470
471 (def ^:const accept-required-properties
472 "As base-activity-required-properties, except that the type of the object
473 is restricted."
474 (assoc base-activity-required-properties
475 :object
476 (fn [v]
477 (object-reference-or-faults v #{"Invite" "Person"}
478 :must
479 :bad-accept-target))))
480
481 (def ^:const activity-required-properties
482 "Properties activities should have, keyed by activity type. Values are maps
483 of the format of `base-activity-required-properties`, q.v."
484 {"Accept" accept-required-properties
485 "Add" base-activity-required-properties
486 "Announce" base-activity-required-properties
487 "Arrive" intransitive-activity-required-properties
488 ;; TODO: is `:location` required for arrive?
489 "Block" base-activity-required-properties
490 "Create" base-activity-required-properties
491 "Delete" base-activity-required-properties
492 "Dislike" base-activity-required-properties
493 "Flag" base-activity-required-properties
494 "Follow" base-activity-required-properties
495 ;; TODO: is `:object` required to be an actor?
496 "Ignore" base-activity-required-properties
497 "Invite" (assoc base-activity-required-properties :target
498 (fn [v]
499 (coll-object-reference-or-fault v #{"Event" "Group"}
500 :must
501 :bad-accept-target)))
502 ;; TODO: are here other things one could meaningfully be invited to?
503 "Join" base-activity-required-properties
504 "Leave" base-activity-required-properties
505 "Like" base-activity-required-properties
506 "Listen" base-activity-required-properties
507 "Move" base-activity-required-properties
508 "Offer" base-activity-required-properties
509 "Question" intransitive-activity-required-properties
510 "Reject" base-activity-required-properties
511 "Read" base-activity-required-properties
512 "Remove" base-activity-required-properties
513 "TentativeReject" base-activity-required-properties
514 "TentativeAccept" accept-required-properties
515 "Travel" base-activity-required-properties
516 "Undo" base-activity-required-properties
517 "Update" base-activity-required-properties
518 "View" base-activity-required-properties})
519
520 (defn activity-type-faults
521 "Return a list of faults found in the activity `x`; if `type` is also
522 specified, it should be a string naming a specific activity type for
523 which checks should be performed.
524
525 Some specific activity types have specific requirements which are not
526 requirements."
527 ([x]
528 (if (coll? (:type x))
529 (map #(activity-type-faults x %) (:type x))
530 (activity-type-faults x (:type x))))
531 ([x type]
532 (let [checks (activity-required-properties type)]
533 (map
534 #(apply (checks %) (x %))
535 (keys checks)))))
536
537 (defn activity-faults
538 [x]
539 (nil-if-empty
540 (remove empty?
541 (concat (persistent-object-faults x)
542 (activity-type-faults x)
543 (list
544 (when-not
545 (has-activity-type? x)
546 (make-fault-object :must :not-activity-type))
547 (when-not (string? (:summary x)) (make-fault-object :should :no-summary)))))))