From bfd7304da129dd6ce69a3885800595ff67ae6899 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Mon, 16 Aug 2021 15:12:05 +0100 Subject: [PATCH] Bother. It looks like I'd already fully implemented hashmaps... May need to back out a whole hill of work. --- src/arith/peano.h | 2 + src/authorise.c | 24 ++++ src/authorise.h | 15 +++ src/memory/conspage.c | 20 +-- src/memory/consspaceobject.c | 48 ++++--- src/memory/consspaceobject.h | 4 +- src/memory/hashmap.c | 255 +++++++++++++++++++++++++++++++---- src/memory/hashmap.h | 32 ++--- src/memory/vectorspace.c | 26 ++++ src/memory/vectorspace.h | 30 ++++- src/ops/intern.c | 5 +- 11 files changed, 378 insertions(+), 83 deletions(-) create mode 100644 src/authorise.c create mode 100644 src/authorise.h diff --git a/src/arith/peano.h b/src/arith/peano.h index 89bfc3d..9bcd9e4 100644 --- a/src/arith/peano.h +++ b/src/arith/peano.h @@ -27,6 +27,8 @@ struct cons_pointer absolute( struct cons_pointer arg ); long double to_long_double( struct cons_pointer arg ); +int64_t to_long_int( struct cons_pointer arg ) ; + struct cons_pointer lisp_absolute( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ); diff --git a/src/authorise.c b/src/authorise.c new file mode 100644 index 0000000..5574db9 --- /dev/null +++ b/src/authorise.c @@ -0,0 +1,24 @@ +/* + * authorised.c + * + * For now, a dummy authorising everything. + * + * (c) 2021 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/conspage.h" +#include "memory/consspaceobject.h" + + +/** + * TODO: does nothing, yet. What it should do is access a magic value in the + * runtime environment and check that it is identical to something on this `acl` + */ +struct cons_pointer authorised(struct cons_pointer target, struct cons_pointer acl) { + if (nilp(acl)) { + acl = pointer2cell(target).access; + } + return TRUE; +} + diff --git a/src/authorise.h b/src/authorise.h new file mode 100644 index 0000000..c67977d --- /dev/null +++ b/src/authorise.h @@ -0,0 +1,15 @@ +/* + * authorise.h + * + * Basic implementation of a authorisation. + * + * (c) 2021 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_authorise_h +#define __psse_authorise_h + +struct cons_pointer authorised(struct cons_pointer target, struct cons_pointer acl); + +#endif \ No newline at end of file diff --git a/src/memory/conspage.c b/src/memory/conspage.c index 53496d3..c9c224d 100644 --- a/src/memory/conspage.c +++ b/src/memory/conspage.c @@ -179,26 +179,8 @@ void free_cell( struct cons_pointer pointer ) { dec_ref( cell->payload.string.cdr ); break; case VECTORPOINTTV: - /* for vector space pointers, free the actual vector-space - * object. Dangerous! */ - debug_printf( DEBUG_ALLOC, - L"About to free vector-space object at 0x%lx\n", - cell->payload.vectorp.address ); - struct vector_space_object *vso = - cell->payload.vectorp.address; - - switch ( vso->header.tag.value ) { - case STACKFRAMETV: - free_stack_frame( get_stack_frame( pointer ) ); - break; - } - - free( ( void * ) cell->payload.vectorp.address ); - debug_printf( DEBUG_ALLOC, - L"Freed vector-space object at 0x%lx\n", - cell->payload.vectorp.address ); + free_vso( pointer); break; - } strncpy( &cell->tag.bytes[0], FREETAG, TAGLENGTH ); diff --git a/src/memory/consspaceobject.c b/src/memory/consspaceobject.c index 06bf41c..ee82956 100644 --- a/src/memory/consspaceobject.c +++ b/src/memory/consspaceobject.c @@ -18,6 +18,7 @@ #include #include +#include "authorise.h" #include "conspage.h" #include "consspaceobject.h" #include "debug.h" @@ -38,7 +39,7 @@ bool check_tag( struct cons_pointer pointer, char *tag ) { result = strncmp( &cell.tag.bytes[0], tag, TAGLENGTH ) == 0; if ( result == false ) { - if ( strncmp( &cell.tag.bytes, VECTORPOINTTAG, TAGLENGTH ) == 0 ) { + if ( strncmp( &cell.tag.bytes[0], VECTORPOINTTAG, TAGLENGTH ) == 0 ) { struct vector_space_object *vec = pointer_to_vso( pointer ); if ( vec != NULL ) { @@ -55,13 +56,17 @@ bool check_tag( struct cons_pointer pointer, char *tag ) { * * You can't roll over the reference count. Once it hits the maximum * value you cannot increment further. + * + * Returns the `pointer`. */ -void inc_ref( struct cons_pointer pointer ) { +struct cons_pointer inc_ref( struct cons_pointer pointer ) { struct cons_space_object *cell = &pointer2cell( pointer ); if ( cell->count < MAXREFERENCE ) { cell->count++; } + + return pointer; } /** @@ -69,8 +74,10 @@ void inc_ref( struct cons_pointer pointer ) { * * If a count has reached MAXREFERENCE it cannot be decremented. * If a count is decremented to zero the cell should be freed. + * + * Returns the `pointer`, or, if the cell has been freed, NIL. */ -void dec_ref( struct cons_pointer pointer ) { +struct cons_pointer dec_ref( struct cons_pointer pointer ) { struct cons_space_object *cell = &pointer2cell( pointer ); if ( cell->count > 0 ) { @@ -78,8 +85,11 @@ void dec_ref( struct cons_pointer pointer ) { if ( cell->count == 0 ) { free_cell( pointer ); + pointer = NIL; } } + + return pointer; } @@ -108,38 +118,42 @@ struct cons_pointer c_type( struct cons_pointer pointer ) { } /** - * Implementation of car in C. If arg is not a cons, does not error but returns nil. + * Implementation of car in C. If arg is not a cons, or the current user is not + * authorised to read it, does not error but returns nil. */ struct cons_pointer c_car( struct cons_pointer arg ) { - struct cons_pointer result = NIL; + struct cons_pointer result = NIL; - if ( consp( arg ) ) { - result = pointer2cell( arg ).payload.cons.car; - } + if ( truep( authorised( arg, NIL ) ) && consp( arg ) ) { + result = pointer2cell( arg ).payload.cons.car; + } - return result; + return result; } /** - * Implementation of cdr in C. If arg is not a sequence, does not error but returns nil. + * Implementation of cdr in C. If arg is not a sequence, or the current user is + * not authorised to read it,does not error but returns nil. */ struct cons_pointer c_cdr( struct cons_pointer arg ) { - struct cons_pointer result = NIL; + struct cons_pointer result = NIL; + if ( truep( authorised( arg, NIL ) ) ) { struct cons_space_object *cell = &pointer2cell( arg ); - switch (cell->tag.value) { - case CONSTV: + switch ( cell->tag.value ) { + case CONSTV: result = cell->payload.cons.cdr; break; - case KEYTV: - case STRINGTV: - case SYMBOLTV: + case KEYTV: + case STRINGTV: + case SYMBOLTV: result = cell->payload.string.cdr; break; } + } - return result; + return result; } /** diff --git a/src/memory/consspaceobject.h b/src/memory/consspaceobject.h index 486efe2..98a5a24 100644 --- a/src/memory/consspaceobject.h +++ b/src/memory/consspaceobject.h @@ -675,9 +675,9 @@ struct cons_space_object { bool check_tag( struct cons_pointer pointer, char *tag ); -void inc_ref( struct cons_pointer pointer ); +struct cons_pointer inc_ref( struct cons_pointer pointer ); -void dec_ref( struct cons_pointer pointer ); +struct cons_pointer dec_ref( struct cons_pointer pointer ); struct cons_pointer c_type( struct cons_pointer pointer ); diff --git a/src/memory/hashmap.c b/src/memory/hashmap.c index b8110e4..9be7d64 100644 --- a/src/memory/hashmap.c +++ b/src/memory/hashmap.c @@ -8,33 +8,89 @@ */ #include "arith/integer.h" +#include "arith/peano.h" +#include "authorise.h" +#include "debug.h" #include "memory/consspaceobject.h" #include "memory/hashmap.h" #include "memory/vectorspace.h" +/** + * Return a hash value for the structure indicated by `ptr` such that if + * `x`,`y` are two separate structures whose print representation is the same + * then `(sxhash x)` and `(sxhash y)` will always be equal. + */ +uint32_t sxhash( struct cons_pointer ptr ) { + // TODO: Not Yet Implemented + /* TODO: should look at the implementation of Common Lisp sxhash? + * My current implementation of `print` only addresses URL_FILE + * streams. It would be better if it also addressed strings but + * currently it doesn't. Creating a print string of the structure + * and taking the hash of that would be one simple (but not necessarily + * cheap) solution. + */ + /* TODO: sbcl's implementation of `sxhash` is in src/compiler/sxhash.lisp + * and is EXTREMELY complex, and essentially has a different dispatch for + * every type of object. It's likely we need to do the same. + */ + return 0; +} + /** * Get the hash value for the cell indicated by this `ptr`; currently only - * implemented for string like things. + * implemented for string like things and integers. */ uint32_t get_hash(struct cons_pointer ptr) { struct cons_space_object *cell = &pointer2cell(ptr); uint32_t result = 0; - switch (cell->tag.value) - { - case KEYTV: - case STRINGTV: - case SYMBOLTV: + switch ( cell->tag.value ) { + case INTEGERTV: + /* Note that we're only hashing on the least significant word of an + * integer. */ + result = cell->payload.integer.value & 0xffffffff; + break; + case KEYTV: + case STRINGTV: + case SYMBOLTV: result = cell->payload.string.hash; - default: - // TODO: Not Yet Implemented - result = 0; + break; + case TRUETV: + result = 1; // arbitrarily + break; + default: + result = sxhash( ptr ); + break; } return result; } +/** + * Free the hashmap indicated by this `pointer`. + */ +void free_hashmap( struct cons_pointer pointer ) { + struct cons_space_object *cell = &pointer2cell( pointer ); + + if ( hashmapp( pointer ) ) { + struct vector_space_object *vso = cell->payload.vectorp.address; + struct hashmap_payload payload = vso->payload.hashmap; + + dec_ref( payload.hash_fn ); + dec_ref( payload.write_acl ); + + for ( int i = 0; i < payload.n_buckets; i++ ) { + debug_printf( DEBUG_ALLOC, + L"Decrementing buckets[%d] of hashmap at 0x%lx\n", i, + cell->payload.vectorp.address ); + dec_ref( payload.buckets[i] ); + } + } else { + debug_printf( DEBUG_ALLOC, L"Non-hashmap passed to `free_hashmap`\n" ); + } +} + /** * A lisp function signature conforming wrapper around get_hash, q.v.. */ @@ -50,7 +106,8 @@ struct cons_pointer lisp_get_hash(struct stack_frame *frame, * `hash_fn` is `NIL`, use the standard hash funtion. */ struct cons_pointer make_hashmap( uint32_t n_buckets, - struct cons_pointer hash_fn ) { + struct cons_pointer hash_fn, + struct cons_pointer write_acl ) { struct cons_pointer result = make_vso( HASHTAG, ( sizeof( struct cons_pointer ) * ( n_buckets + 1 ) ) + ( sizeof( uint32_t ) * 2 ) ); @@ -58,7 +115,9 @@ struct cons_pointer make_hashmap( uint32_t n_buckets, struct hashmap_payload *payload = (struct hashmap_payload *)&pointer_to_vso( result )->payload; - payload->hash_fn = hash_fn; + payload->hash_fn = inc_ref(hash_fn); + payload->write_acl = inc_ref(write_acl); + payload->n_buckets = n_buckets; for ( int i = 0; i < n_buckets; i++ ) { payload->buckets[i] = NIL; @@ -68,28 +127,170 @@ struct cons_pointer make_hashmap( uint32_t n_buckets, } /** - * If this `ptr` is a pointer to a hashmap, return a new identical hashmap; - * else return `NIL`. TODO: should return an exception. + * Lisp funtion of up to four args (all optional), where + * + * first is expected to be an integer, the number of buckets, or nil; + * second is expected to be a hashing function, or nil; + * third is expected to be an assocable, or nil; + * fourth is a list of user tokens, to be used as a write ACL, or nil. */ -struct cons_pointer clone_hashmap(struct cons_pointer ptr) { - struct cons_pointer result = NIL; +struct cons_pointer lisp_make_hashmap( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ) { + uint32_t n = 32; + struct cons_pointer hash_fn = NIL; + struct cons_pointer result = NIL; - if (hashmapp(ptr)) { - struct vector_space_object *from = pointer_to_vso( ptr ); + if ( frame->args > 0 ) { + if ( integerp( frame->arg[0] ) ) { + n = to_long_int( frame->arg[0] ) % UINT32_MAX; + } else if ( !nilp( frame->arg[0] ) ) { + result = make_exception( + c_string_to_lisp_string( L"First arg to `hashmap`, if passed, must " + L"be an integer or `nil`.`" ), + NIL ); + } + } + if ( frame->args > 1 ) { + hash_fn = frame->arg[1]; + } - if ( from != NULL ) { - struct hashmap_payload *from_pl = (struct hashmap_payload*)from->payload; - result = make_hashmap( from_pl->n_buckets, from_pl->hash_fn); - struct vector_space_object *to = pointer_to_vso(result); - struct hashmap_payload *to_pl = (struct hashmap_payload*)to->payload; + if ( nilp( result ) ) { + /* if there are fewer than 4 args, then arg[3] ought to be nil anyway, which + * is fine */ + result = make_hashmap( n, hash_fn, frame->arg[3] ); + struct vector_space_object *map = pointer_to_vso( result ); - for (int i = 0; i < to_pl->n_buckets; i++) { - to_pl->buckets[i] = from_pl->buckets[i]; - inc_ref(to_pl->buckets[i]); - } + if ( frame->args > 2 && + truep( authorised( result, map->payload.hashmap.write_acl ) ) ) { + // then arg[2] ought to be an assoc list which we should iterate down + // populating the hashmap. + for ( struct cons_pointer cursor = frame->arg[2]; !nilp( cursor ); + cursor = c_cdr( cursor ) ) { + struct cons_pointer pair = c_car( cursor ); + struct cons_pointer key = c_car( pair ); + struct cons_pointer val = c_cdr( pair ); + + uint32_t bucket_no = + get_hash( key ) % + ( (struct hashmap_payload *)&( map->payload ) )->n_buckets; + + map->payload.hashmap.buckets[bucket_no] = + inc_ref( make_cons( make_cons( key, val ), + map->payload.hashmap.buckets[bucket_no] )); } } + } - return result; + return result; +} + +/** + * If this `ptr` is a pointer to a hashmap, return a new identical hashmap; + * else return `NIL`. TODO: should return an exception if ptr is not a + * readable hashmap. + */ +struct cons_pointer clone_hashmap( struct cons_pointer ptr ) { + struct cons_pointer result = NIL; + + if ( truep( authorised( ptr, NIL ) ) ) { + if ( hashmapp( ptr ) ) { + struct vector_space_object *from = pointer_to_vso( ptr ); + + if ( from != NULL ) { + struct hashmap_payload from_pl = from->payload.hashmap; + result = make_hashmap( from_pl.n_buckets, from_pl.hash_fn, from_pl.write_acl ); + struct vector_space_object *to = pointer_to_vso( result ); + struct hashmap_payload to_pl = to->payload.hashmap; + + for ( int i = 0; i < to_pl.n_buckets; i++ ) { + to_pl.buckets[i] = from_pl.buckets[i]; + inc_ref( to_pl.buckets[i] ); + } + } + } + } + // TODO: else exception? + + return result; +} + +/** + * Store this `val` as the value of this `key` in this hashmap `mapp`. If + * current user is authorised to write to this hashmap, modifies the hashmap and + * returns it; if not, clones the hashmap, modifies the clone, and returns that. + */ +struct cons_pointer hashmap_put( struct cons_pointer mapp, + struct cons_pointer key, + struct cons_pointer val ) { + // TODO: if current user has write access to this hashmap + if ( hashmapp( mapp ) && !nilp( key ) ) { + struct vector_space_object *map = pointer_to_vso( mapp ); + + if (nilp(authorised(mapp, map->payload.hashmap.write_acl))) { + mapp = clone_hashmap( mapp); + map = pointer_to_vso( mapp ); + } + uint32_t bucket_no = + get_hash( key ) % + map->payload.hashmap.n_buckets; + + map->payload.hashmap.buckets[bucket_no] = + inc_ref( make_cons( make_cons( key, val ), + map->payload.hashmap.buckets[bucket_no] )); + } + + return mapp; +} + +/** + * Expects `frame->arg[1]` to be a hashmap or namespace; `frame->arg[2]` to be + * a string-like-thing (perhaps necessarily a keyword); frame->arg[3] to be + * any value. If + * current user is authorised to write to this hashmap, modifies the hashmap and + * returns it; if not, clones the hashmap, modifies the clone, and returns that. + */ +struct cons_pointer lisp_hashmap_put( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ) { + struct cons_pointer mapp = frame->arg[0]; + struct cons_pointer key = frame->arg[1]; + struct cons_pointer val = frame->arg[2]; + + return hashmap_put(mapp, key, val); +} + +/** + * Copy all key/value pairs in this association list `assoc` into this hashmap `mapp`. If + * current user is authorised to write to this hashmap, modifies the hashmap and + * returns it; if not, clones the hashmap, modifies the clone, and returns that. + */ +struct cons_pointer hashmap_put_all( struct cons_pointer mapp, + struct cons_pointer assoc ) { + // TODO: if current user has write access to this hashmap + if ( hashmapp( mapp ) && !nilp( assoc ) ) { + struct vector_space_object *map = pointer_to_vso( mapp ); + + if ( hashmapp( mapp ) && consp( assoc ) ) { + for ( struct cons_pointer pair = c_car( assoc ); !nilp( pair ); + pair = c_car( assoc ) ) { + /* TODO: this is really hammering the memory management system, because + * it will make a new lone for every key/value pair added. Fix. */ + mapp = hashmap_put( mapp, c_car( pair ), c_cdr( pair ) ); + } + } + } + + return mapp; +} + +/** + * Lisp function expecting two arguments, a hashmap and an assoc list. Copies all + * key/value pairs from the assoc list into the map. + */ +struct cons_pointer lisp_hashmap_put_all( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ) { + return hashmap_put_all( frame->arg[0], frame->arg[1] ); } diff --git a/src/memory/hashmap.h b/src/memory/hashmap.h index 813211b..579b56d 100644 --- a/src/memory/hashmap.h +++ b/src/memory/hashmap.h @@ -15,24 +15,24 @@ #include "memory/consspaceobject.h" #include "memory/vectorspace.h" -/** - * The payload of a hashmap. The number of buckets is assigned at run-time, - * and is stored in n_buckets. Each bucket is something ASSOC can consume: - * i.e. either an assoc list or a further hashmap. - */ -struct hashmap_payload { - struct cons_pointer hash_fn; - uint32_t n_buckets; - uint32_t unused; /* for word alignment and possible later expansion */ - struct cons_pointer buckets[]; -}; +uint32_t get_hash( struct cons_pointer ptr ); -uint32_t get_hash(struct cons_pointer ptr); +void free_hashmap( struct cons_pointer ptr ); -struct cons_pointer lisp_get_hash(struct stack_frame *frame, - struct cons_pointer frame_pointer, - struct cons_pointer env); +struct cons_pointer lisp_get_hash( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); -struct cons_pointer make_hashmap( uint32_t n_buckets, struct cons_pointer hash_fn); +struct cons_pointer lisp_hashmap_put( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); + +struct cons_pointer lisp_hashmap_put_all( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); + +struct cons_pointer lisp_make_hashmap( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); #endif \ No newline at end of file diff --git a/src/memory/vectorspace.c b/src/memory/vectorspace.c index 480effb..b3e64c6 100644 --- a/src/memory/vectorspace.c +++ b/src/memory/vectorspace.c @@ -22,6 +22,8 @@ #include "conspage.h" #include "consspaceobject.h" #include "debug.h" +#include "hashmap.h" +#include "stack.h" #include "vectorspace.h" @@ -112,3 +114,27 @@ struct cons_pointer make_vso( char *tag, uint64_t payload_size ) { return result; } + +/** for vector space pointers, free the actual vector-space + * object. Dangerous! */ + +void free_vso( struct cons_pointer pointer ) { + struct cons_space_object * cell = &pointer2cell( pointer); + + debug_printf( DEBUG_ALLOC, L"About to free vector-space object at 0x%lx\n", + cell->payload.vectorp.address ); + struct vector_space_object *vso = cell->payload.vectorp.address; + + switch ( vso->header.tag.value ) { + case HASHTV: + free_hashmap( pointer ); + break; + case STACKFRAMETV: + free_stack_frame( get_stack_frame( pointer ) ); + break; + } + + free( (void *)cell->payload.vectorp.address ); + debug_printf( DEBUG_ALLOC, L"Freed vector-space object at 0x%lx\n", + cell->payload.vectorp.address ); +} \ No newline at end of file diff --git a/src/memory/vectorspace.h b/src/memory/vectorspace.h index 15740ac..ed050bc 100644 --- a/src/memory/vectorspace.h +++ b/src/memory/vectorspace.h @@ -18,6 +18,7 @@ #include #include "consspaceobject.h" +#include "hashmap.h" #ifndef __vectorspace_h #define __vectorspace_h @@ -58,6 +59,8 @@ struct cons_pointer make_vso( char *tag, uint64_t payload_size ); +void free_vso(struct cons_pointer pointer); + /** * the header which forms the start of every vector space object. */ @@ -75,6 +78,27 @@ struct vector_space_header { uint64_t size; }; +/** + * The payload of a hashmap. The number of buckets is assigned at run-time, + * and is stored in n_buckets. Each bucket is something ASSOC can consume: + * i.e. either an assoc list or a further hashmap. + */ +struct hashmap_payload { + struct cons_pointer + hash_fn; /* function for hashing values in this hashmap, or `NIL` to use + the default hashing function */ + struct cons_pointer write_acl; /* it seems to me that it is likely that the + * principal difference between a hashmap and a + * namespace is that a hashmap has a write ACL + * of `NIL`, meaning not writeable by anyone */ + uint32_t n_buckets; /* number of hash buckets */ + uint32_t unused; /* for word alignment and possible later expansion */ + struct cons_pointer + buckets[]; /* actual hash buckets, which should be `NIL` + * or assoc lists or (possibly) further hashmaps. */ +}; + + /** a vector_space_object is just a vector_space_header followed by a * lump of bytes; what we deem to be in there is a function of the tag, * and at this stage we don't have a good picture of what these may be. @@ -87,7 +111,11 @@ struct vector_space_object { struct vector_space_header header; /** we'll malloc `size` bytes for payload, `payload` is just the first of these. * \todo this is almost certainly not idiomatic C. */ - char payload; + union { + /** the payload considered as bytes */ + char bytes; + struct hashmap_payload hashmap; + } payload; }; #endif diff --git a/src/ops/intern.c b/src/ops/intern.c index cf86e6b..802bc82 100644 --- a/src/ops/intern.c +++ b/src/ops/intern.c @@ -23,6 +23,7 @@ #include "consspaceobject.h" #include "debug.h" #include "equal.h" +#include "hashmap.h" #include "lispops.h" #include "map.h" #include "print.h" @@ -107,8 +108,10 @@ struct cons_pointer c_assoc( struct cons_pointer key, break; } } - } else if (vectorpointp( store)) { + } else if (hashmapp( store)) { result = assoc_in_map( key, store); + } else { + result = throw_exception(c_string_to_lisp_string(L"Store is of unknown type"), NIL); } debug_print( L"c_assoc returning ", DEBUG_BIND);