/* * intern.c * * For now this implements an oblist and shallow binding; local environments can * be consed onto the front of the oblist. Later, this won't do; bindings will happen * in namespaces, which will probably be implemented as hash tables. * * Doctrine is that cons cells are immutable, and life is a lot more simple if they are; * so when a symbol is rebound in the master oblist, what in fact we do is construct * a new oblist without the previous binding but with the new binding. Anything which, * prior to this action, held a pointer to the old oblist (as all current threads' * environments must do) continues to hold a pointer to the old oblist, and consequently * doesn't see the change. This is probably good but does mean you cannot use bindings * on the oblist to signal between threads. * * (c) 2017 Simon Brooke * Licensed under GPL version 2.0, or, at your option, any later version. */ #include /* * wide characters */ #include #include #include "authorise.h" #include "debug.h" #include "io/io.h" #include "memory/conspage.h" #include "memory/consspaceobject.h" #include "memory/hashmap.h" #include "ops/equal.h" #include "ops/intern.h" #include "ops/lispops.h" // #include "print.h" /** * The global object list/or, to put it differently, the root namespace. * What is added to this during system setup is 'global', that is, * visible to all sessions/threads. What is added during a session/thread is local to * that session/thread (because shallow binding). There must be some way for a user to * make the contents of their own environment persistent between threads but I don't * know what it is yet. At some stage there must be a way to rebind deep values so * they're visible to all users/threads, but again I don't yet have any idea how * that will work. */ struct cons_pointer oblist = NIL; /** * Return a hash value for the structure indicated by `ptr` such that if * `x`,`y` are two separate structures whose print representation is the same * then `(sxhash x)` and `(sxhash y)` will always be equal. */ uint32_t sxhash( struct cons_pointer ptr ) { // TODO: Not Yet Implemented /* TODO: should look at the implementation of Common Lisp sxhash? * My current implementation of `print` only addresses URL_FILE * streams. It would be better if it also addressed strings but * currently it doesn't. Creating a print string of the structure * and taking the hash of that would be one simple (but not necessarily * cheap) solution. */ /* TODO: sbcl's implementation of `sxhash` is in src/compiler/sxhash.lisp * and is EXTREMELY complex, and essentially has a different dispatch for * every type of object. It's likely we need to do the same. */ return 0; } /** * Get the hash value for the cell indicated by this `ptr`; currently only * implemented for string like things and integers. */ uint32_t get_hash( struct cons_pointer ptr ) { struct cons_space_object *cell = &pointer2cell( ptr ); uint32_t result = 0; switch ( cell->tag.value ) { case INTEGERTV: /* Note that we're only hashing on the least significant word of an * integer. */ result = cell->payload.integer.value & 0xffffffff; break; case KEYTV: case STRINGTV: case SYMBOLTV: result = cell->payload.string.hash; break; case TRUETV: result = 1; // arbitrarily break; default: result = sxhash( ptr ); break; } return result; } /** * Free the hashmap indicated by this `pointer`. */ void free_hashmap( struct cons_pointer pointer ) { struct cons_space_object *cell = &pointer2cell( pointer ); if ( hashmapp( pointer ) ) { struct vector_space_object *vso = cell->payload.vectorp.address; dec_ref( vso->payload.hashmap.hash_fn ); dec_ref( vso->payload.hashmap.write_acl ); for ( int i = 0; i < vso->payload.hashmap.n_buckets; i++ ) { if ( !nilp( vso->payload.hashmap.buckets[i] ) ) { debug_printf( DEBUG_ALLOC, L"Decrementing bucket [%d] of hashmap at 0x%lx\n", i, cell->payload.vectorp.address ); dec_ref( vso->payload.hashmap.buckets[i] ); } } } else { debug_printf( DEBUG_ALLOC, L"Non-hashmap passed to `free_hashmap`\n" ); } } /** * Make a hashmap with this number of buckets, using this `hash_fn`. If * `hash_fn` is `NIL`, use the standard hash funtion. */ struct cons_pointer make_hashmap( uint32_t n_buckets, struct cons_pointer hash_fn, struct cons_pointer write_acl ) { struct cons_pointer result = make_vso( HASHTV, ( sizeof( struct cons_pointer ) * ( n_buckets + 2 ) ) + ( sizeof( uint32_t ) * 2 ) ); struct hashmap_payload *payload = ( struct hashmap_payload * ) &pointer_to_vso( result )->payload; payload->hash_fn = inc_ref( hash_fn ); payload->write_acl = inc_ref( write_acl ); payload->n_buckets = n_buckets; for ( int i = 0; i < n_buckets; i++ ) { payload->buckets[i] = NIL; } return result; } /** * return a flat list of all the keys in the hashmap indicated by `map`. */ struct cons_pointer hashmap_keys( struct cons_pointer mapp ) { struct cons_pointer result = NIL; if ( hashmapp( mapp ) && truep( authorised( mapp, NIL ) ) ) { struct vector_space_object *map = pointer_to_vso( mapp ); for ( int i = 0; i < map->payload.hashmap.n_buckets; i++ ) { for ( struct cons_pointer c = map->payload.hashmap.buckets[i]; !nilp( c ); c = c_cdr( c ) ) { result = make_cons( c_car( c_car( c ) ), result ); } } } return result; } /** * Copy all key/value pairs in this association list `assoc` into this hashmap `mapp`. If * current user is authorised to write to this hashmap, modifies the hashmap and * returns it; if not, clones the hashmap, modifies the clone, and returns that. */ struct cons_pointer hashmap_put_all( struct cons_pointer mapp, struct cons_pointer assoc ) { // TODO: if current user has write access to this hashmap if ( hashmapp( mapp ) ) { struct vector_space_object *map = pointer_to_vso( mapp ); if ( consp( assoc ) ) { for ( struct cons_pointer pair = c_car( assoc ); !nilp( pair ); pair = c_car( assoc ) ) { /* TODO: this is really hammering the memory management system, because * it will make a new lone for every key/value pair added. Fix. */ if (consp( pair)) { mapp = hashmap_put( mapp, c_car( pair ), c_cdr( pair ) ); } else if (hashmapp( pair)) { hashmap_put_all( mapp, pair); } else { hashmap_put( mapp, pair, TRUE); } assoc = c_cdr( assoc); } } else if (hashmapp( assoc)) { for (struct cons_pointer keys = hashmap_keys( assoc); !nilp( keys); keys = c_cdr( keys)) { struct cons_pointer key = c_car( keys); hashmap_put( mapp, key, hashmap_get( assoc, key)); } } } return mapp; } /** Get a value from a hashmap. * * Note that this is here, rather than in memory/hashmap.c, because it is * closely tied in with c_assoc, q.v. */ struct cons_pointer hashmap_get( struct cons_pointer mapp, struct cons_pointer key ) { struct cons_pointer result = NIL; if ( hashmapp( mapp ) && truep( authorised( mapp, NIL ) ) && !nilp( key ) ) { struct vector_space_object *map = pointer_to_vso( mapp ); uint32_t bucket_no = get_hash( key ) % map->payload.hashmap.n_buckets; result = c_assoc( key, map->payload.hashmap.buckets[bucket_no] ); } return result; } /** * If this `ptr` is a pointer to a hashmap, return a new identical hashmap; * else return an exception. */ struct cons_pointer clone_hashmap( struct cons_pointer ptr ) { struct cons_pointer result = NIL; if ( truep( authorised( ptr, NIL ) ) ) { if ( hashmapp( ptr ) ) { struct vector_space_object const *from = pointer_to_vso( ptr ); if ( from != NULL ) { struct hashmap_payload from_pl = from->payload.hashmap; result = make_hashmap( from_pl.n_buckets, from_pl.hash_fn, from_pl.write_acl ); struct vector_space_object const *to = pointer_to_vso( result ); struct hashmap_payload to_pl = to->payload.hashmap; for ( int i = 0; i < to_pl.n_buckets; i++ ) { to_pl.buckets[i] = from_pl.buckets[i]; inc_ref( to_pl.buckets[i] ); } } } } else { result = make_exception( c_string_to_lisp_string ( L"Arg to `clone_hashmap` must " L"be a readable hashmap.`" ), NIL ); } return result; } /** * Implementation of interned? in C. The final implementation if interned? will * deal with stores which can be association lists or hashtables or hybrids of * the two, but that will almost certainly be implemented in lisp. * * If this key is lexically identical to a key in this store, return the key * from the store (so that later when we want to retrieve a value, an eq test * will work); otherwise return NIL. */ struct cons_pointer internedp( struct cons_pointer key, struct cons_pointer store ) { struct cons_pointer result = NIL; if ( symbolp( key ) || keywordp( key ) ) { // TODO: I see what I was doing here and it would be the right thing to // do for stores which are old-fashioned assoc lists, but it will not work // for my new hybrid stores. // for ( struct cons_pointer next = store; // nilp( result ) && consp( next ); // next = pointer2cell( next ).payload.cons.cdr ) { // struct cons_space_object entry = // pointer2cell( pointer2cell( next ).payload.cons.car ); // debug_print( L"Internedp: checking whether `", DEBUG_BIND ); // debug_print_object( key, DEBUG_BIND ); // debug_print( L"` equals `", DEBUG_BIND ); // debug_print_object( entry.payload.cons.car, DEBUG_BIND ); // debug_print( L"`\n", DEBUG_BIND ); // if ( equal( key, entry.payload.cons.car ) ) { // result = entry.payload.cons.car; // } if (!nilp( c_assoc( key, store))) { result = key; } } else { debug_print( L"`", DEBUG_BIND ); debug_print_object( key, DEBUG_BIND ); debug_print( L"` is a ", DEBUG_BIND ); debug_print_object( c_type( key ), DEBUG_BIND ); debug_print( L", not a KEYW or SYMB", DEBUG_BIND ); } return result; } /** * Implementation of assoc in C. Like interned?, the final implementation will * deal with stores which can be association lists or hashtables or hybrids of * the two, but that will almost certainly be implemented in lisp. * * If this key is lexically identical to a key in this store, return the value * of that key from the store; otherwise return NIL. */ struct cons_pointer c_assoc( struct cons_pointer key, struct cons_pointer store ) { struct cons_pointer result = NIL; debug_print( L"c_assoc; key is `", DEBUG_BIND ); debug_print_object( key, DEBUG_BIND ); debug_print( L"`\n", DEBUG_BIND ); if ( consp( store ) ) { for ( struct cons_pointer next = store; nilp( result ) && ( consp( next ) || hashmapp( next ) ); next = pointer2cell( next ).payload.cons.cdr ) { if ( consp( next ) ) { struct cons_pointer entry_ptr = c_car( next ); struct cons_space_object entry = pointer2cell( entry_ptr ); switch ( entry.tag.value ) { case CONSTV: if ( equal( key, entry.payload.cons.car ) ) { result = entry.payload.cons.cdr; } break; case VECTORPOINTTV: result = hashmap_get( entry_ptr, key ); break; default: throw_exception( c_append( c_string_to_lisp_string( L"Store entry is of unknown type: " ), c_type( entry_ptr)), NIL); } } } } else if ( hashmapp( store ) ) { result = hashmap_get( store, key ); } else if ( !nilp( store ) ) { debug_print( L"c_assoc; store is of unknown type `", DEBUG_BIND ); debug_print_object( c_type( store), DEBUG_BIND ); debug_print( L"`\n", DEBUG_BIND ); result = throw_exception( c_append( c_string_to_lisp_string( L"Store is of unknown type: " ), c_type( store)), NIL ); } debug_print( L"c_assoc returning ", DEBUG_BIND ); debug_print_object( result, DEBUG_BIND ); debug_println( DEBUG_BIND ); return result; } /** * Store this `val` as the value of this `key` in this hashmap `mapp`. If * current user is authorised to write to this hashmap, modifies the hashmap and * returns it; if not, clones the hashmap, modifies the clone, and returns that. */ struct cons_pointer hashmap_put( struct cons_pointer mapp, struct cons_pointer key, struct cons_pointer val ) { if ( hashmapp( mapp ) && !nilp( key ) ) { struct vector_space_object *map = pointer_to_vso( mapp ); if ( nilp( authorised( mapp, map->payload.hashmap.write_acl ) ) ) { mapp = clone_hashmap( mapp ); map = pointer_to_vso( mapp ); } uint32_t bucket_no = get_hash( key ) % map->payload.hashmap.n_buckets; // TODO: if there are too many values in the bucket, rehash the whole // hashmap to a bigger number of buckets, and return that. map->payload.hashmap.buckets[bucket_no] = inc_ref( make_cons( make_cons( key, val ), map->payload.hashmap.buckets[bucket_no] ) ); } return mapp; } /** * Return a new key/value store containing all the key/value pairs in this * store with this key/value pair added to the front. */ struct cons_pointer set( struct cons_pointer key, struct cons_pointer value, struct cons_pointer store ) { struct cons_pointer result = NIL; debug_print( L"set: binding `", DEBUG_BIND ); debug_print_object( key, DEBUG_BIND ); debug_print( L"` to `", DEBUG_BIND ); debug_print_object( value, DEBUG_BIND ); debug_print( L"` in store ", DEBUG_BIND ); debug_dump_object( store, DEBUG_BIND ); debug_println( DEBUG_BIND ); debug_printf( DEBUG_BIND, L"set: store is %s\n`", lisp_string_to_c_string( c_type( store)) ); if (nilp( value)) { result = store; } else if ( nilp( store ) || consp( store ) ) { result = make_cons( make_cons( key, value ), store ); } else if ( hashmapp( store ) ) { debug_print( L"set: storing in hashmap\n", DEBUG_BIND); result = hashmap_put( store, key, value ); } debug_print( L"set returning ", DEBUG_BIND ); debug_print_object( result, DEBUG_BIND ); debug_println( DEBUG_BIND ); return result; } /** * @brief Binds this key to this value in the global oblist. */ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value ) { debug_print( L"Entering deep_bind\n", DEBUG_BIND ); struct cons_pointer old = oblist; debug_print( L"deep_bind: binding `", DEBUG_BIND ); debug_print_object( key, DEBUG_BIND ); debug_print( L"` to ", DEBUG_BIND ); debug_print_object( value, DEBUG_BIND ); debug_println( DEBUG_BIND ); oblist = set( key, value, oblist ); if ( consp( oblist ) ) { inc_ref( oblist ); dec_ref( old ); } debug_print( L"deep_bind returning ", DEBUG_BIND ); debug_print_object( key, DEBUG_BIND ); debug_println( DEBUG_BIND ); return key; } /** * Ensure that a canonical copy of this key is bound in this environment, and * return that canonical copy. If there is currently no such binding, create one * with the value NIL. */ struct cons_pointer intern( struct cons_pointer key, struct cons_pointer environment ) { struct cons_pointer result = environment; struct cons_pointer canonical = internedp( key, environment ); if ( nilp( canonical ) ) { /* * not currently bound */ result = set( key, NIL, environment ); } return result; }