489 lines
18 KiB
C
489 lines
18 KiB
C
/*
|
|
* intern.c
|
|
*
|
|
* For now this implements an oblist and shallow binding; local environments can
|
|
* be consed onto the front of the oblist. Later, this won't do; bindings will happen
|
|
* in namespaces, which will probably be implemented as hash tables.
|
|
*
|
|
* Doctrine is that cons cells are immutable, and life is a lot more simple if they are;
|
|
* so when a symbol is rebound in the master oblist, what in fact we do is construct
|
|
* a new oblist without the previous binding but with the new binding. Anything which,
|
|
* prior to this action, held a pointer to the old oblist (as all current threads'
|
|
* environments must do) continues to hold a pointer to the old oblist, and consequently
|
|
* doesn't see the change. This is probably good but does mean you cannot use bindings
|
|
* on the oblist to signal between threads.
|
|
*
|
|
* (c) 2017 Simon Brooke <simon@journeyman.cc>
|
|
* Licensed under GPL version 2.0, or, at your option, any later version.
|
|
*/
|
|
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
/*
|
|
* wide characters
|
|
*/
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
|
|
#include "authorise.h"
|
|
#include "debug.h"
|
|
#include "io/io.h"
|
|
#include "memory/conspage.h"
|
|
#include "memory/consspaceobject.h"
|
|
#include "memory/hashmap.h"
|
|
#include "ops/equal.h"
|
|
#include "ops/intern.h"
|
|
#include "ops/lispops.h"
|
|
// #include "print.h"
|
|
|
|
/**
|
|
* @brief The global object list/or, to put it differently, the root namespace.
|
|
* What is added to this during system setup is 'global', that is,
|
|
* visible to all sessions/threads. What is added during a session/thread is local to
|
|
* that session/thread (because shallow binding). There must be some way for a user to
|
|
* make the contents of their own environment persistent between threads but I don't
|
|
* know what it is yet. At some stage there must be a way to rebind deep values so
|
|
* they're visible to all users/threads, but again I don't yet have any idea how
|
|
* that will work.
|
|
*/
|
|
struct cons_pointer oblist = NIL;
|
|
|
|
/**
|
|
* @brief the symbol `NIL`, which is special!
|
|
*
|
|
*/
|
|
struct cons_pointer privileged_symbol_nil = NIL;
|
|
|
|
/**
|
|
* Return a hash value for the structure indicated by `ptr` such that if
|
|
* `x`,`y` are two separate structures whose print representation is the same
|
|
* then `(sxhash x)` and `(sxhash y)` will always be equal.
|
|
*/
|
|
uint32_t sxhash( struct cons_pointer ptr ) {
|
|
// TODO: Not Yet Implemented
|
|
/* TODO: should look at the implementation of Common Lisp sxhash?
|
|
* My current implementation of `print` only addresses URL_FILE
|
|
* streams. It would be better if it also addressed strings but
|
|
* currently it doesn't. Creating a print string of the structure
|
|
* and taking the hash of that would be one simple (but not necessarily
|
|
* cheap) solution.
|
|
*/
|
|
/* TODO: sbcl's implementation of `sxhash` is in src/compiler/sxhash.lisp
|
|
* and is EXTREMELY complex, and essentially has a different dispatch for
|
|
* every type of object. It's likely we need to do the same.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Get the hash value for the cell indicated by this `ptr`; currently only
|
|
* implemented for string like things and integers.
|
|
*/
|
|
uint32_t get_hash( struct cons_pointer ptr ) {
|
|
struct cons_space_object *cell = &pointer2cell( ptr );
|
|
uint32_t result = 0;
|
|
|
|
switch ( cell->tag.value ) {
|
|
case INTEGERTV:
|
|
/* Note that we're only hashing on the least significant word of an
|
|
* integer. */
|
|
result = cell->payload.integer.value & 0xffffffff;
|
|
break;
|
|
case KEYTV:
|
|
case STRINGTV:
|
|
case SYMBOLTV:
|
|
result = cell->payload.string.hash;
|
|
break;
|
|
case TRUETV:
|
|
result = 1; // arbitrarily
|
|
break;
|
|
default:
|
|
result = sxhash( ptr );
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Free the hashmap indicated by this `pointer`.
|
|
*/
|
|
void free_hashmap( struct cons_pointer pointer ) {
|
|
struct cons_space_object *cell = &pointer2cell( pointer );
|
|
|
|
if ( hashmapp( pointer ) ) {
|
|
struct vector_space_object *vso = cell->payload.vectorp.address;
|
|
|
|
dec_ref( vso->payload.hashmap.hash_fn );
|
|
dec_ref( vso->payload.hashmap.write_acl );
|
|
|
|
for ( int i = 0; i < vso->payload.hashmap.n_buckets; i++ ) {
|
|
if ( !nilp( vso->payload.hashmap.buckets[i] ) ) {
|
|
debug_printf( DEBUG_ALLOC,
|
|
L"Decrementing bucket [%d] of hashmap at 0x%lx\n",
|
|
i, cell->payload.vectorp.address );
|
|
dec_ref( vso->payload.hashmap.buckets[i] );
|
|
}
|
|
}
|
|
} else {
|
|
debug_printf( DEBUG_ALLOC, L"Non-hashmap passed to `free_hashmap`\n" );
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* Make a hashmap with this number of buckets, using this `hash_fn`. If
|
|
* `hash_fn` is `NIL`, use the standard hash funtion.
|
|
*/
|
|
struct cons_pointer make_hashmap( uint32_t n_buckets,
|
|
struct cons_pointer hash_fn,
|
|
struct cons_pointer write_acl ) {
|
|
struct cons_pointer result = make_vso( HASHTV,
|
|
( sizeof( struct cons_pointer ) *
|
|
( n_buckets + 2 ) ) +
|
|
( sizeof( uint32_t ) * 2 ) );
|
|
|
|
struct hashmap_payload *payload =
|
|
( struct hashmap_payload * ) &pointer_to_vso( result )->payload;
|
|
|
|
payload->hash_fn = inc_ref( hash_fn );
|
|
payload->write_acl = inc_ref( write_acl );
|
|
|
|
payload->n_buckets = n_buckets;
|
|
for ( int i = 0; i < n_buckets; i++ ) {
|
|
payload->buckets[i] = NIL;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* return a flat list of all the keys in the hashmap indicated by `map`.
|
|
*/
|
|
struct cons_pointer hashmap_keys( struct cons_pointer mapp ) {
|
|
struct cons_pointer result = NIL;
|
|
if ( hashmapp( mapp ) && truep( authorised( mapp, NIL ) ) ) {
|
|
struct vector_space_object *map = pointer_to_vso( mapp );
|
|
|
|
for ( int i = 0; i < map->payload.hashmap.n_buckets; i++ ) {
|
|
for ( struct cons_pointer c = map->payload.hashmap.buckets[i];
|
|
!nilp( c ); c = c_cdr( c ) ) {
|
|
result = make_cons( c_car( c_car( c ) ), result );
|
|
}
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Copy all key/value pairs in this association list `assoc` into this hashmap `mapp`. If
|
|
* current user is authorised to write to this hashmap, modifies the hashmap and
|
|
* returns it; if not, clones the hashmap, modifies the clone, and returns that.
|
|
*/
|
|
struct cons_pointer hashmap_put_all( struct cons_pointer mapp,
|
|
struct cons_pointer assoc ) {
|
|
// TODO: if current user has write access to this hashmap
|
|
if ( hashmapp( mapp ) ) {
|
|
struct vector_space_object *map = pointer_to_vso( mapp );
|
|
|
|
if ( consp( assoc ) ) {
|
|
for ( struct cons_pointer pair = c_car( assoc ); !nilp( pair );
|
|
pair = c_car( assoc ) ) {
|
|
/* TODO: this is really hammering the memory management system, because
|
|
* it will make a new lone for every key/value pair added. Fix. */
|
|
if ( consp( pair ) ) {
|
|
mapp = hashmap_put( mapp, c_car( pair ), c_cdr( pair ) );
|
|
} else if ( hashmapp( pair ) ) {
|
|
hashmap_put_all( mapp, pair );
|
|
} else {
|
|
hashmap_put( mapp, pair, TRUE );
|
|
}
|
|
assoc = c_cdr( assoc );
|
|
}
|
|
} else if ( hashmapp( assoc ) ) {
|
|
for ( struct cons_pointer keys = hashmap_keys( assoc );
|
|
!nilp( keys ); keys = c_cdr( keys ) ) {
|
|
struct cons_pointer key = c_car( keys );
|
|
hashmap_put( mapp, key, hashmap_get( assoc, key ) );
|
|
}
|
|
}
|
|
}
|
|
|
|
return mapp;
|
|
}
|
|
|
|
/** Get a value from a hashmap.
|
|
*
|
|
* Note that this is here, rather than in memory/hashmap.c, because it is
|
|
* closely tied in with c_assoc, q.v.
|
|
*/
|
|
struct cons_pointer hashmap_get( struct cons_pointer mapp,
|
|
struct cons_pointer key ) {
|
|
struct cons_pointer result = NIL;
|
|
if ( hashmapp( mapp ) && truep( authorised( mapp, NIL ) ) && !nilp( key ) ) {
|
|
struct vector_space_object *map = pointer_to_vso( mapp );
|
|
uint32_t bucket_no = get_hash( key ) % map->payload.hashmap.n_buckets;
|
|
|
|
result = c_assoc( key, map->payload.hashmap.buckets[bucket_no] );
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* If this `ptr` is a pointer to a hashmap, return a new identical hashmap;
|
|
* else return an exception.
|
|
*/
|
|
struct cons_pointer clone_hashmap( struct cons_pointer ptr ) {
|
|
struct cons_pointer result = NIL;
|
|
|
|
if ( truep( authorised( ptr, NIL ) ) ) {
|
|
if ( hashmapp( ptr ) ) {
|
|
struct vector_space_object const *from = pointer_to_vso( ptr );
|
|
|
|
if ( from != NULL ) {
|
|
struct hashmap_payload from_pl = from->payload.hashmap;
|
|
result =
|
|
make_hashmap( from_pl.n_buckets, from_pl.hash_fn,
|
|
from_pl.write_acl );
|
|
struct vector_space_object const *to =
|
|
pointer_to_vso( result );
|
|
struct hashmap_payload to_pl = to->payload.hashmap;
|
|
|
|
for ( int i = 0; i < to_pl.n_buckets; i++ ) {
|
|
to_pl.buckets[i] = from_pl.buckets[i];
|
|
inc_ref( to_pl.buckets[i] );
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
result =
|
|
make_exception( c_string_to_lisp_string
|
|
( L"Arg to `clone_hashmap` must "
|
|
L"be a readable hashmap.`" ), NIL );
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
// (keys set let quote read equal *out* *log* oblist cons source cond close meta mapcar negative? open subtract eval nλ *in* *sink* cdr set! reverse slurp try assoc eq add list time car t *prompt* absolute append apply divide exception get-hash hashmap inspect metadata multiply print put! put-all! read-char repl throw type + * - / = lambda λ nlambda progn)
|
|
|
|
/**
|
|
* Implementation of interned? in C. The final implementation if interned? will
|
|
* deal with stores which can be association lists or hashtables or hybrids of
|
|
* the two, but that will almost certainly be implemented in lisp.
|
|
*
|
|
* If this key is lexically identical to a key in this store, return the key
|
|
* from the store (so that later when we want to retrieve a value, an eq test
|
|
* will work); otherwise return NIL.
|
|
*/
|
|
struct cons_pointer
|
|
internedp( struct cons_pointer key, struct cons_pointer store ) {
|
|
struct cons_pointer result = NIL;
|
|
|
|
if ( symbolp( key ) || keywordp( key ) ) {
|
|
// TODO: I see what I was doing here and it would be the right thing to
|
|
// do for stores which are old-fashioned assoc lists, but it will not work
|
|
// for my new hybrid stores.
|
|
// for ( struct cons_pointer next = store;
|
|
// nilp( result ) && consp( next );
|
|
// next = pointer2cell( next ).payload.cons.cdr ) {
|
|
// struct cons_space_object entry =
|
|
// pointer2cell( pointer2cell( next ).payload.cons.car );
|
|
|
|
// debug_print( L"Internedp: checking whether `", DEBUG_BIND );
|
|
// debug_print_object( key, DEBUG_BIND );
|
|
// debug_print( L"` equals `", DEBUG_BIND );
|
|
// debug_print_object( entry.payload.cons.car, DEBUG_BIND );
|
|
// debug_print( L"`\n", DEBUG_BIND );
|
|
|
|
// if ( equal( key, entry.payload.cons.car ) ) {
|
|
// result = entry.payload.cons.car;
|
|
// }
|
|
if ( !nilp( c_assoc( key, store ) ) ) {
|
|
result = key;
|
|
} else if ( equal( key, privileged_symbol_nil ) ) {
|
|
result = privileged_symbol_nil;
|
|
}
|
|
} else {
|
|
debug_print( L"`", DEBUG_BIND );
|
|
debug_print_object( key, DEBUG_BIND );
|
|
debug_print( L"` is a ", DEBUG_BIND );
|
|
debug_printf( DEBUG_BIND, L"%4.4s",
|
|
( char * ) pointer2cell( key ).tag.bytes );
|
|
debug_print( L", not a KEYW or SYMB", DEBUG_BIND );
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Implementation of assoc in C. Like interned?, the final implementation will
|
|
* deal with stores which can be association lists or hashtables or hybrids of
|
|
* the two, but that will almost certainly be implemented in lisp.
|
|
*
|
|
* If this key is lexically identical to a key in this store, return the value
|
|
* of that key from the store; otherwise return NIL.
|
|
*/
|
|
struct cons_pointer c_assoc( struct cons_pointer key,
|
|
struct cons_pointer store ) {
|
|
struct cons_pointer result = NIL;
|
|
|
|
if ( !nilp( key ) ) {
|
|
if ( consp( store ) ) {
|
|
for ( struct cons_pointer next = store;
|
|
nilp( result ) && ( consp( next ) || hashmapp( next ) );
|
|
next = pointer2cell( next ).payload.cons.cdr ) {
|
|
if ( consp( next ) ) {
|
|
// #ifdef DEBUG
|
|
// debug_print( L"\nc_assoc; key is `", DEBUG_BIND );
|
|
// debug_print_object( key, DEBUG_BIND );
|
|
// debug_print( L"`\n", DEBUG_BIND );
|
|
// #endif
|
|
|
|
struct cons_pointer entry_ptr = c_car( next );
|
|
struct cons_space_object entry = pointer2cell( entry_ptr );
|
|
|
|
switch ( entry.tag.value ) {
|
|
case CONSTV:
|
|
if ( equal( key, entry.payload.cons.car ) ) {
|
|
result = entry.payload.cons.cdr;
|
|
}
|
|
break;
|
|
case VECTORPOINTTV:
|
|
result = hashmap_get( entry_ptr, key );
|
|
break;
|
|
default:
|
|
throw_exception( c_append
|
|
( c_string_to_lisp_string
|
|
( L"Store entry is of unknown type: " ),
|
|
c_type( entry_ptr ) ), NIL );
|
|
}
|
|
|
|
// #ifdef DEBUG
|
|
// debug_print( L"c_assoc `", DEBUG_BIND );
|
|
// debug_print_object( key, DEBUG_BIND );
|
|
// debug_print( L"` returning: ", DEBUG_BIND );
|
|
// debug_print_object( result, DEBUG_BIND );
|
|
// debug_println( DEBUG_BIND );
|
|
// #endif
|
|
}
|
|
}
|
|
} else if ( hashmapp( store ) ) {
|
|
result = hashmap_get( store, key );
|
|
} else if ( !nilp( store ) ) {
|
|
// #ifdef DEBUG
|
|
// debug_print( L"c_assoc; store is of unknown type `", DEBUG_BIND );
|
|
// debug_printf( DEBUG_BIND, L"%4.4s", (char *)pointer2cell(key).tag.bytes);
|
|
// debug_print( L"`\n", DEBUG_BIND );
|
|
// #endif
|
|
result =
|
|
throw_exception( c_append
|
|
( c_string_to_lisp_string
|
|
( L"Store is of unknown type: " ),
|
|
c_type( store ) ), NIL );
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Store this `val` as the value of this `key` in this hashmap `mapp`. If
|
|
* current user is authorised to write to this hashmap, modifies the hashmap and
|
|
* returns it; if not, clones the hashmap, modifies the clone, and returns that.
|
|
*/
|
|
struct cons_pointer hashmap_put( struct cons_pointer mapp,
|
|
struct cons_pointer key,
|
|
struct cons_pointer val ) {
|
|
if ( hashmapp( mapp ) && !nilp( key ) ) {
|
|
struct vector_space_object *map = pointer_to_vso( mapp );
|
|
|
|
if ( nilp( authorised( mapp, map->payload.hashmap.write_acl ) ) ) {
|
|
mapp = clone_hashmap( mapp );
|
|
map = pointer_to_vso( mapp );
|
|
}
|
|
uint32_t bucket_no = get_hash( key ) % map->payload.hashmap.n_buckets;
|
|
|
|
// TODO: if there are too many values in the bucket, rehash the whole
|
|
// hashmap to a bigger number of buckets, and return that.
|
|
|
|
map->payload.hashmap.buckets[bucket_no] =
|
|
make_cons( make_cons( key, val ),
|
|
map->payload.hashmap.buckets[bucket_no] );
|
|
}
|
|
|
|
return mapp;
|
|
}
|
|
|
|
/**
|
|
* Return a new key/value store containing all the key/value pairs in this
|
|
* store with this key/value pair added to the front.
|
|
*/
|
|
struct cons_pointer set( struct cons_pointer key, struct cons_pointer value,
|
|
struct cons_pointer store ) {
|
|
struct cons_pointer result = NIL;
|
|
|
|
#ifdef DEBUG
|
|
bool deep = vectorpointp( store );
|
|
debug_print_binding( key, value, deep, DEBUG_BIND );
|
|
|
|
if ( deep ) {
|
|
debug_printf( DEBUG_BIND, L"\t-> %4.4s\n",
|
|
pointer2cell( store ).payload.vectorp.tag.bytes );
|
|
}
|
|
#endif
|
|
if ( nilp( value ) ) {
|
|
result = store;
|
|
} else if ( nilp( store ) || consp( store ) ) {
|
|
result = make_cons( make_cons( key, value ), store );
|
|
} else if ( hashmapp( store ) ) {
|
|
result = hashmap_put( store, key, value );
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* @brief Binds this `key` to this `value` in the global oblist, and returns the `key`.
|
|
*/
|
|
struct cons_pointer
|
|
deep_bind( struct cons_pointer key, struct cons_pointer value ) {
|
|
debug_print( L"Entering deep_bind\n", DEBUG_BIND );
|
|
|
|
struct cons_pointer old = oblist;
|
|
|
|
oblist = set( key, value, oblist );
|
|
|
|
// The oblist is not now an assoc list, and I don't think it will be again.
|
|
// if ( consp( oblist ) ) {
|
|
// inc_ref( oblist );
|
|
// dec_ref( old );
|
|
// }
|
|
|
|
debug_print( L"deep_bind returning ", DEBUG_BIND );
|
|
debug_print_object( key, DEBUG_BIND );
|
|
debug_println( DEBUG_BIND );
|
|
|
|
return key;
|
|
}
|
|
|
|
/**
|
|
* Ensure that a canonical copy of this key is bound in this environment, and
|
|
* return that canonical copy. If there is currently no such binding, create one
|
|
* with the value TRUE.
|
|
*/
|
|
struct cons_pointer
|
|
intern( struct cons_pointer key, struct cons_pointer environment ) {
|
|
struct cons_pointer result = environment;
|
|
struct cons_pointer canonical = internedp( key, environment );
|
|
if ( nilp( canonical ) ) {
|
|
/*
|
|
* not currently bound
|
|
*/
|
|
result = set( key, TRUE, environment );
|
|
}
|
|
|
|
return result;
|
|
}
|