Added the beginnings of hashmap but does not yet compile.

This commit is contained in:
Simon Brooke 2021-08-03 15:46:50 +01:00
parent 70d176982b
commit 3f3b596ff0
No known key found for this signature in database
GPG key ID: A7A4F18D1D4DF987
7 changed files with 154 additions and 26 deletions

View file

@ -1,7 +0,0 @@
{
"files.associations": {
"future": "cpp",
"system_error": "cpp",
"functional": "c"
}
}

View file

@ -13,7 +13,7 @@
#ifndef __psse_conspage_h #ifndef __psse_conspage_h
#define __psse_conspage_h #define __psse_conspage_h
#include "consspaceobject.h" #include "memory/consspaceobject.h"
/** /**
* the number of cons cells on a cons page. The maximum value this can * the number of cons cells on a cons page. The maximum value this can

View file

@ -101,16 +101,16 @@ struct cons_pointer c_car( struct cons_pointer arg ) {
struct cons_pointer c_cdr( struct cons_pointer arg ) { struct cons_pointer c_cdr( struct cons_pointer arg ) {
struct cons_pointer result = NIL; struct cons_pointer result = NIL;
struct cons_space_object cell = pointer2cell( arg ); struct cons_space_object *cell = &pointer2cell( arg );
switch (cell.tag.value) { switch (cell->tag.value) {
case CONSTV: case CONSTV:
result = pointer2cell( arg ).payload.cons.cdr; result = cell->payload.cons.cdr;
break; break;
case KEYTV: case KEYTV:
case STRINGTV: case STRINGTV:
case SYMBOLTV: case SYMBOLTV:
result = pointer2cell( arg ).payload.string.cdr; result = cell->payload.string.cdr;
break; break;
} }
@ -226,6 +226,36 @@ struct cons_pointer make_nlambda( struct cons_pointer args,
return pointer; return pointer;
} }
/**
 * Return a hash value for the string formed by putting character `c` in
 * front of the string-like thing indicated by `ptr`.
 *
 * What's important here is that two strings with the same characters in the
 * same order should have the same hash value, even if one was created using
 * `"foobar"` and the other by `(append "foo" "bar")`. Because the hash of a
 * cell is derived only from its own character and the hash already stored in
 * its tail, the value does not depend on how the string was assembled.
 *
 * @param c the character which will be stored in the new head cell.
 * @param ptr the tail of the string: NIL, or a pointer to a cell tagged
 * KEYTV, STRINGTV or SYMBOLTV.
 * @return `c` itself if `ptr` is NIL; `c` multiplied by the hash stored in
 * the tail cell (wrapping at 32 bits) if the tail is string-like; 0 for any
 * other kind of tail.
 */
uint32_t calculate_hash( wint_t c, struct cons_pointer ptr) {
    uint32_t result = 0;

    if (nilp(ptr)) {
        /* End of the string: there is no tail hash to combine with, so the
         * character itself seeds the hash. This test must come before the
         * tag dispatch; when it was nested inside the string-like cases it
         * could only be reached if the cell at `ptr` was itself tagged as a
         * string, so a NIL tail fell out of the switch with a hash of 0 and
         * every hash built on top of it by multiplication was 0 as well.
         * NOTE(review): assumes a NIL pointer is never tagged KEYTV,
         * STRINGTV or SYMBOLTV - confirm against nilp(). */
        result = (uint32_t) c;
    } else {
        struct cons_space_object *cell = &pointer2cell(ptr);

        switch (cell->tag.value)
        {
            case KEYTV:
            case STRINGTV:
            case SYMBOLTV:
                /* the result is a uint32_t, so the product is reduced
                 * modulo 2^32 on assignment; no explicit mask is needed. */
                result = (uint32_t) c * cell->payload.string.hash;
                break;
            default:
                /* not a string-like tail: leave the hash as 0. */
                break;
        }
    }

    return result;
}
/** /**
* Construct a string from this character (which later will be UTF) and * Construct a string from this character (which later will be UTF) and
* this tail. A string is implemented as a flat list of cells each of which * this tail. A string is implemented as a flat list of cells each of which
@ -245,8 +275,10 @@ make_string_like_thing( wint_t c, struct cons_pointer tail, char *tag ) {
cell->payload.string.cdr.page = tail.page; cell->payload.string.cdr.page = tail.page;
/* \todo There's a problem here. Sometimes the offsets on /* \todo There's a problem here. Sometimes the offsets on
* strings are quite massively off. Fix is probably * strings are quite massively off. Fix is probably
* cell->payload.string.cdr = tsil */ * cell->payload.string.cdr = tail */
cell->payload.string.cdr.offset = tail.offset; cell->payload.string.cdr.offset = tail.offset;
cell->payload.string.hash = calculate_hash(c, tail);
} else { } else {
// \todo should throw an exception! // \todo should throw an exception!
debug_printf( DEBUG_ALLOC, debug_printf( DEBUG_ALLOC,

View file

@ -20,7 +20,8 @@
#include <wchar.h> #include <wchar.h>
#include <wctype.h> #include <wctype.h>
#include "fopen.h" #include "io/fopen.h"
#include "memory/conspage.h"
/** /**
@ -557,8 +558,8 @@ struct stream_payload {
struct string_payload { struct string_payload {
/** the actual character stored in this cell */ /** the actual character stored in this cell */
wint_t character; wint_t character;
/** unused padding to word-align the cdr */ /** a hash of the string value, computed at store time. */
uint32_t padding; uint32_t hash;
/** the remainder of the string following this character. */ /** the remainder of the string following this character. */
struct cons_pointer cdr; struct cons_pointer cdr;
}; };

61
src/memory/hashmap.c Normal file
View file

@ -0,0 +1,61 @@
/*
* hashmap.c
*
* Basic implementation of a hashmap.
*
* (c) 2021 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include "arith/integer.h"
#include "memory/consspaceobject.h"
#include "memory/hashmap.h"
/**
 * Get the hash value for the cell indicated by this `ptr`; currently only
 * implemented for string like things.
 *
 * @param ptr pointer to the cell whose hash is wanted.
 * @return the hash stored in the cell's string payload if the cell is tagged
 * KEYTV, STRINGTV or SYMBOLTV; 0 for every other kind of cell.
 */
uint32_t get_hash(struct cons_pointer ptr)
{
    struct cons_space_object *cell = &pointer2cell(ptr);
    uint32_t result = 0;

    switch (cell->tag.value)
    {
        case KEYTV:
        case STRINGTV:
        case SYMBOLTV:
            result = cell->payload.string.hash;
            /* without this break control fell through into `default` and
             * the stored hash was overwritten with 0. */
            break;
        default:
            // TODO: Not Yet Implemented
            result = 0;
            break;
    }

    return result;
}
/**
 * A wrapper around get_hash (q.v.) with the signature of a Lisp function.
 *
 * Hashes the first argument held in `frame` (`frame->arg[0]`) and hands the
 * value to make_integer. `frame_pointer` and `env` are accepted only to
 * satisfy the signature; this function does not read them.
 *
 * @param frame the stack frame whose first argument is to be hashed.
 * @param frame_pointer unused.
 * @param env unused.
 * @return whatever make_integer returns for the hash value.
 */
struct cons_pointer lisp_get_hash(struct stack_frame *frame,
                                  struct cons_pointer frame_pointer,
                                  struct cons_pointer env)
{
    const uint32_t hash = get_hash(frame->arg[0]);

    return make_integer(hash, NIL);
}
/**
* Make a hashmap with this number of buckets.
*/
struct cons_pointer make_hashmap( uint32_t n_buckets) {
struct cons_pointer result = make_vso(HASHTAG,
(sizeof(struct cons_pointer) * (n_buckets + 1)) +
(sizeof(uint32_t) * 2));
// TODO: fill in the payload!
struct hashmap_payload *payload =
(struct hashmap_payload *) &pointer_to_vso(result)->payload;
return result;
}

36
src/memory/hashmap.h Normal file
View file

@ -0,0 +1,36 @@
/*
* hashmap.h
*
* Basic implementation of a hashmap.
*
* (c) 2021 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_hashmap_h
#define __psse_hashmap_h
#include "memory/consspaceobject.h"
#include "memory/vectorspace.h"
/**
 * The payload of a hashmap. The number of buckets is assigned at run-time,
 * and is stored in n_buckets. Each bucket is something ASSOC can consume:
 * i.e. either an assoc list or a further hashmap.
 *
 * `buckets` is a flexible array member, so `sizeof(struct hashmap_payload)`
 * covers only the fixed fields; space for the buckets themselves has to be
 * added when the object is allocated.
 */
struct hashmap_payload {
/* presumably the function used to hash keys for this map; nothing in this
 * header reads it - TODO confirm intended use. */
struct cons_pointer hash_fn;
/* the number of entries in `buckets`. */
uint32_t n_buckets;
uint32_t unused; /* for word alignment and possible later expansion */
/* the buckets themselves; there are `n_buckets` of them. */
struct cons_pointer buckets[];
};
/**
 * Get the hash value for the cell indicated by `ptr`.
 */
uint32_t get_hash(struct cons_pointer ptr);
/**
 * A wrapper around get_hash with the signature of a Lisp function.
 */
struct cons_pointer lisp_get_hash(struct stack_frame *frame,
struct cons_pointer frame_pointer,
struct cons_pointer env);
/**
 * Make a hashmap with `n_buckets` buckets.
 * NOTE(review): `hash_fn` presumably ends up in the `hash_fn` field of
 * struct hashmap_payload - confirm against the definition.
 */
struct cons_pointer make_hashmap( uint32_t n_buckets, struct cons_pointer hash_fn);
#endif

View file

@ -10,11 +10,11 @@
#include <math.h> #include <math.h>
#include <stdbool.h> #include <stdbool.h>
#include "conspage.h" #include "memory/conspage.h"
#include "consspaceobject.h" #include "memory/consspaceobject.h"
#include "integer.h" #include "arith/integer.h"
#include "peano.h" #include "arith/peano.h"
#include "ratio.h" #include "arith/ratio.h"
/** /**
* Shallow, and thus cheap, equality: true if these two objects are * Shallow, and thus cheap, equality: true if these two objects are
@ -69,6 +69,9 @@ bool equal(struct cons_pointer a, struct cons_pointer b)
case CONSTV: case CONSTV:
case LAMBDATV: case LAMBDATV:
case NLAMBDATV: case NLAMBDATV:
/* TODO: it is not OK to do this on the stack since list-like
* structures can be of indefinite extent. It *must* be done by
* iteration (and even that is problematic) */
result = result =
equal(cell_a->payload.cons.car, cell_b->payload.cons.car) && equal(cell_a->payload.cons.cdr, equal(cell_a->payload.cons.car, cell_b->payload.cons.car) && equal(cell_a->payload.cons.cdr,
cell_b->payload.cons.cdr); cell_b->payload.cons.cdr);
@ -76,11 +79,13 @@ bool equal(struct cons_pointer a, struct cons_pointer b)
case KEYTV: case KEYTV:
case STRINGTV: case STRINGTV:
case SYMBOLTV: case SYMBOLTV:
/* /* slightly complex because a string may or may not have a '\0'
* slightly complex because a string may or may not have a '\0'
* cell at the end, but I'll ignore that for now. I think in * cell at the end, but I'll ignore that for now. I think in
* practice only the empty string will. * practice only the empty string will.
*/ */
/* TODO: it is not OK to do this on the stack since list-like
* structures can be of indefinite extent. It *must* be done by
* iteration (and even that is problematic) */
result = result =
cell_a->payload.string.character == cell_a->payload.string.character ==
cell_b->payload.string.character && cell_b->payload.string.character &&