From 3f3b596ff0f2e2b35b1328110836717994cb531c Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Tue, 3 Aug 2021 15:46:50 +0100 Subject: [PATCH] Added the beginnings of hashmap but does not yet compile. --- .vscode/settings.json | 7 ----- src/memory/conspage.h | 2 +- src/memory/consspaceobject.c | 42 ++++++++++++++++++++++--- src/memory/consspaceobject.h | 7 +++-- src/memory/hashmap.c | 61 ++++++++++++++++++++++++++++++++++++ src/memory/hashmap.h | 36 +++++++++++++++++++++ src/ops/equal.c | 25 +++++++++------ 7 files changed, 154 insertions(+), 26 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 src/memory/hashmap.c create mode 100644 src/memory/hashmap.h diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 14fb483..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "files.associations": { - "future": "cpp", - "system_error": "cpp", - "functional": "c" - } -} \ No newline at end of file diff --git a/src/memory/conspage.h b/src/memory/conspage.h index f13a46b..9eab748 100644 --- a/src/memory/conspage.h +++ b/src/memory/conspage.h @@ -13,7 +13,7 @@ #ifndef __psse_conspage_h #define __psse_conspage_h -#include "consspaceobject.h" +#include "memory/consspaceobject.h" /** * the number of cons cells on a cons page. 
The maximum value this can diff --git a/src/memory/consspaceobject.c b/src/memory/consspaceobject.c index 98bb495..080158d 100644 --- a/src/memory/consspaceobject.c +++ b/src/memory/consspaceobject.c @@ -101,16 +101,16 @@ struct cons_pointer c_car( struct cons_pointer arg ) { struct cons_pointer c_cdr( struct cons_pointer arg ) { struct cons_pointer result = NIL; - struct cons_space_object cell = pointer2cell( arg ); + struct cons_space_object *cell = &pointer2cell( arg ); - switch (cell.tag.value) { + switch (cell->tag.value) { case CONSTV: - result = pointer2cell( arg ).payload.cons.cdr; + result = cell->payload.cons.cdr; break; case KEYTV: case STRINGTV: case SYMBOLTV: - result = pointer2cell( arg ).payload.string.cdr; + result = cell->payload.string.cdr; break; } @@ -226,6 +226,36 @@ struct cons_pointer make_nlambda( struct cons_pointer args, return pointer; } +/** + * Return a hash value for the string made of character `c` followed by the string-like tail `ptr`. + * If `ptr` is NIL the hash is `c`; if `ptr` is string-like it is `c` times the hash stored in that cell, truncated to 32 bits; otherwise it is 0. + * + * What's important here is that two strings with the same characters in the + * same order should have the same hash value, even if one was created using + * `"foobar"` and the other by `(append "foo" "bar")`. I *think* this function + * has that property. I doubt that it's the most efficient hash function to have that property. + */ +uint32_t calculate_hash( wint_t c, struct cons_pointer ptr) { +struct cons_space_object *cell = &pointer2cell(ptr); + uint32_t result = 0; + + if (nilp(ptr)) { + result = (uint32_t) c; + } else { + switch (cell->tag.value) { + case KEYTV: + case STRINGTV: + case SYMBOLTV: + result = (uint32_t) c * cell->payload.string.hash; + break; + default: + break; + } + } + + return result; +} + /** * Construct a string from this character (which later will be UTF) and * this tail. A string is implemented as a flat list of cells each of which @@ -245,8 +275,10 @@ make_string_like_thing( wint_t c, struct cons_pointer tail, char *tag ) { cell->payload.string.cdr.page = tail.page; /* \todo There's a problem here. 
Sometimes the offsets on * strings are quite massively off. Fix is probably - * cell->payload.string.cdr = tsil */ + * cell->payload.string.cdr = tail */ cell->payload.string.cdr.offset = tail.offset; + + cell->payload.string.hash = calculate_hash(c, tail); } else { // \todo should throw an exception! debug_printf( DEBUG_ALLOC, diff --git a/src/memory/consspaceobject.h b/src/memory/consspaceobject.h index f82b103..7bf34de 100644 --- a/src/memory/consspaceobject.h +++ b/src/memory/consspaceobject.h @@ -20,7 +20,8 @@ #include #include -#include "fopen.h" +#include "io/fopen.h" +#include "memory/conspage.h" /** @@ -557,8 +558,8 @@ struct stream_payload { struct string_payload { /** the actual character stored in this cell */ wint_t character; - /** unused padding to word-align the cdr */ - uint32_t padding; + /** a hash of the string value, computed at store time. */ + uint32_t hash; /** the remainder of the string following this character. */ struct cons_pointer cdr; }; diff --git a/src/memory/hashmap.c b/src/memory/hashmap.c new file mode 100644 index 0000000..fcd69e4 --- /dev/null +++ b/src/memory/hashmap.c @@ -0,0 +1,61 @@ +/* + * hashmap.c + * + * Basic implementation of a hashmap. + * + * (c) 2021 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "arith/integer.h" +#include "memory/consspaceobject.h" +#include "memory/hashmap.h" + +/** + * Get the hash value for the cell indicated by this `ptr`; currently only + * implemented for string like things; every other kind of cell yields 0. + */ +uint32_t get_hash(struct cons_pointer ptr) +{ + struct cons_space_object *cell = &pointer2cell(ptr); + uint32_t result = 0; + + switch (cell->tag.value) + { + case KEYTV: + case STRINGTV: + case SYMBOLTV: + result = cell->payload.string.hash; + break; + default: + result = 0; /* TODO: Not Yet Implemented */ + } + + return result; +} + +/** + * A lisp function signature conforming wrapper around get_hash, q.v.. 
+ */ +struct cons_pointer lisp_get_hash(struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env) +{ + return make_integer(get_hash(frame->arg[0]), NIL); +} + +/** + * Make a hashmap with this number of buckets; `hash_fn` is accepted so the definition matches the prototype in hashmap.h, but it is not stored yet. + */ +struct cons_pointer make_hashmap( uint32_t n_buckets, struct cons_pointer hash_fn) { + struct cons_pointer result = make_vso(HASHTAG, + (sizeof(struct cons_pointer) * (n_buckets + 1)) + + (sizeof(uint32_t) * 2)); + + // TODO: fill in the payload (hash_fn, n_buckets and the buckets)! + + struct hashmap_payload *payload = + (struct hashmap_payload *) &pointer_to_vso(result)->payload; + + return result; +} \ No newline at end of file diff --git a/src/memory/hashmap.h b/src/memory/hashmap.h new file mode 100644 index 0000000..b834f5a --- /dev/null +++ b/src/memory/hashmap.h @@ -0,0 +1,36 @@ +/* + * hashmap.h + * + * Basic implementation of a hashmap. + * + * (c) 2021 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_hashmap_h +#define __psse_hashmap_h + +#include "memory/consspaceobject.h" +#include "memory/vectorspace.h" + +/** + * The payload of a hashmap. The number of buckets is assigned at run-time, + * and is stored in n_buckets. Each bucket is something ASSOC can consume: + * i.e. either an assoc list or a further hashmap. 
+ */ +struct hashmap_payload { + struct cons_pointer hash_fn; /* NOTE(review): presumably the function used to hash keys into buckets - confirm */ + uint32_t n_buckets; /* the number of buckets, assigned at run-time */ + uint32_t unused; /* for word alignment and possible later expansion */ + struct cons_pointer buckets[]; /* the buckets: each is an assoc list or a further hashmap */ +}; + +uint32_t get_hash(struct cons_pointer ptr); + +struct cons_pointer lisp_get_hash(struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env); + +struct cons_pointer make_hashmap( uint32_t n_buckets, struct cons_pointer hash_fn); + +#endif \ No newline at end of file diff --git a/src/ops/equal.c b/src/ops/equal.c index 6a87de8..feffb93 100644 --- a/src/ops/equal.c +++ b/src/ops/equal.c @@ -10,11 +10,11 @@ #include #include -#include "conspage.h" -#include "consspaceobject.h" -#include "integer.h" -#include "peano.h" -#include "ratio.h" +#include "memory/conspage.h" +#include "memory/consspaceobject.h" +#include "arith/integer.h" +#include "arith/peano.h" +#include "arith/ratio.h" /** * Shallow, and thus cheap, equality: true if these two objects are @@ -69,6 +69,9 @@ bool equal(struct cons_pointer a, struct cons_pointer b) case CONSTV: case LAMBDATV: case NLAMBDATV: + /* TODO: it is not OK to do this on the stack since list-like + * structures can be of indefinite extent. It *must* be done by + * iteration (and even that is problematic) */ result = equal(cell_a->payload.cons.car, cell_b->payload.cons.car) && equal(cell_a->payload.cons.cdr, cell_b->payload.cons.cdr); @@ -76,11 +79,13 @@ bool equal(struct cons_pointer a, struct cons_pointer b) case KEYTV: case STRINGTV: case SYMBOLTV: - /* - * slightly complex because a string may or may not have a '\0' - * cell at the end, but I'll ignore that for now. I think in - * practice only the empty string will. - */ + /* slightly complex because a string may or may not have a '\0' + * cell at the end, but I'll ignore that for now. I think in + * practice only the empty string will. + */ + /* TODO: it is not OK to do this on the stack since list-like + * structures can be of indefinite extent. 
It *must* be done by + * iteration (and even that is problematic) */ result = cell_a->payload.string.character == cell_b->payload.string.character &&