From ffceda5edc572a49fc49d333d2c6bcf9803e64bf Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Wed, 25 Feb 2026 22:10:37 +0000 Subject: [PATCH] Greatly improved performance of `equal` for string like things. --- src/memory/consspaceobject.c | 2 +- src/ops/equal.c | 47 ++++++++++++++++++++++++------------ src/ops/equal.h | 6 +++++ 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/memory/consspaceobject.c b/src/memory/consspaceobject.c index 3f85ed6..0bd0b90 100644 --- a/src/memory/consspaceobject.c +++ b/src/memory/consspaceobject.c @@ -150,7 +150,7 @@ struct cons_pointer c_car( struct cons_pointer arg ) { /** * Implementation of cdr in C. If arg is not a sequence, or the current user is - * not authorised to read it,does not error but returns nil. + * not authorised to read it, does not error but returns nil. */ struct cons_pointer c_cdr( struct cons_pointer arg ) { struct cons_pointer result = NIL; diff --git a/src/ops/equal.c b/src/ops/equal.c index 1ad2fdc..fca8f61 100644 --- a/src/ops/equal.c +++ b/src/ops/equal.c @@ -9,6 +9,7 @@ #include #include +#include #include "arith/integer.h" #include "arith/peano.h" @@ -363,22 +364,38 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) { /* TODO: it is not OK to do this on the stack since list-like * structures can be of indefinite extent. It *must* be done by * iteration (and even that is problematic) */ + if (cell_a->payload.string.hash == cell_b->payload.string.hash) { + wchar_t a_buff[ STRING_SHIPYARD_SIZE], b_buff[ STRING_SHIPYARD_SIZE]; + uint32_t tag = cell_a->tag.value; + int i = 0; + + memset(a_buff,0,sizeof(a_buff)); + memset(b_buff,0,sizeof(b_buff)); + + for (; (i < (STRING_SHIPYARD_SIZE - 1)) && !nilp( a) && !nilp( b); i++) { + a_buff[i] = cell_a->payload.string.character; + a = c_cdr(a); + cell_a = &pointer2cell( a ); + + b_buff[i] = cell_b->payload.string.character; + b = c_cdr( b); + cell_b = &pointer2cell( b); + } + #ifdef DEBUG - debug_print( L"Comparing '", DEBUG_ARITH); - debug_print_object( a, DEBUG_ARITH); - debug_print( L"' to '", DEBUG_ARITH); - debug_print_object( b, DEBUG_ARITH); + debug_print( L"Comparing '", DEBUG_LAMBDA); + debug_print( a_buff, DEBUG_LAMBDA); + debug_print( L"' to '", DEBUG_LAMBDA); + debug_print( b_buff, DEBUG_LAMBDA); + debug_print( L"'\n", DEBUG_LAMBDA); #endif - result = - cell_a->payload.string.hash == cell_b->payload.string.hash - && cell_a->payload.string.character == - cell_b->payload.string.character - && - ( equal - ( cell_a->payload.string.cdr, - cell_b->payload.string.cdr ) - || ( end_of_string( cell_a->payload.string.cdr ) - && end_of_string( cell_b->payload.string.cdr ) ) ); + + /* OK, now we have wchar string buffers loaded from the objects. We + * may not have exhausted either string, so the buffers being equal + * isn't sufficient. So we recurse at least once. */ + + result = (wcsncmp( a_buff, b_buff, i) == 0) && equal( c_cdr(a), c_cdr(b)); + } break; case VECTORPOINTTV: if ( cell_b->tag.value == VECTORPOINTTV) { @@ -403,7 +420,7 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) { * I'll ignore them, too, for now. */ - debug_printf( DEBUG_ARITH, L"\nequal returning %d\n", result ); + debug_printf( DEBUG_LAMBDA, L"\nequal returning %d\n", result ); return result; } diff --git a/src/ops/equal.h b/src/ops/equal.h index 1f27104..061eb94 100644 --- a/src/ops/equal.h +++ b/src/ops/equal.h @@ -15,6 +15,12 @@ #ifndef __equal_h #define __equal_h +/** + * size of buffer for assembling strings. Likely to be useful to + * read, too. + */ +#define STRING_SHIPYARD_SIZE 1024 + /** * Shallow, and thus cheap, equality: true if these two objects are * the same object, else false.