/** * ops/string_ops.h * * Operations on a Lisp string frame. * * (c) 2026 Simon Brooke * Licensed under GPL version 2.0, or, at your option, any later version. */ #include #include #include /* * wide characters */ #include #include #include "debug.h" #include "memory/node.h" #include "memory/pointer.h" #include "memory/pso.h" #include "memory/pso2.h" #include "memory/tags.h" #include "ops/string_ops.h" #include "ops/truth.h" #include "payloads/cons.h" #include "payloads/exception.h" #include "payloads/keyword.h" #include "payloads/symbol.h" /** * Return a hash value for this string like thing. * * What's important here is that two strings with the same characters in the * same order should have the same hash value, even if one was created using * `"foobar"` and the other by `(append "foo" "bar")`. I *think* this function * has that property. I doubt that it's the most efficient hash function to * have that property. * * returns 0 for things which are not string like. */ uint32_t calculate_hash( wint_t c, struct pso_pointer ptr ) { struct pso2 *cell = pointer_to_object( ptr ); uint32_t result = 0; switch ( get_tag_value( ptr ) ) { case KEYTV: case STRINGTV: case SYMBOLTV: if ( c_nilp( cell->payload.string.cdr ) ) { result = ( uint32_t ) c; } else { result = ( ( uint32_t ) c * cell->payload.string.hash ) & 0xffffffff; } break; } return result; } /** * Construct a string from this character (which later will be UTF) and * this tail. A string is implemented as a flat list of cells each of which * has one character and a pointer to the next; in the last cell the * pointer to next is nil. * * NOTE THAT: in 0.1.X, we may allocate symbols and keywords as arrays of * wchar_t in larger pso classes, so this function may be only for strings * (and thus simpler). */ struct pso_pointer make_string_like_thing( struct pso_pointer frame_pointer, wint_t c, struct pso_pointer tail, char *tag ) { struct pso_pointer pointer = tail; if ( check_type( tail, tag ) || c_nilp( tail ) ) { pointer = allocate( frame_pointer, tag, CONS_SIZE_CLASS ); struct pso2 *cell = pointer_to_object( pointer ); cell->payload.string.character = c; cell->payload.string.cdr = tail; cell->payload.string.hash = calculate_hash( c, tail ); debug_printf( DEBUG_ALLOC, 0, L"Building string-like-thing of type %3.3s: ", cell->header.tag.bytes.mnemonic ); debug_print_object( pointer, DEBUG_ALLOC, 0 ); debug_println( DEBUG_ALLOC ); } else { // \todo should throw an exception! struct pso2 *tobj = pointer_to_object( tail ); debug_printf( DEBUG_ALLOC, 0, L"Warning: %3.3s cannot be prepended to %3.3s\n", tag, tobj->header.tag.bytes.mnemonic ); } return pointer; } /** * Return a lisp string representation of this wide character string. */ struct pso_pointer c_string_to_lisp_string( struct pso_pointer frame_pointer, wchar_t *string ) { struct pso_pointer result = nil; for ( int i = wcslen( string ) - 1; i >= 0; i-- ) { if ( string[i] != '"' ) { result = make_string( frame_pointer, string[i], result ); } else { result = make_string( frame_pointer, L'\\', make_string( frame_pointer, string[i], result ) ); } } return result; } /** * Convert this lisp string-like-thing (also works for symbols, and, later * keywords) into a UTF-8 string. NOTE that the returned value has been * malloced and must be freed. TODO: candidate to moving into a utilities * file. * * @param s the lisp string or symbol; * @return the c string. */ char *lisp_string_to_c_string( struct pso_pointer s ) { char *result = NULL; if ( stringp( s ) || symbolp( s ) ) { int len = 1; for ( struct pso_pointer c = s; !c_nilp( c ); c = c_cdr( c ) ) { len++; } wchar_t *buffer = calloc( len, sizeof( wchar_t ) ); int i = 0; for ( struct pso_pointer c = s; !c_nilp( c ); c = c_cdr( c ) ) { buffer[i++] = ( wchar_t ) ( pointer_to_object( c )->payload. string.character ); } mbstate_t ps; const wchar_t *src = buffer; memset( &ps, 0, sizeof( ps ) ); result = calloc( wcsrtombs( NULL, &src, len, &ps ) + 1, sizeof( char ) ); src = buffer; memset( &ps, 0, sizeof( ps ) ); wcsrtombs( result, &src, len, &ps ); free( buffer ); // mbstate_t ps = mbstate_t(); // // result = calloc( wcsrtombs( NULL, &buffer, len, &ps) + 1 ); // wcsrtombs( result, &buffer, len, &ps); // free( buffer ); } debug_print( L"lisp_string_to_c_string( ", DEBUG_IO, 0 ); debug_print_object( s, DEBUG_IO, 0 ); debug_printf( DEBUG_IO, 0, L") => '%s'\n", result ); return result; } /** * Return a lisp symbol representation of this wide character string. In * symbols, I am accepting only lower case characters and certain punctuation. */ struct pso_pointer c_string_to_lisp_symbol( struct pso_pointer frame_pointer, wchar_t *symbol ) { struct pso_pointer result = nil; for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) { wchar_t c = symbol[i]; if ( symbol_char_p(c)) { result = make_symbol( frame_pointer, c, result ); } } return result; } /** * Return a lisp keyword representation of this wide character string. In * keywords, I am accepting only lower case characters and numbers. */ struct pso_pointer c_string_to_lisp_keyword( struct pso_pointer frame_pointer, wchar_t *symbol ) { struct pso_pointer result = nil; for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) { wchar_t c = towlower( symbol[i] ); if ( iswalnum( c ) || c == L'-' ) { result = make_keyword( frame_pointer, c, result ); } } return result; } /** * @return t if `arg` represents an end of string, else false. * \todo candidate for moving to a memory/string.c file */ bool end_of_stringp( struct pso_pointer arg ) { return c_nilp( arg ) || ( stringp( arg ) && pointer_to_object( arg )->payload.string.character == ( wint_t ) '\0' ); }