post-scarcity/src/c/ops/string_ops.c

223 lines
6.8 KiB
C

/**
* ops/string_ops.h
*
* Operations on a Lisp string frame.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
#include "debug.h"
#include "memory/node.h"
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/pso2.h"
#include "memory/tags.h"
#include "ops/string_ops.h"
#include "ops/truth.h"
#include "payloads/cons.h"
#include "payloads/exception.h"
#include "payloads/keyword.h"
#include "payloads/symbol.h"
/**
* Return a hash value for this string like thing.
*
* What's important here is that two strings with the same characters in the
* same order should have the same hash value, even if one was created using
* `"foobar"` and the other by `(append "foo" "bar")`. I *think* this function
* has that property. I doubt that it's the most efficient hash function to
* have that property.
*
* returns 0 for things which are not string like.
*/
uint32_t calculate_hash( wint_t c, struct pso_pointer ptr ) {
struct pso2 *cell = pointer_to_object( ptr );
uint32_t result = 0;
switch ( get_tag_value( ptr ) ) {
case KEYTV:
case STRINGTV:
case SYMBOLTV:
if ( c_nilp( cell->payload.string.cdr ) ) {
result = ( uint32_t ) c;
} else {
result =
( ( uint32_t ) c *
cell->payload.string.hash ) & 0xffffffff;
}
break;
}
return result;
}
/**
* Construct a string from this character (which later will be UTF) and
* this tail. A string is implemented as a flat list of cells each of which
* has one character and a pointer to the next; in the last cell the
* pointer to next is nil.
*
* NOTE THAT: in 0.1.X, we may allocate symbols and keywords as arrays of
* char32_t in larger pso classes, so this function may be only for strings
* (and thus simpler).
*/
struct pso_pointer make_string_like_thing( struct pso_pointer frame_pointer,
wint_t c, struct pso_pointer tail,
char *tag ) {
struct pso_pointer pointer = tail;
if ( check_type( tail, tag ) || c_nilp( tail ) ) {
pointer = allocate( frame_pointer, tag, CONS_SIZE_CLASS );
struct pso2 *cell = pointer_to_object( pointer );
cell->payload.string.character = c;
cell->payload.string.cdr = tail;
cell->payload.string.hash = calculate_hash( c, tail );
debug_printf( DEBUG_ALLOC, 0,
L"Building string-like-thing of type %3.3s: ",
cell->header.tag.bytes.mnemonic );
debug_print_object( pointer, DEBUG_ALLOC, 0 );
debug_println( DEBUG_ALLOC );
} else {
// \todo should throw an exception!
struct pso2 *tobj = pointer_to_object( tail );
debug_printf( DEBUG_ALLOC, 0,
L"Warning: %3.3s cannot be prepended to %3.3s\n",
tag, tobj->header.tag.bytes.mnemonic );
}
return pointer;
}
/**
* Return a lisp string representation of this wide character string.
*/
struct pso_pointer c_string_to_lisp_string( struct pso_pointer frame_pointer,
wchar_t *string ) {
struct pso_pointer result = nil;
for ( int i = wcslen( string ) - 1; i >= 0; i-- ) {
if ( string[i] != '"' ) {
result = make_string( frame_pointer, string[i], result );
} else {
result = make_string( frame_pointer, L'\\',
make_string( frame_pointer, string[i],
result ) );
}
}
return result;
}
/**
* Convert this lisp string-like-thing (also works for symbols, and, later
* keywords) into a UTF-8 string. NOTE that the returned value has been
* malloced and must be freed. TODO: candidate to moving into a utilities
* file.
*
* @param s the lisp string or symbol;
* @return the c string.
*/
char *lisp_string_to_c_string( struct pso_pointer s ) {
char *result = NULL;
if ( stringp( s ) || symbolp( s ) ) {
int len = 1;
for ( struct pso_pointer c = s; !c_nilp( c ); c = c_cdr( c ) ) {
len++;
}
wchar_t *buffer = calloc( len, sizeof( char32_t ) );
int i = 0;
for ( struct pso_pointer c = s; !c_nilp( c ); c = c_cdr( c ) ) {
buffer[i++] =
( wchar_t ) ( pointer_to_object( c )->payload.
string.character );
}
mbstate_t ps;
const wchar_t *src = buffer;
memset( &ps, 0, sizeof( ps ) );
result =
calloc( wcsrtombs( NULL, &src, len, &ps ) + 1, sizeof( char ) );
src = buffer;
memset( &ps, 0, sizeof( ps ) );
wcsrtombs( result, &src, len, &ps );
free( buffer );
// mbstate_t ps = mbstate_t();
//
// result = calloc( wcsrtombs( NULL, &buffer, len, &ps) + 1 );
// wcsrtombs( result, &buffer, len, &ps);
// free( buffer );
}
debug_print( L"lisp_string_to_c_string( ", DEBUG_IO, 0 );
debug_print_object( s, DEBUG_IO, 0 );
debug_printf( DEBUG_IO, 0, L") => '%s'\n", result );
return result;
}
/**
* Return a lisp symbol representation of this wide character string. In
* symbols, I am accepting only lower case characters and certain punctuation.
*/
struct pso_pointer c_string_to_lisp_symbol( struct pso_pointer frame_pointer,
char32_t *symbol ) {
struct pso_pointer result = nil;
for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) {
char32_t c = symbol[i];
if ( symbol_char_p(c)) {
result = make_symbol( frame_pointer, c, result );
}
}
return result;
}
/**
* Return a lisp keyword representation of this wide character string. In
* keywords, I am accepting only lower case characters and numbers.
*/
struct pso_pointer c_string_to_lisp_keyword( struct pso_pointer frame_pointer,
char32_t *symbol ) {
struct pso_pointer result = nil;
for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) {
char32_t c = towlower( symbol[i] );
if ( iswalnum( c ) || c == L'-' ) {
result = make_keyword( frame_pointer, c, result );
}
}
return result;
}
/**
* @return t if `arg` represents an end of string, else false.
* \todo candidate for moving to a memory/string.c file
*/
bool end_of_stringp( struct pso_pointer arg ) {
return c_nilp( arg ) ||
( stringp( arg ) &&
pointer_to_object( arg )->payload.string.character ==
( wint_t ) '\0' );
}