223 lines
6.8 KiB
C
223 lines
6.8 KiB
C
/**
|
|
* ops/string_ops.h
|
|
*
|
|
* Operations on a Lisp string frame.
|
|
*
|
|
* (c) 2026 Simon Brooke <simon@journeyman.cc>
|
|
* Licensed under GPL version 2.0, or, at your option, any later version.
|
|
*/
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
/*
|
|
* wide characters
|
|
*/
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
|
|
#include "debug.h"
|
|
#include "memory/node.h"
|
|
#include "memory/pointer.h"
|
|
#include "memory/pso.h"
|
|
#include "memory/pso2.h"
|
|
#include "memory/tags.h"
|
|
|
|
#include "ops/string_ops.h"
|
|
#include "ops/truth.h"
|
|
|
|
#include "payloads/cons.h"
|
|
#include "payloads/exception.h"
|
|
#include "payloads/keyword.h"
|
|
#include "payloads/symbol.h"
|
|
|
|
|
|
/**
|
|
* Return a hash value for this string like thing.
|
|
*
|
|
* What's important here is that two strings with the same characters in the
|
|
* same order should have the same hash value, even if one was created using
|
|
* `"foobar"` and the other by `(append "foo" "bar")`. I *think* this function
|
|
* has that property. I doubt that it's the most efficient hash function to
|
|
* have that property.
|
|
*
|
|
* returns 0 for things which are not string like.
|
|
*/
|
|
uint32_t calculate_hash( wint_t c, struct pso_pointer ptr ) {
|
|
struct pso2 *cell = pointer_to_object( ptr );
|
|
uint32_t result = 0;
|
|
|
|
switch ( get_tag_value( ptr ) ) {
|
|
case KEYTV:
|
|
case STRINGTV:
|
|
case SYMBOLTV:
|
|
if ( c_nilp( cell->payload.string.cdr ) ) {
|
|
result = ( uint32_t ) c;
|
|
} else {
|
|
result =
|
|
( ( uint32_t ) c *
|
|
cell->payload.string.hash ) & 0xffffffff;
|
|
}
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Construct a string from this character (which later will be UTF) and
|
|
* this tail. A string is implemented as a flat list of cells each of which
|
|
* has one character and a pointer to the next; in the last cell the
|
|
* pointer to next is nil.
|
|
*
|
|
* NOTE THAT: in 0.1.X, we may allocate symbols and keywords as arrays of
|
|
* char32_t in larger pso classes, so this function may be only for strings
|
|
* (and thus simpler).
|
|
*/
|
|
struct pso_pointer make_string_like_thing( struct pso_pointer frame_pointer,
|
|
wint_t c, struct pso_pointer tail,
|
|
char *tag ) {
|
|
struct pso_pointer pointer = tail;
|
|
|
|
if ( check_type( tail, tag ) || c_nilp( tail ) ) {
|
|
pointer = allocate( frame_pointer, tag, CONS_SIZE_CLASS );
|
|
struct pso2 *cell = pointer_to_object( pointer );
|
|
|
|
cell->payload.string.character = c;
|
|
cell->payload.string.cdr = tail;
|
|
|
|
cell->payload.string.hash = calculate_hash( c, tail );
|
|
debug_printf( DEBUG_ALLOC, 0,
|
|
L"Building string-like-thing of type %3.3s: ",
|
|
cell->header.tag.bytes.mnemonic );
|
|
debug_print_object( pointer, DEBUG_ALLOC, 0 );
|
|
debug_println( DEBUG_ALLOC );
|
|
} else {
|
|
// \todo should throw an exception!
|
|
struct pso2 *tobj = pointer_to_object( tail );
|
|
debug_printf( DEBUG_ALLOC, 0,
|
|
L"Warning: %3.3s cannot be prepended to %3.3s\n",
|
|
tag, tobj->header.tag.bytes.mnemonic );
|
|
}
|
|
|
|
|
|
return pointer;
|
|
}
|
|
|
|
|
|
/**
|
|
* Return a lisp string representation of this wide character string.
|
|
*/
|
|
struct pso_pointer c_string_to_lisp_string( struct pso_pointer frame_pointer,
|
|
wchar_t *string ) {
|
|
struct pso_pointer result = nil;
|
|
|
|
for ( int i = wcslen( string ) - 1; i >= 0; i-- ) {
|
|
if ( string[i] != '"' ) {
|
|
result = make_string( frame_pointer, string[i], result );
|
|
} else {
|
|
result = make_string( frame_pointer, L'\\',
|
|
make_string( frame_pointer, string[i],
|
|
result ) );
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Convert this lisp string-like-thing (also works for symbols, and, later
|
|
* keywords) into a UTF-8 string. NOTE that the returned value has been
|
|
* malloced and must be freed. TODO: candidate to moving into a utilities
|
|
* file.
|
|
*
|
|
* @param s the lisp string or symbol;
|
|
* @return the c string.
|
|
*/
|
|
char *lisp_string_to_c_string( struct pso_pointer s ) {
|
|
char *result = NULL;
|
|
if ( stringp( s ) || symbolp( s ) ) {
|
|
int len = 1;
|
|
for ( struct pso_pointer c = s; !c_nilp( c ); c = c_cdr( c ) ) {
|
|
len++;
|
|
}
|
|
|
|
wchar_t *buffer = calloc( len, sizeof( char32_t ) );
|
|
int i = 0;
|
|
for ( struct pso_pointer c = s; !c_nilp( c ); c = c_cdr( c ) ) {
|
|
buffer[i++] =
|
|
( wchar_t ) ( pointer_to_object( c )->payload.
|
|
string.character );
|
|
}
|
|
|
|
mbstate_t ps;
|
|
const wchar_t *src = buffer;
|
|
memset( &ps, 0, sizeof( ps ) );
|
|
result =
|
|
calloc( wcsrtombs( NULL, &src, len, &ps ) + 1, sizeof( char ) );
|
|
src = buffer;
|
|
memset( &ps, 0, sizeof( ps ) );
|
|
wcsrtombs( result, &src, len, &ps );
|
|
free( buffer );
|
|
// mbstate_t ps = mbstate_t();
|
|
//
|
|
// result = calloc( wcsrtombs( NULL, &buffer, len, &ps) + 1 );
|
|
// wcsrtombs( result, &buffer, len, &ps);
|
|
// free( buffer );
|
|
}
|
|
|
|
debug_print( L"lisp_string_to_c_string( ", DEBUG_IO, 0 );
|
|
debug_print_object( s, DEBUG_IO, 0 );
|
|
debug_printf( DEBUG_IO, 0, L") => '%s'\n", result );
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Return a lisp symbol representation of this wide character string. In
|
|
* symbols, I am accepting only lower case characters and certain punctuation.
|
|
*/
|
|
struct pso_pointer c_string_to_lisp_symbol( struct pso_pointer frame_pointer,
|
|
char32_t *symbol ) {
|
|
struct pso_pointer result = nil;
|
|
|
|
for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) {
|
|
char32_t c = symbol[i];
|
|
|
|
if ( symbol_char_p(c)) {
|
|
result = make_symbol( frame_pointer, c, result );
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Return a lisp keyword representation of this wide character string. In
|
|
* keywords, I am accepting only lower case characters and numbers.
|
|
*/
|
|
struct pso_pointer c_string_to_lisp_keyword( struct pso_pointer frame_pointer,
|
|
char32_t *symbol ) {
|
|
struct pso_pointer result = nil;
|
|
|
|
for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) {
|
|
char32_t c = towlower( symbol[i] );
|
|
|
|
if ( iswalnum( c ) || c == L'-' ) {
|
|
result = make_keyword( frame_pointer, c, result );
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
* @return t if `arg` represents an end of string, else false.
|
|
* \todo candidate for moving to a memory/string.c file
|
|
*/
|
|
bool end_of_stringp( struct pso_pointer arg ) {
|
|
return c_nilp( arg ) ||
|
|
( stringp( arg ) &&
|
|
pointer_to_object( arg )->payload.string.character ==
|
|
( wint_t ) '\0' );
|
|
}
|