diff --git a/.gitignore b/.gitignore index b428e03..1968658 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,9 @@ log* utils_src/readprintwc/out *.dump + +*.bak + +src/io/fopen + +hi\.* diff --git a/Makefile b/Makefile index 7179c91..c4c4ef3 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ INDENT_FLAGS := -nbad -bap -nbc -br -brf -brs -c33 -cd33 -ncdb -ce -ci4 -cli4 \ -npsl -nsc -nsob -nss -nut -prs -l79 -ts2 CPPFLAGS ?= $(INC_FLAGS) -MMD -MP -g -DDEBUG -LDFLAGS := -lm +LDFLAGS := -lm -lcurl all: $(TARGET) diff --git a/lisp/slurp.lisp b/lisp/slurp.lisp new file mode 100644 index 0000000..e927bcb --- /dev/null +++ b/lisp/slurp.lisp @@ -0,0 +1 @@ +(slurp (set! f (open "http://www.journeyman.cc/"))) diff --git a/src/arith/integer.c b/src/arith/integer.c index 6a26126..48992ca 100644 --- a/src/arith/integer.c +++ b/src/arith/integer.c @@ -12,12 +12,6 @@ #include #include #include -/* safe_iop, as available in the Ubuntu repository, is this one: - * https://code.google.com/archive/p/safe-iop/wikis/README.wiki - * which is installed as `libsafe-iop-dev`. There is an alternate - * implementation here: https://github.com/redpig/safe-iop/ - * which shares the same version number but is not compatible. */ -#include /* * wide characters */ @@ -76,20 +70,16 @@ struct cons_pointer make_integer( int64_t value, struct cons_pointer more ) { * \see add_integers */ __int128_t cell_value( struct cons_pointer c, char op, bool is_first_cell ) { - long int val = nilp( c ) ? - 0 : - pointer2cell( c ).payload.integer.value; + long int val = nilp( c ) ? 0 : pointer2cell( c ).payload.integer.value; long int carry = is_first_cell ? 0 : ( MAX_INTEGER + 1 ); __int128_t result = ( __int128_t ) integerp( c ) ? - ( val == 0 ) ? - carry : - val : - op == '*' ? 1 : 0; + ( val == 0 ) ? carry : val : op == '*' ? 1 : 0; debug_printf( DEBUG_ARITH, L"cell_value: raw value is %ld, is_first_cell = %s; %4.4s; returning ", - val, is_first_cell ? "true" : "false", pointer2cell(c).tag.bytes); + val, is_first_cell ? "true" : "false", + pointer2cell( c ).tag.bytes ); debug_print_128bit( result, DEBUG_ARITH ); debug_println( DEBUG_ARITH ); @@ -109,9 +99,8 @@ __int128_t cell_value( struct cons_pointer c, char op, bool is_first_cell ) { * @return carry, if any, else 0. */ __int128_t int128_to_integer( __int128_t val, - struct cons_pointer less_significant, - struct cons_pointer new) -{ + struct cons_pointer less_significant, + struct cons_pointer new ) { struct cons_pointer cursor = NIL; __int128_t carry = 0; @@ -120,12 +109,12 @@ __int128_t int128_to_integer( __int128_t val, } else { carry = val >> 60; debug_printf( DEBUG_ARITH, - L"int128_to_integer: 64 bit overflow; setting carry to %ld\n", - ( int64_t ) carry ); + L"int128_to_integer: 64 bit overflow; setting carry to %ld\n", + ( int64_t ) carry ); val &= MAX_INTEGER; } - struct cons_space_object * newc = &pointer2cell( new); + struct cons_space_object *newc = &pointer2cell( new ); newc->payload.integer.value = val; if ( integerp( less_significant ) ) { @@ -137,19 +126,21 @@ __int128_t int128_to_integer( __int128_t val, return carry; } -struct cons_pointer make_integer_128(__int128_t val, - struct cons_pointer less_significant) { +struct cons_pointer make_integer_128( __int128_t val, + struct cons_pointer less_significant ) { struct cons_pointer result = NIL; do { if ( MAX_INTEGER >= val ) { - result = make_integer( (long int) val, less_significant); + result = make_integer( ( long int ) val, less_significant ); } else { - less_significant = make_integer( (long int)val & MAX_INTEGER, less_significant); + less_significant = + make_integer( ( long int ) val & MAX_INTEGER, + less_significant ); val = val >> 60; } - } while (nilp(result)); + } while ( nilp( result ) ); return result; } @@ -164,10 +155,10 @@ struct cons_pointer add_integers( struct cons_pointer a, struct cons_pointer cursor = NIL; debug_print( L"add_integers: a = ", DEBUG_ARITH ); - debug_print_object(a, DEBUG_ARITH); + debug_print_object( a, DEBUG_ARITH ); debug_print( L"; b = ", DEBUG_ARITH ); - debug_print_object(b, DEBUG_ARITH); - debug_println(DEBUG_ARITH); + debug_print_object( b, DEBUG_ARITH ); + debug_println( DEBUG_ARITH ); __int128_t carry = 0; bool is_first_cell = true; @@ -194,8 +185,8 @@ struct cons_pointer add_integers( struct cons_pointer a, debug_print_128bit( rv, DEBUG_ARITH ); debug_print( L"\n", DEBUG_ARITH ); - struct cons_pointer new = make_integer( 0, NIL); - carry = int128_to_integer(rv, cursor, new); + struct cons_pointer new = make_integer( 0, NIL ); + carry = int128_to_integer( rv, cursor, new ); cursor = new; if ( nilp( result ) ) { @@ -215,14 +206,14 @@ struct cons_pointer add_integers( struct cons_pointer a, return result; } -struct cons_pointer base_partial(int depth) { - struct cons_pointer result = NIL; +struct cons_pointer base_partial( int depth ) { + struct cons_pointer result = NIL; - for (int i = 0; i < depth; i++) { - result = make_integer(0, result); - } + for ( int i = 0; i < depth; i++ ) { + result = make_integer( 0, result ); + } - return result; + return result; } /** @@ -236,69 +227,70 @@ struct cons_pointer base_partial(int depth) { struct cons_pointer multiply_integers( struct cons_pointer a, struct cons_pointer b ) { struct cons_pointer result = NIL; - bool neg = is_negative(a) != is_negative(b); + bool neg = is_negative( a ) != is_negative( b ); bool is_first_b = true; int oom = -1; debug_print( L"multiply_integers: a = ", DEBUG_ARITH ); - debug_print_object(a, DEBUG_ARITH); + debug_print_object( a, DEBUG_ARITH ); debug_print( L"; b = ", DEBUG_ARITH ); - debug_print_object(b, DEBUG_ARITH); - debug_println(DEBUG_ARITH); + debug_print_object( b, DEBUG_ARITH ); + debug_println( DEBUG_ARITH ); if ( integerp( a ) && integerp( b ) ) { while ( !nilp( b ) ) { - bool is_first_d = true; - struct cons_pointer d = a; - struct cons_pointer partial = base_partial(++oom); - __int128_t carry = 0; + bool is_first_d = true; + struct cons_pointer d = a; + struct cons_pointer partial = base_partial( ++oom ); + __int128_t carry = 0; - while ( !nilp(d) || carry != 0) { - partial = make_integer(0, partial); - struct cons_pointer new = NIL; - __int128_t dv = cell_value( d, '+', is_first_d ); - __int128_t bv = cell_value( b, '+', is_first_b ); + while ( !nilp( d ) || carry != 0 ) { + partial = make_integer( 0, partial ); + struct cons_pointer new = NIL; + __int128_t dv = cell_value( d, '+', is_first_d ); + __int128_t bv = cell_value( b, '+', is_first_b ); - __int128_t rv = (dv * bv) + carry; + __int128_t rv = ( dv * bv ) + carry; - debug_print( L"multiply_integers: d = ", DEBUG_ARITH); - debug_print_object( d, DEBUG_ARITH); - debug_print( L"; dv = ", DEBUG_ARITH ); - debug_print_128bit( dv, DEBUG_ARITH ); - debug_print( L"; bv = ", DEBUG_ARITH ); - debug_print_128bit( bv, DEBUG_ARITH ); - debug_print( L"; carry = ", DEBUG_ARITH ); - debug_print_128bit( carry, DEBUG_ARITH ); - debug_print( L"; rv = ", DEBUG_ARITH ); - debug_print_128bit( rv, DEBUG_ARITH ); - debug_print( L"; acc = ", DEBUG_ARITH ); - debug_print_object( result, DEBUG_ARITH); - debug_print( L"; partial = ", DEBUG_ARITH ); - debug_print_object( partial, DEBUG_ARITH); - debug_print( L"\n", DEBUG_ARITH ); + debug_print( L"multiply_integers: d = ", DEBUG_ARITH ); + debug_print_object( d, DEBUG_ARITH ); + debug_print( L"; dv = ", DEBUG_ARITH ); + debug_print_128bit( dv, DEBUG_ARITH ); + debug_print( L"; bv = ", DEBUG_ARITH ); + debug_print_128bit( bv, DEBUG_ARITH ); + debug_print( L"; carry = ", DEBUG_ARITH ); + debug_print_128bit( carry, DEBUG_ARITH ); + debug_print( L"; rv = ", DEBUG_ARITH ); + debug_print_128bit( rv, DEBUG_ARITH ); + debug_print( L"; acc = ", DEBUG_ARITH ); + debug_print_object( result, DEBUG_ARITH ); + debug_print( L"; partial = ", DEBUG_ARITH ); + debug_print_object( partial, DEBUG_ARITH ); + debug_print( L"\n", DEBUG_ARITH ); - new = make_integer_128(rv, base_partial(oom)); + new = make_integer_128( rv, base_partial( oom ) ); - if ( zerop(partial)) { - partial = new; - } else { - partial = add_integers(partial, new); + if ( zerop( partial ) ) { + partial = new; + } else { + partial = add_integers( partial, new ); + } + + d = integerp( d ) ? pointer2cell( d ).payload.integer. + more : NIL; + is_first_d = false; } - d = integerp(d) ? pointer2cell( d ).payload.integer.more : NIL; - is_first_d = false; - } - - if (nilp(result) || zerop(result)) { - result = partial; - } else { - struct cons_pointer old = result; - result = add_integers(partial, result); - //if (!eq(result, old)) dec_ref(old); - //if (!eq(result, partial)) dec_ref(partial); - } - b = pointer2cell( b ).payload.integer.more; - is_first_b = false; + if ( nilp( result ) || zerop( result ) ) { + result = partial; + } else { + struct cons_pointer old = result; + result = add_integers( partial, result ); + //if (!eq(result, old)) dec_ref(old); + //if (!eq(result, partial)) dec_ref(partial); + } + b = pointer2cell( b ).payload.integer.more; + is_first_b = false; } } @@ -314,7 +306,6 @@ struct cons_pointer multiply_integers( struct cons_pointer a, */ struct cons_pointer integer_to_string_add_digit( int digit, int digits, struct cons_pointer tail ) { - digits++; wint_t character = btowc( hex_digits[digit] ); return ( digits % 3 == 0 ) ? make_string( L',', make_string( character, @@ -352,10 +343,7 @@ struct cons_pointer integer_to_string( struct cons_pointer int_pointer, while ( accumulator > 0 || !nilp( integer.payload.integer.more ) ) { if ( !nilp( integer.payload.integer.more ) ) { integer = pointer2cell( integer.payload.integer.more ); - accumulator += integer.payload.integer.value == 0 ? - MAX_INTEGER : - ( llabs( integer.payload.integer.value ) * - ( MAX_INTEGER + 1 ) ); + accumulator += integer.payload.integer.value; debug_print ( L"integer_to_string: crossing cell boundary, accumulator is: ", DEBUG_IO ); @@ -369,10 +357,12 @@ struct cons_pointer integer_to_string( struct cons_pointer int_pointer, L"integer_to_string: digit is %ld, hexadecimal is %c, accumulator is: ", offset, hex_digits[offset] ); debug_print_128bit( accumulator, DEBUG_IO ); + debug_print( L"; result is: ", DEBUG_IO ); + debug_print_object( result, DEBUG_IO ); debug_println( DEBUG_IO ); result = - integer_to_string_add_digit( offset, digits++, result ); + integer_to_string_add_digit( offset, ++digits, result ); accumulator = accumulator / base; } while ( accumulator > base ); } diff --git a/src/arith/peano.c b/src/arith/peano.c index 7db638a..8e4cb43 100644 --- a/src/arith/peano.c +++ b/src/arith/peano.c @@ -43,13 +43,14 @@ bool zerop( struct cons_pointer arg ) { struct cons_space_object cell = pointer2cell( arg ); switch ( cell.tag.value ) { - case INTEGERTV: { + case INTEGERTV:{ do { - debug_print(L"zerop: ", DEBUG_ARITH); - debug_dump_object(arg, DEBUG_ARITH); - result = (pointer2cell( arg ).payload.integer.value == 0); - arg = pointer2cell(arg).payload.integer.more; - } while (result && integerp(arg)); + debug_print( L"zerop: ", DEBUG_ARITH ); + debug_dump_object( arg, DEBUG_ARITH ); + result = + ( pointer2cell( arg ).payload.integer.value == 0 ); + arg = pointer2cell( arg ).payload.integer.more; + } while ( result && integerp( arg ) ); } break; case RATIOTV: @@ -66,7 +67,7 @@ bool zerop( struct cons_pointer arg ) { /** * does this `arg` point to a negative number? */ -bool is_negative( struct cons_pointer arg) { +bool is_negative( struct cons_pointer arg ) { bool result = false; struct cons_space_object cell = pointer2cell( arg ); @@ -85,27 +86,31 @@ bool is_negative( struct cons_pointer arg) { return result; } -struct cons_pointer absolute( struct cons_pointer frame_pointer, struct cons_pointer arg) { - struct cons_pointer result = NIL; +struct cons_pointer absolute( struct cons_pointer frame_pointer, + struct cons_pointer arg ) { + struct cons_pointer result = NIL; struct cons_space_object cell = pointer2cell( arg ); - if ( is_negative( arg)) { - switch ( cell.tag.value ) { - case INTEGERTV: - result = make_integer(llabs(cell.payload.integer.value), cell.payload.integer.more); - break; - case RATIOTV: - result = make_ratio(frame_pointer, - absolute(frame_pointer, cell.payload.ratio.dividend), - cell.payload.ratio.divisor); - break; - case REALTV: - result = make_real( 0 - cell.payload.real.value ); - break; + if ( is_negative( arg ) ) { + switch ( cell.tag.value ) { + case INTEGERTV: + result = + make_integer( llabs( cell.payload.integer.value ), + cell.payload.integer.more ); + break; + case RATIOTV: + result = make_ratio( frame_pointer, + absolute( frame_pointer, + cell.payload.ratio.dividend ), + cell.payload.ratio.divisor ); + break; + case REALTV: + result = make_real( 0 - cell.payload.real.value ); + break; + } } - } - return result; + return result; } /** @@ -126,7 +131,7 @@ long double to_long_double( struct cons_pointer arg ) { switch ( cell.tag.value ) { case INTEGERTV: // obviously, this doesn't work for bignums - result = (long double)cell.payload.integer.value; + result = ( long double ) cell.payload.integer.value; // sadly, this doesn't work at all. // result += 1.0; // for (bool is_first = false; integerp(arg); is_first = true) { @@ -141,8 +146,8 @@ long double to_long_double( struct cons_pointer arg ) { // } break; case RATIOTV: - result = to_long_double(cell.payload.ratio.dividend) / - to_long_double(cell.payload.ratio.divisor); + result = to_long_double( cell.payload.ratio.dividend ) / + to_long_double( cell.payload.ratio.divisor ); break; case REALTV: result = cell.payload.real.value; @@ -203,9 +208,9 @@ int64_t to_long_int( struct cons_pointer arg ) { * argument, or NIL if it was not a number. */ struct cons_pointer lisp_absolute( struct stack_frame - *frame, struct cons_pointer frame_pointer, struct - cons_pointer env ) { - return absolute( frame_pointer, frame->arg[0]); + *frame, struct cons_pointer frame_pointer, struct + cons_pointer env ) { + return absolute( frame_pointer, frame->arg[0] ); } /** @@ -388,10 +393,12 @@ struct cons_pointer multiply_2( struct stack_frame *frame, to_long_double( arg2 ) ); break; default: - result = throw_exception( make_cons( - c_string_to_lisp_string( L"Cannot multiply: argument 2 is not a number: " ), - c_type(arg2)), - frame_pointer ); + result = + throw_exception( make_cons + ( c_string_to_lisp_string + ( L"Cannot multiply: argument 2 is not a number: " ), + c_type( arg2 ) ), + frame_pointer ); break; } break; @@ -415,11 +422,12 @@ struct cons_pointer multiply_2( struct stack_frame *frame, to_long_double( arg2 ) ); break; default: - result = throw_exception( - make_cons(c_string_to_lisp_string - ( L"Cannot multiply: argument 2 is not a number" ), - c_type(arg2)), - frame_pointer ); + result = + throw_exception( make_cons + ( c_string_to_lisp_string + ( L"Cannot multiply: argument 2 is not a number" ), + c_type( arg2 ) ), + frame_pointer ); } break; case REALTV: @@ -428,11 +436,10 @@ struct cons_pointer multiply_2( struct stack_frame *frame, to_long_double( arg2 ) ); break; default: - result = throw_exception( - make_cons(c_string_to_lisp_string - ( L"Cannot multiply: argument 1 is not a number" ), - c_type(arg1)), - frame_pointer ); + result = throw_exception( make_cons( c_string_to_lisp_string + ( L"Cannot multiply: argument 1 is not a number" ), + c_type( arg1 ) ), + frame_pointer ); break; } } @@ -460,30 +467,27 @@ struct cons_pointer lisp_multiply( struct struct cons_pointer result = make_integer( 1, NIL ); struct cons_pointer tmp; - for ( int i = 0; - i < args_in_frame - && !nilp( frame->arg[i] ) - && !exceptionp( result ); - i++ ) { - debug_print( L"lisp_multiply: accumulator = ",DEBUG_ARITH); - debug_print_object(result, DEBUG_ARITH); - debug_print( L"; arg = ", DEBUG_ARITH); - debug_print_object(frame->arg[i], DEBUG_ARITH); - debug_println( DEBUG_ARITH); + for ( int i = 0; i < args_in_frame && !nilp( frame->arg[i] ) + && !exceptionp( result ); i++ ) { + debug_print( L"lisp_multiply: accumulator = ", DEBUG_ARITH ); + debug_print_object( result, DEBUG_ARITH ); + debug_print( L"; arg = ", DEBUG_ARITH ); + debug_print_object( frame->arg[i], DEBUG_ARITH ); + debug_println( DEBUG_ARITH ); - multiply_one_arg(frame->arg[i]); + multiply_one_arg( frame->arg[i] ); } struct cons_pointer more = frame->more; while ( consp( more ) && !exceptionp( result ) ) { - multiply_one_arg(c_car( more )); + multiply_one_arg( c_car( more ) ); more = c_cdr( more ); } - debug_print( L"lisp_multiply returning: ",DEBUG_ARITH); - debug_print_object(result, DEBUG_ARITH); - debug_println(DEBUG_ARITH); + debug_print( L"lisp_multiply returning: ", DEBUG_ARITH ); + debug_print_object( result, DEBUG_ARITH ); + debug_println( DEBUG_ARITH ); return result; } @@ -538,9 +542,10 @@ struct cons_pointer negative( struct cons_pointer frame, * was not. */ struct cons_pointer lisp_is_negative( struct stack_frame - *frame, struct cons_pointer frame_pointer, struct - cons_pointer env ) { - return is_negative(frame->arg[0]) ? TRUE : NIL; + *frame, + struct cons_pointer frame_pointer, struct + cons_pointer env ) { + return is_negative( frame->arg[0] ) ? TRUE : NIL; } diff --git a/src/arith/peano.h b/src/arith/peano.h index 7164a24..7ad7662 100644 --- a/src/arith/peano.h +++ b/src/arith/peano.h @@ -22,23 +22,25 @@ bool zerop( struct cons_pointer arg ); struct cons_pointer negative( struct cons_pointer frame, struct cons_pointer arg ); -bool is_negative( struct cons_pointer arg); +bool is_negative( struct cons_pointer arg ); -struct cons_pointer absolute( struct cons_pointer frame_pointer, struct cons_pointer arg); +struct cons_pointer absolute( struct cons_pointer frame_pointer, + struct cons_pointer arg ); long double to_long_double( struct cons_pointer arg ); struct cons_pointer lisp_absolute( struct stack_frame - *frame, struct cons_pointer frame_pointer, struct - cons_pointer env ); + *frame, struct cons_pointer frame_pointer, struct + cons_pointer env ); struct cons_pointer lisp_add( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ); struct cons_pointer lisp_is_negative( struct stack_frame - *frame, struct cons_pointer frame_pointer, struct - cons_pointer env ); + *frame, + struct cons_pointer frame_pointer, struct + cons_pointer env ); struct cons_pointer lisp_multiply( struct stack_frame *frame, diff --git a/src/arith/ratio.c b/src/arith/ratio.c index 784e71e..65b09da 100644 --- a/src/arith/ratio.c +++ b/src/arith/ratio.c @@ -55,10 +55,10 @@ struct cons_pointer simplify_ratio( struct cons_pointer frame_pointer, if ( ratiop( arg ) ) { int64_t ddrv = - pointer2cell( pointer2cell( arg ).payload.ratio.dividend ).payload. - integer.value, drrv = - pointer2cell( pointer2cell( arg ).payload.ratio.divisor ).payload. - integer.value, gcd = greatest_common_divisor( ddrv, drrv ); + pointer2cell( pointer2cell( arg ).payload.ratio.dividend ). + payload.integer.value, drrv = + pointer2cell( pointer2cell( arg ).payload.ratio.divisor ). + payload.integer.value, gcd = greatest_common_divisor( ddrv, drrv ); if ( gcd > 1 ) { if ( drrv / gcd == 1 ) { @@ -199,10 +199,10 @@ struct cons_pointer divide_ratio_ratio( struct cons_pointer frame_pointer, struct cons_pointer arg1, struct cons_pointer arg2 ) { struct cons_pointer i = make_ratio( frame_pointer, - pointer2cell( arg2 ).payload.ratio. - divisor, - pointer2cell( arg2 ).payload.ratio. - dividend ), result = + pointer2cell( arg2 ).payload. + ratio.divisor, + pointer2cell( arg2 ).payload. + ratio.dividend ), result = multiply_ratio_ratio( frame_pointer, arg1, i ); dec_ref( i ); diff --git a/src/debug.c b/src/debug.c index d694827..c8b9771 100644 --- a/src/debug.c +++ b/src/debug.c @@ -21,6 +21,7 @@ #include "consspaceobject.h" #include "debug.h" #include "dump.h" +#include "io.h" #include "print.h" /** @@ -104,8 +105,10 @@ void debug_printf( int level, wchar_t *format, ... ) { void debug_print_object( struct cons_pointer pointer, int level ) { #ifdef DEBUG if ( level & verbosity ) { + URL_FILE *ustderr = file_to_url_file( stderr ); fwide( stderr, 1 ); - print( stderr, pointer ); + print( ustderr, pointer ); + free( ustderr ); } #endif } @@ -116,8 +119,10 @@ void debug_print_object( struct cons_pointer pointer, int level ) { void debug_dump_object( struct cons_pointer pointer, int level ) { #ifdef DEBUG if ( level & verbosity ) { + URL_FILE *ustderr = file_to_url_file( stderr ); fwide( stderr, 1 ); - dump_object( stderr, pointer ); + dump_object( ustderr, pointer ); + free( ustderr ); } #endif } diff --git a/src/init.c b/src/init.c index e0d2b01..6074ba5 100644 --- a/src/init.c +++ b/src/init.c @@ -9,21 +9,29 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ +#include #include #include #include #include #include +/* libcurl, used for io */ +#include + #include "version.h" #include "conspage.h" #include "consspaceobject.h" #include "debug.h" #include "intern.h" +#include "io.h" #include "lispops.h" +#include "map.h" +#include "meta.h" #include "peano.h" #include "print.h" #include "repl.h" +#include "psse_time.h" // extern char *optarg; /* defined in unistd.h */ @@ -38,11 +46,13 @@ void bind_function( wchar_t *name, struct cons_pointer ( *executable ) ( struct stack_frame *, struct cons_pointer, struct cons_pointer ) ) { struct cons_pointer n = c_string_to_lisp_symbol( name ); - inc_ref( n ); + struct cons_pointer meta = + make_cons( make_cons( c_string_to_lisp_keyword( L"primitive" ), TRUE ), + make_cons( make_cons( c_string_to_lisp_keyword( L"name" ), + n ), + NIL ) ); - deep_bind( n, make_function( NIL, executable ) ); - - dec_ref( n ); + deep_bind( n, make_function( meta, executable ) ); } /** @@ -53,11 +63,13 @@ void bind_special( wchar_t *name, struct cons_pointer ( *executable ) ( struct stack_frame *, struct cons_pointer, struct cons_pointer ) ) { struct cons_pointer n = c_string_to_lisp_symbol( name ); - inc_ref( n ); + struct cons_pointer meta = + make_cons( make_cons( c_string_to_lisp_keyword( L"primitive" ), TRUE ), + make_cons( make_cons( c_string_to_lisp_keyword( L"name" ), + n ), + NIL ) ); - deep_bind( n, make_special( NIL, executable ) ); - - dec_ref( n ); + deep_bind( n, make_special( meta, executable ) ); } /** @@ -81,11 +93,14 @@ int main( int argc, char *argv[] ) { bool dump_at_end = false; bool show_prompt = false; - while ( ( option = getopt( argc, argv, "cpdv:" ) ) != -1 ) { + setlocale( LC_ALL, "" ); + if ( io_init( ) != 0 ) { + fputs( "Failed to initialise I/O subsystem\n", stderr ); + exit( 1 ); + } + + while ( ( option = getopt( argc, argv, "pdv:" ) ) != -1 ) { switch ( option ) { - case 'c': - print_use_colours = true; - break; case 'd': dump_at_end = true; break; @@ -123,22 +138,45 @@ int main( int argc, char *argv[] ) { * standard input, output, error and sink streams * attempt to set wide character acceptance on all streams */ - FILE *sink = fopen( "/dev/null", "w" ); + URL_FILE *sink = url_fopen( "/dev/null", "w" ); fwide( stdin, 1 ); fwide( stdout, 1 ); fwide( stderr, 1 ); - fwide( sink, 1 ); - bind_value( L"*in*", make_read_stream( stdin ) ); - bind_value( L"*out*", make_write_stream( stdout ) ); - bind_value( L"*log*", make_write_stream( stderr ) ); - bind_value( L"*sink*", make_write_stream( sink ) ); - + fwide( sink->handle.file, 1 ); + bind_value( L"*in*", make_read_stream( file_to_url_file( stdin ), + make_cons( make_cons + ( c_string_to_lisp_keyword + ( L"url" ), + c_string_to_lisp_string + ( L"system:standard input" ) ), + NIL ) ) ); + bind_value( L"*out*", + make_write_stream( file_to_url_file( stdout ), + make_cons( make_cons + ( c_string_to_lisp_keyword + ( L"url" ), + c_string_to_lisp_string + ( L"system:standard output]" ) ), + NIL ) ) ); + bind_value( L"*log*", make_write_stream( file_to_url_file( stderr ), + make_cons( make_cons + ( c_string_to_lisp_keyword + ( L"url" ), + c_string_to_lisp_string + ( L"system:standard log" ) ), + NIL ) ) ); + bind_value( L"*sink*", make_write_stream( sink, + make_cons( make_cons + ( c_string_to_lisp_keyword + ( L"url" ), + c_string_to_lisp_string + ( L"system:standard sink" ) ), + NIL ) ) ); /* * the default prompt */ bind_value( L"*prompt*", show_prompt ? c_string_to_lisp_symbol( L":: " ) : NIL ); - /* * primitive function operations */ @@ -148,6 +186,7 @@ int main( int argc, char *argv[] ) { bind_function( L"assoc", &lisp_assoc ); bind_function( L"car", &lisp_car ); bind_function( L"cdr", &lisp_cdr ); + bind_function( L"close", &lisp_close ); bind_function( L"cons", &lisp_cons ); bind_function( L"divide", &lisp_divide ); bind_function( L"eq", &lisp_eq ); @@ -155,50 +194,52 @@ int main( int argc, char *argv[] ) { bind_function( L"eval", &lisp_eval ); bind_function( L"exception", &lisp_exception ); bind_function( L"inspect", &lisp_inspect ); + bind_function( L"make-map", &lisp_make_map); + bind_function( L"meta", &lisp_metadata ); + bind_function( L"metadata", &lisp_metadata ); bind_function( L"multiply", &lisp_multiply ); - bind_function( L"negative?", &lisp_is_negative); - bind_function( L"read", &lisp_read ); - bind_function( L"repl", &lisp_repl ); + bind_function( L"negative?", &lisp_is_negative ); bind_function( L"oblist", &lisp_oblist ); + bind_function( L"open", &lisp_open ); bind_function( L"print", &lisp_print ); bind_function( L"progn", &lisp_progn ); + bind_function( L"read", &lisp_read ); + bind_function( L"read-char", &lisp_read_char ); + bind_function( L"repl", &lisp_repl ); bind_function( L"reverse", &lisp_reverse ); bind_function( L"set", &lisp_set ); + bind_function( L"slurp", &lisp_slurp ); bind_function( L"source", &lisp_source ); bind_function( L"subtract", &lisp_subtract ); bind_function( L"throw", &lisp_exception ); + bind_function( L"time", &lisp_time ); bind_function( L"type", &lisp_type ); - bind_function( L"+", &lisp_add ); bind_function( L"*", &lisp_multiply ); bind_function( L"-", &lisp_subtract ); bind_function( L"/", &lisp_divide ); bind_function( L"=", &lisp_equal ); - /* * primitive special forms */ bind_special( L"cond", &lisp_cond ); bind_special( L"lambda", &lisp_lambda ); - // bind_special( L"λ", &lisp_lambda ); + bind_special( L"\u03bb", &lisp_lambda ); // λ bind_special( L"nlambda", &lisp_nlambda ); - // bind_special( L"nλ", &lisp_nlambda ); + bind_special( L"n\u03bb", &lisp_nlambda ); bind_special( L"progn", &lisp_progn ); bind_special( L"quote", &lisp_quote ); bind_special( L"set!", &lisp_set_shriek ); - debug_print( L"Initialised oblist\n", DEBUG_BOOTSTRAP ); debug_dump_object( oblist, DEBUG_BOOTSTRAP ); - repl( show_prompt ); - debug_print( L"Freeing oblist\n", DEBUG_BOOTSTRAP ); dec_ref( oblist ); debug_dump_object( oblist, DEBUG_BOOTSTRAP ); - if ( dump_at_end ) { - dump_pages( stdout ); + dump_pages( file_to_url_file( stdout ) ); } + curl_global_cleanup( ); return ( 0 ); } diff --git a/src/io/fopen.c b/src/io/fopen.c new file mode 100644 index 0000000..d3ece5c --- /dev/null +++ b/src/io/fopen.c @@ -0,0 +1,526 @@ +/* + * fopen.c + * + * adapted from https://curl.haxx.se/libcurl/c/fopen.html. + * + * Modifications to read/write wide character streams by + * Simon Brooke. + * + * NOTE THAT: for my purposes, I'm only interested in wide characters, + * and I always read them one character at a time. + * + * Copyright (c) 2003, 2017 Simtec Electronics + * Some portions (c) 2019 Simon Brooke + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This example requires libcurl 7.9.7 or later. + */ + +#include +#include +#ifndef WIN32 +#include +#endif +#include +#include + +#include + +#include "fopen.h" +#ifdef FOPEN_STANDALONE +CURLSH *io_share; +#else +#include "consspaceobject.h" +#include "io.h" +#include "utils.h" +#endif + + +/* exported functions */ +URL_FILE *url_fopen( const char *url, const char *operation ); +int url_fclose( URL_FILE * file ); +int url_feof( URL_FILE * file ); +size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ); +char *url_fgets( char *ptr, size_t size, URL_FILE * file ); +void url_rewind( URL_FILE * file ); + +/* we use a global one for convenience */ +static CURLM *multi_handle; + +/* curl calls this routine to get more data */ +static size_t write_callback( char *buffer, + size_t size, size_t nitems, void *userp ) { + char *newbuff; + size_t rembuff; + + URL_FILE *url = ( URL_FILE * ) userp; + size *= nitems; + + rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */ + + if ( size > rembuff ) { + /* not enough space in buffer */ + newbuff = realloc( url->buffer, url->buffer_len + ( size - rembuff ) ); + if ( newbuff == NULL ) { + fprintf( stderr, "callback buffer grow failed\n" ); + size = rembuff; + } else { + /* realloc succeeded increase buffer size */ + url->buffer_len += size - rembuff; + url->buffer = newbuff; + } + } + + memcpy( &url->buffer[url->buffer_pos], buffer, size ); + url->buffer_pos += size; + + return size; +} + +/* use to attempt to fill the read buffer up to requested number of bytes */ +static int fill_buffer( URL_FILE * file, size_t want ) { + fd_set fdread; + fd_set fdwrite; + fd_set fdexcep; + struct timeval timeout; + int rc; + CURLMcode mc; /* curl_multi_fdset() return code */ + + /* only attempt to fill buffer if transactions still running and buffer + * doesn't exceed required size already + */ + if ( ( !file->still_running ) || ( file->buffer_pos > want ) ) + return 0; + + /* attempt to fill buffer */ + do { + int maxfd = -1; + long curl_timeo = -1; + + FD_ZERO( &fdread ); + FD_ZERO( &fdwrite ); + FD_ZERO( &fdexcep ); + + /* set a suitable timeout to fail on */ + timeout.tv_sec = 60; /* 1 minute */ + timeout.tv_usec = 0; + + curl_multi_timeout( multi_handle, &curl_timeo ); + if ( curl_timeo >= 0 ) { + timeout.tv_sec = curl_timeo / 1000; + if ( timeout.tv_sec > 1 ) + timeout.tv_sec = 1; + else + timeout.tv_usec = ( curl_timeo % 1000 ) * 1000; + } + + /* get file descriptors from the transfers */ + mc = curl_multi_fdset( multi_handle, &fdread, &fdwrite, &fdexcep, + &maxfd ); + + if ( mc != CURLM_OK ) { + fprintf( stderr, "curl_multi_fdset() failed, code %d.\n", mc ); + break; + } + + /* On success the value of maxfd is guaranteed to be >= -1. We call + select(maxfd + 1, ...); specially in case of (maxfd == -1) there are + no fds ready yet so we call select(0, ...) --or Sleep() on Windows-- + to sleep 100ms, which is the minimum suggested value in the + curl_multi_fdset() doc. */ + + if ( maxfd == -1 ) { +#ifdef _WIN32 + Sleep( 100 ); + rc = 0; +#else + /* Portable sleep for platforms other than Windows. */ + struct timeval wait = { 0, 100 * 1000 }; /* 100ms */ + rc = select( 0, NULL, NULL, NULL, &wait ); +#endif + } else { + /* Note that on some platforms 'timeout' may be modified by select(). + If you need access to the original value save a copy beforehand. */ + rc = select( maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout ); + } + + switch ( rc ) { + case -1: + /* select error */ + break; + + case 0: + default: + /* timeout or readable/writable sockets */ + curl_multi_perform( multi_handle, &file->still_running ); + break; + } + } while ( file->still_running && ( file->buffer_pos < want ) ); + return 1; +} + +/* use to remove want bytes from the front of a files buffer */ +static int use_buffer( URL_FILE * file, size_t want ) { + /* sort out buffer */ + if ( ( file->buffer_pos - want ) <= 0 ) { + /* ditch buffer - write will recreate */ + free( file->buffer ); + file->buffer = NULL; + file->buffer_pos = 0; + file->buffer_len = 0; + } else { + /* move rest down make it available for later */ + memmove( file->buffer, + &file->buffer[want], ( file->buffer_pos - want ) ); + + file->buffer_pos -= want; + } + return 0; +} + +URL_FILE *url_fopen( const char *url, const char *operation ) { + /* this code could check for URLs or types in the 'url' and + basically use the real fopen() for standard files */ + + URL_FILE *file; + ( void ) operation; + + file = calloc( 1, sizeof( URL_FILE ) ); + if ( !file ) + return NULL; + + file->handle.file = fopen( url, operation ); + if ( file->handle.file ) { + file->type = CFTYPE_FILE; /* marked as file */ + } else if ( index_of(':', url ) > -1 ) { + file->type = CFTYPE_CURL; /* marked as URL */ + file->handle.curl = curl_easy_init( ); + + curl_easy_setopt( file->handle.curl, CURLOPT_URL, url ); + curl_easy_setopt( file->handle.curl, CURLOPT_WRITEDATA, file ); + curl_easy_setopt( file->handle.curl, CURLOPT_VERBOSE, 0L ); + curl_easy_setopt( file->handle.curl, CURLOPT_WRITEFUNCTION, + write_callback ); + /* use the share object */ + curl_easy_setopt( file->handle.curl, CURLOPT_SHARE, io_share ); + + + if ( !multi_handle ) + multi_handle = curl_multi_init( ); + + curl_multi_add_handle( multi_handle, file->handle.curl ); + + /* lets start the fetch */ + curl_multi_perform( multi_handle, &file->still_running ); + + if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) { + /* if still_running is 0 now, we should return NULL */ + + /* make sure the easy handle is not in the multi handle anymore */ + curl_multi_remove_handle( multi_handle, file->handle.curl ); + + /* cleanup */ + curl_easy_cleanup( file->handle.curl ); + + free( file ); + + file = NULL; + } + } else { + file->type = CFTYPE_NONE; + /* not a file, and doesn't look like a URL. */ + } + + return file; +} + +int url_fclose( URL_FILE * file ) { + int ret = 0; /* default is good return */ + + switch ( file->type ) { + case CFTYPE_FILE: + ret = fclose( file->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL: + /* make sure the easy handle is not in the multi handle anymore */ + curl_multi_remove_handle( multi_handle, file->handle.curl ); + + /* cleanup */ + curl_easy_cleanup( file->handle.curl ); + break; + + default: /* unknown or supported type - oh dear */ + ret = EOF; + errno = EBADF; + break; + } + + free( file->buffer ); /* free any allocated buffer space */ + free( file ); + + return ret; +} + +int url_feof( URL_FILE * file ) { + int ret = 0; + + switch ( file->type ) { + case CFTYPE_FILE: + ret = feof( file->handle.file ); + break; + + case CFTYPE_CURL: + if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) + ret = 1; + break; + + default: /* unknown or supported type - oh dear */ + ret = -1; + errno = EBADF; + break; + } + return ret; +} + +size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ) { + size_t want; + + switch ( file->type ) { + case CFTYPE_FILE: + want = fread( ptr, size, nmemb, file->handle.file ); + break; + + case CFTYPE_CURL: + want = nmemb * size; + + fill_buffer( file, want ); + + /* check if there's data in the buffer - if not fill_buffer() + * either errored or EOF */ + if ( !file->buffer_pos ) + return 0; + + /* ensure only available data is considered */ + if ( file->buffer_pos < want ) + want = file->buffer_pos; + + /* xfer data to caller */ + memcpy( ptr, file->buffer, want ); + + use_buffer( file, want ); + + want = want / size; /* number of items */ + break; + + default: /* unknown or supported type - oh dear */ + want = 0; + errno = EBADF; + break; + + } + return want; +} + +char *url_fgets( char *ptr, size_t size, URL_FILE * file ) { + size_t want = size - 1; /* always need to leave room for zero termination */ + size_t loop; + + switch ( file->type ) { + case CFTYPE_FILE: + ptr = fgets( ptr, ( int ) size, file->handle.file ); + break; + + case CFTYPE_CURL: + fill_buffer( file, want ); + + /* check if there's data in the buffer - if not fill either errored or + * EOF */ + if ( !file->buffer_pos ) + return NULL; + + /* ensure only available data is considered */ + if ( file->buffer_pos < want ) + want = file->buffer_pos; + + /*buffer contains data */ + /* look for newline or eof */ + for ( loop = 0; loop < want; loop++ ) { + if ( file->buffer[loop] == '\n' ) { + want = loop + 1; /* include newline */ + break; + } + } + + /* xfer data to caller */ + memcpy( ptr, file->buffer, want ); + ptr[want] = 0; /* always null terminate */ + + use_buffer( file, want ); + + break; + + default: /* unknown or supported type - oh dear */ + ptr = NULL; + errno = EBADF; + break; + } + + return ptr; /*success */ +} + +void url_rewind( URL_FILE * file ) { + switch ( file->type ) { + case CFTYPE_FILE: + rewind( file->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL: + /* halt transaction */ + curl_multi_remove_handle( multi_handle, file->handle.curl ); + + /* restart */ + curl_multi_add_handle( multi_handle, file->handle.curl ); + + /* ditch buffer - write will recreate - resets stream pos */ + free( file->buffer ); + file->buffer = NULL; + file->buffer_pos = 0; + file->buffer_len = 0; + + break; + + default: /* unknown or supported type - oh dear */ + break; + } +} + +#ifdef FOPEN_STANDALONE +#define FGETSFILE "fgets.test" +#define FREADFILE "fread.test" +#define REWINDFILE "rewind.test" + +/* Small main program to retrieve from a url using fgets and fread saving the + * output to two test files (note the fgets method will corrupt binary files if + * they contain 0 chars */ +int main( int argc, char *argv[] ) { + URL_FILE *handle; + FILE *outf; + + size_t nread; + char buffer[256]; + const char *url; + + CURL *curl; + CURLcode res; + + curl_global_init( CURL_GLOBAL_DEFAULT ); + + curl = curl_easy_init( ); + + + if ( argc < 2 ) + url = "http://192.168.7.3/testfile"; /* default to testurl */ + else + url = argv[1]; /* use passed url */ + + /* copy from url line by line with fgets */ + outf = fopen( FGETSFILE, "wb+" ); + if ( !outf ) { + perror( "couldn't open fgets output file\n" ); + return 1; + } + + handle = url_fopen( url, "r" ); + if ( !handle ) { + printf( "couldn't url_fopen() %s\n", url ); + fclose( outf ); + return 2; + } + + while ( !url_feof( handle ) ) { + url_fgets( buffer, sizeof( buffer ), handle ); + fwrite( buffer, 1, strlen( buffer ), outf ); + } + + url_fclose( handle ); + + fclose( outf ); + + + /* Copy from url with fread */ + outf = fopen( FREADFILE, "wb+" ); + if ( !outf ) { + perror( "couldn't open fread output file\n" ); + return 1; + } + + handle = url_fopen( "testfile", "r" ); + if ( !handle ) { + printf( "couldn't url_fopen() testfile\n" ); + fclose( outf ); + return 2; + } + + do { + nread = url_fread( buffer, 1, sizeof( buffer ), handle ); + fwrite( buffer, 1, nread, outf ); + } while ( nread ); + + url_fclose( handle ); + + fclose( outf ); + + + /* Test rewind */ + outf = fopen( REWINDFILE, "wb+" ); + if ( !outf ) { + perror( "couldn't open fread output file\n" ); + return 1; + } + + handle = url_fopen( "testfile", "r" ); + if ( !handle ) { + printf( "couldn't url_fopen() testfile\n" ); + fclose( outf ); + return 2; + } + + nread = url_fread( buffer, 1, sizeof( buffer ), handle ); + fwrite( buffer, 1, nread, outf ); + url_rewind( handle ); + + buffer[0] = '\n'; + fwrite( buffer, 1, 1, outf ); + + nread = url_fread( buffer, 1, sizeof( buffer ), handle ); + fwrite( buffer, 1, nread, outf ); + + url_fclose( handle ); + + fclose( outf ); + + return 0; /* all done */ +} +#endif diff --git a/src/io/fopen.h b/src/io/fopen.h new file mode 100644 index 0000000..5f87bd2 --- /dev/null +++ b/src/io/fopen.h @@ -0,0 +1,83 @@ +/* + * fopen.h + * + * adapted from https://curl.haxx.se/libcurl/c/fopen.html. + * + * + * Modifications to read/write wide character streams by + * Simon Brooke. + * + * NOTE THAT: for my purposes, I'm only interested in wide characters, + * and I always read them one character at a time. + * + * Copyright (c) 2003, 2017 Simtec Electronics + * Some portions (c) 2019 Simon Brooke + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This example requires libcurl 7.9.7 or later. + */ + +#ifndef __fopen_h +#define __fopen_h +#include +/* + * wide characters + */ +#include +#include + +#define url_fwprintf(f, ...) ((f->type = CFTYPE_FILE) ? fwprintf( f->handle.file, __VA_ARGS__) : -1) +#define url_fputws(ws, f) ((f->type = CFTYPE_FILE) ? fputws(ws, f->handle.file) : 0) +#define url_fputwc(wc, f) ((f->type = CFTYPE_FILE) ? fputwc(wc, f->handle.file) : 0) + +enum fcurl_type_e { + CFTYPE_NONE = 0, + CFTYPE_FILE = 1, + CFTYPE_CURL = 2 +}; + +struct fcurl_data { + enum fcurl_type_e type; /* type of handle */ + union { + CURL *curl; + FILE *file; + } handle; /* handle */ + + char *buffer; /* buffer to store cached data */ + size_t buffer_len; /* currently allocated buffer's length */ + size_t buffer_pos; /* cursor into in buffer */ + int still_running; /* Is background url fetch still in progress */ +}; + +typedef struct fcurl_data URL_FILE; + +/* exported functions */ +URL_FILE *url_fopen( const char *url, const char *operation ); +int url_fclose( URL_FILE * file ); +int url_feof( URL_FILE * file ); +size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ); +char *url_fgets( char *ptr, size_t size, URL_FILE * file ); +void url_rewind( URL_FILE * file ); + +#endif diff --git a/src/io/io.c b/src/io/io.c new file mode 100644 index 0000000..5065044 --- /dev/null +++ b/src/io/io.c @@ -0,0 +1,530 @@ +/* + * io.c + * + * Communication between PSSE and the outside world, via libcurl. NOTE + * that this file destructively changes metadata on URL connections, + * because the metadata is not available until the stream has been read + * from. It would be better to find a workaround! + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* + * wide characters + */ +#include +#include + +#include + +#include "conspage.h" +#include "consspaceobject.h" +#include "debug.h" +#include "fopen.h" +#include "integer.h" +#include "intern.h" +#include "lispops.h" +#include "utils.h" + +/** + * The sharing hub for all connections. TODO: Ultimately this probably doesn't + * work for a multi-user environment and we will need one sharing hub for each + * user, or else we will need to not share at least cookies and ssl sessions. + */ +CURLSH *io_share; + +/** + * Allow a one-character unget facility. This may not be enough - we may need + * to allocate a buffer. + */ +wint_t ungotten = 0; + +/** + * Initialise the I/O subsystem. + * + * @return 0 on success; any other value means failure. + */ +int io_init( ) { + CURL *curl; + CURLcode res; + int result = curl_global_init( CURL_GLOBAL_SSL ); + + io_share = curl_share_init( ); + + if ( result == 0 ) { + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, + CURL_LOCK_DATA_SSL_SESSION ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL ); + } + + return result; +} + +/** + * Convert this lisp string-like-thing (also works for symbols, and, later + * keywords) into a UTF-8 string. NOTE that the returned value has been + * malloced and must be freed. TODO: candidate to moving into a utilities + * file. + * + * @param s the lisp string or symbol; + * @return the c string. + */ +char *lisp_string_to_c_string( struct cons_pointer s ) { + char *result = NULL; + + if ( stringp( s ) || symbolp( s ) ) { + int len = 0; + + for ( struct cons_pointer c = s; !nilp( c ); + c = pointer2cell( c ).payload.string.cdr ) { + len++; + } + + wchar_t *buffer = calloc( len + 1, sizeof( wchar_t ) ); + /* worst case, one wide char = four utf bytes */ + result = calloc( ( len * 4 ) + 1, sizeof( char ) ); + + int i = 0; + for ( struct cons_pointer c = s; !nilp( c ); + c = pointer2cell( c ).payload.string.cdr ) { + buffer[i++] = pointer2cell( c ).payload.string.character; + } + + wcstombs( result, buffer, len ); + free( buffer ); + } + + debug_print( L"lisp_string_to_c_string( ", DEBUG_IO ); + debug_print_object( s, DEBUG_IO ); + debug_printf( DEBUG_IO, L") => '%s'\n", result ); + + return result; +} + + +/** + * given this file handle f, return a new url_file handle wrapping it. + * + * @param f the file to be wrapped; + * @return the new handle, or null if no such handle could be allocated. + */ +URL_FILE *file_to_url_file( FILE * f ) { + URL_FILE *result = ( URL_FILE * ) malloc( sizeof( URL_FILE ) ); + + if ( result != NULL ) { + result->type = CFTYPE_FILE, result->handle.file = f; + } + + return result; +} + + +/** + * get one wide character from the buffer. + * + * @param file the stream to read from; + * @return the next wide character on the stream, or zero if no more. + */ +wint_t url_fgetwc( URL_FILE * input ) { + wint_t result = -1; + + if ( ungotten != 0 ) { + /* TODO: not thread safe */ + result = ungotten; + ungotten = 0; + } else { + switch ( input->type ) { + case CFTYPE_FILE: + fwide( input->handle.file, 1 ); /* wide characters */ + result = fgetwc( input->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL:{ + char *cbuff = + calloc( sizeof( wchar_t ) + 2, sizeof( char ) ); + wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) ); + + size_t count = 0; + + debug_print( L"url_fgetwc: about to call url_fgets\n", + DEBUG_IO ); + url_fgets( cbuff, 2, input ); + debug_print( L"url_fgetwc: back from url_fgets\n", + DEBUG_IO ); + int c = ( int ) cbuff[0]; + debug_printf( DEBUG_IO, + L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n", + cbuff, c, c & 0xf7 ); + /* The value of each individual byte indicates its UTF-8 function, as follows: + * + * 00 to 7F hex (0 to 127): first and only byte of a sequence. + * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence. + * C2 to DF hex (194 to 223): first byte of a two-byte sequence. + * E0 to EF hex (224 to 239): first byte of a three-byte sequence. + * F0 to FF hex (240 to 255): first byte of a four-byte sequence. + */ + if ( c <= 0x07 ) { + count = 1; + } else if ( c >= '0xc2' && c <= '0xdf' ) { + count = 2; + } else if ( c >= '0xe0' && c <= '0xef' ) { + count = 3; + } else if ( c >= '0xf0' && c <= '0xff' ) { + count = 4; + } + + if ( count > 1 ) { + url_fgets( ( char * ) &cbuff[1], count, input ); + } + mbstowcs( wbuff, cbuff, 2 ); //(char *)(&input->buffer[input->buffer_pos]), 1 ); + result = wbuff[0]; + + free( wbuff ); + free( cbuff ); + } + break; + case CFTYPE_NONE: + break; + } + } + + debug_printf( DEBUG_IO, L"url_fgetwc returning %d (%C)\n", result, + result ); + return result; +} + +wint_t url_ungetwc( wint_t wc, URL_FILE * input ) { + wint_t result = -1; + + switch ( input->type ) { + case CFTYPE_FILE: + fwide( input->handle.file, 1 ); /* wide characters */ + result = ungetwc( wc, input->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL:{ + ungotten = wc; + break; + case CFTYPE_NONE: + break; + } + } + + return result; +} + + +/** + * Function, sort-of: close the file indicated by my first arg, and return + * nil. If the first arg is not a stream, does nothing. All other args are + * ignored. + * + * * (close stream) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment. + * @return T if the stream was successfully closed, else NIL. + */ +struct cons_pointer +lisp_close( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + struct cons_pointer result = NIL; + + if ( readp( frame->arg[0] ) || writep( frame->arg[0] ) ) { + if ( url_fclose( pointer2cell( frame->arg[0] ).payload.stream.stream ) + == 0 ) { + result = TRUE; + } + } + + return result; +} + +struct cons_pointer add_meta_integer( struct cons_pointer meta, wchar_t *key, + long int value ) { + return + make_cons( make_cons + ( c_string_to_lisp_keyword( key ), + make_integer( value, NIL ) ), meta ); +} + +struct cons_pointer add_meta_string( struct cons_pointer meta, wchar_t *key, + char *value ) { + value = trim( value); + wchar_t buffer[strlen( value ) + 1]; + mbstowcs( buffer, value, strlen( value ) + 1 ); + + return make_cons( make_cons( c_string_to_lisp_keyword( key ), + c_string_to_lisp_string( buffer ) ), meta ); +} + +struct cons_pointer add_meta_time( struct cons_pointer meta, wchar_t *key, + time_t * value ) { + /* I don't yet have a concept of a date-time object, which is a + * bit of an oversight! */ + char datestring[256]; + + strftime( datestring, + sizeof( datestring ), + nl_langinfo( D_T_FMT ), + localtime( value ) ); + + return add_meta_string( meta, key, datestring ); +} + +/** + * Callback to assemble metadata for a URL stream. This is naughty because + * it modifies data, but it's really the only way to create metadata. + */ +static size_t write_meta_callback( char *string, size_t size, size_t nmemb, + struct cons_pointer stream ) { + struct cons_space_object *cell = &pointer2cell( stream ); + + /* make a copy of the string that we can destructively change */ + char *s = calloc( strlen( string ), sizeof( char ) ); + + strcpy( s, string ); + + if ( strncmp( &cell->tag.bytes[0], READTAG, 4 ) || + strncmp( &cell->tag.bytes[0], WRITETAG, 4 ) ) { + int offset = index_of( ':', s ); + + if ( offset != -1 ) { + s[offset] = ( char ) 0; + char *name = trim( s ); + char *value = trim( &s[++offset] ); + wchar_t wname[strlen( name )]; + + mbstowcs( wname, name, strlen( name ) + 1 ); + + cell->payload.stream.meta = + add_meta_string( cell->payload.stream.meta, wname, value ); + + debug_printf( DEBUG_IO, + L"write_meta_callback: added header '%s': value '%s'\n", + name, value ); + } else if ( strncmp( "HTTP", s, 4 ) == 0 ) { + int offset = index_of( ' ', s ); + char *value = trim( &s[offset] ); + + cell->payload.stream.meta = + add_meta_integer( add_meta_string + ( cell->payload.stream.meta, L"status", + value ), L"status-code", strtol( value, + NULL, + 10 ) ); + + debug_printf( DEBUG_IO, + L"write_meta_callback: added header 'status': value '%s'\n", + value ); + } else { + debug_printf( DEBUG_IO, + L"write_meta_callback: header passed with no colon: '%s'\n", + s ); + } + } else { + debug_print + ( L"Pointer passed to write_meta_callback did not point to a stream: ", + DEBUG_IO ); + debug_dump_object( stream, DEBUG_IO ); + } + + free( s ); + return strlen( string ); +} + +void collect_meta( struct cons_pointer stream, char *url ) { + struct cons_space_object *cell = &pointer2cell( stream ); + URL_FILE *s = pointer2cell( stream ).payload.stream.stream; + struct cons_pointer meta = + add_meta_string( cell->payload.stream.meta, L"url", url ); + struct stat statbuf; + int result = stat( url, &statbuf ); + struct passwd *pwd; + struct group *grp; + + switch ( s->type ) { + case CFTYPE_NONE: + break; + case CFTYPE_FILE: + if ( result == 0 ) { + if ( ( pwd = getpwuid( statbuf.st_uid ) ) != NULL ) { + meta = add_meta_string( meta, L"owner", pwd->pw_name ); + } else { + meta = add_meta_integer( meta, L"owner", statbuf.st_uid ); + } + + if ( ( grp = getgrgid( statbuf.st_gid ) ) != NULL ) { + meta = add_meta_string( meta, L"group", grp->gr_name ); + } else { + meta = add_meta_integer( meta, L"group", statbuf.st_gid ); + } + + meta = + add_meta_integer( meta, L"size", + ( intmax_t ) statbuf.st_size ); + + meta = add_meta_time( meta, L"modified", &statbuf.st_mtime ); + } + break; + case CFTYPE_CURL: + curl_easy_setopt( s->handle.curl, CURLOPT_VERBOSE, 1L ); + curl_easy_setopt( s->handle.curl, CURLOPT_HEADERFUNCTION, + write_meta_callback ); + curl_easy_setopt( s->handle.curl, CURLOPT_HEADERDATA, stream ); + break; + } + + /* this is destructive change before the cell is released into the + * wild, and consequently permissible, just. */ + cell->payload.stream.meta = meta; +} + + +/** + * Function: return a stream open on the URL indicated by the first argument; + * if a second argument is present and is non-nil, open it for reading. At + * present, further arguments are ignored and there is no mechanism to open + * to append, or error if the URL is faulty or indicates an unavailable + * resource. + * + * * (read-char stream) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment. + * @return a string of one character, namely the next available character + * on my stream, if any, else NIL. + */ +struct cons_pointer +lisp_open( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + struct cons_pointer result = NIL; + + if ( stringp( frame->arg[0] ) ) { + char *url = lisp_string_to_c_string( frame->arg[0] ); + + if ( nilp( frame->arg[1] ) ) { + URL_FILE *stream = url_fopen( url, "r" ); + + debug_printf( DEBUG_IO, + L"lisp_open: stream @ %d, stream type = %d, stream handle = %d\n", + (int) &stream, (int)stream->type, (int)stream->handle.file); + + switch (stream->type) { + case CFTYPE_NONE: + return make_exception( + c_string_to_lisp_string( L"Could not open stream"), + frame_pointer); + break; + case CFTYPE_FILE: + if (stream->handle.file == NULL) { + return make_exception( + c_string_to_lisp_string( L"Could not open file"), + frame_pointer); + } + break; + case CFTYPE_CURL: + /* can't tell whether a URL is bad without reading it */ + break; + } + + result = make_read_stream( stream, NIL ); + } else { + // TODO: anything more complex is a problem for another day. + URL_FILE *stream = url_fopen( url, "w" ); + result = make_write_stream( stream, NIL ); + } + + if ( pointer2cell( result ).payload.stream.stream == NULL ) { + result = NIL; + } else { + collect_meta( result, url ); + } + + free( url ); + } + + return result; +} + +/** + * Function: return the next character from the stream indicated by arg 0; + * further arguments are ignored. + * + * * (read-char stream) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment. + * @return a string of one character, namely the next available character + * on my stream, if any, else NIL. + */ +struct cons_pointer +lisp_read_char( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + struct cons_pointer result = NIL; + + if ( readp( frame->arg[0] ) ) { + result = + make_string( url_fgetwc + ( pointer2cell( frame->arg[0] ).payload. + stream.stream ), NIL ); + } + + return result; +} + +/** + * Function: return a string representing all characters from the stream + * indicated by arg 0; further arguments are ignored. + * + * * (slurp stream) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment. + * @return a string of one character, namely the next available character + * on my stream, if any, else NIL. + */ +struct cons_pointer +lisp_slurp( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + struct cons_pointer result = NIL; + + if ( readp( frame->arg[0] ) ) { + URL_FILE *stream = pointer2cell( frame->arg[0] ).payload.stream.stream; + struct cons_pointer cursor = make_string( url_fgetwc( stream ), NIL ); + result = cursor; + + for ( wint_t c = url_fgetwc( stream ); !url_feof( stream ) && c != 0; + c = url_fgetwc( stream ) ) { + debug_print( L"slurp: cursor is: ", DEBUG_IO ); + debug_dump_object( cursor, DEBUG_IO ); + debug_print( L"; result is: ", DEBUG_IO ); + debug_dump_object( result, DEBUG_IO ); + debug_println( DEBUG_IO ); + + struct cons_space_object *cell = &pointer2cell( cursor ); + cursor = make_string( ( wchar_t ) c, NIL ); + cell->payload.string.cdr = cursor; + } + } + + return result; +} diff --git a/src/io/io.h b/src/io/io.h new file mode 100644 index 0000000..33f733f --- /dev/null +++ b/src/io/io.h @@ -0,0 +1,38 @@ + +/* + * io.h + * + * Communication between PSSE and the outside world, via libcurl. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_io_h +#define __psse_io_h +#include +#include "consspaceobject.h" + +extern CURLSH *io_share; + +int io_init( ); + +URL_FILE *file_to_url_file( FILE * f ); +wint_t url_fgetwc( URL_FILE * input ); +wint_t url_ungetwc( wint_t wc, URL_FILE * input ); + +struct cons_pointer +lisp_close( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ); +struct cons_pointer +lisp_open( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ); +struct cons_pointer +lisp_read_char( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ); +struct cons_pointer +lisp_slurp( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ); + + +#endif diff --git a/src/ops/print.c b/src/io/print.c similarity index 55% rename from src/ops/print.c rename to src/io/print.c index 604c07c..c886981 100644 --- a/src/ops/print.c +++ b/src/io/print.c @@ -20,27 +20,25 @@ #include "conspage.h" #include "consspaceobject.h" #include "integer.h" +#include "intern.h" +#include "map.h" #include "stack.h" #include "print.h" - -/** - * Whether or not we colorise output. - * \todo this should be a Lisp symbol binding, not a C variable. - */ -int print_use_colours = 0; +#include "psse_time.h" +#include "vectorspace.h" /** * print all the characters in the symbol or string indicated by `pointer` * onto this `output`; if `pointer` does not indicate a string or symbol, * don't print anything but just return. */ -void print_string_contents( FILE * output, struct cons_pointer pointer ) { - while ( stringp( pointer ) || symbolp( pointer ) ) { +void print_string_contents( URL_FILE * output, struct cons_pointer pointer ) { + while ( stringp( pointer ) || symbolp( pointer ) || keywordp( pointer ) ) { struct cons_space_object *cell = &pointer2cell( pointer ); wchar_t c = cell->payload.string.character; if ( c != '\0' ) { - fputwc( c, output ); + url_fputwc( c, output ); } pointer = cell->payload.string.cdr; } @@ -51,10 +49,10 @@ void print_string_contents( FILE * output, struct cons_pointer pointer ) { * the stream at this `output`, prepending and appending double quote * characters. */ -void print_string( FILE * output, struct cons_pointer pointer ) { - fputwc( btowc( '"' ), output ); +void print_string( URL_FILE * output, struct cons_pointer pointer ) { + url_fputwc( btowc( '"' ), output ); print_string_contents( output, pointer ); - fputwc( btowc( '"' ), output ); + url_fputwc( btowc( '"' ), output ); } /** @@ -63,14 +61,14 @@ void print_string( FILE * output, struct cons_pointer pointer ) { * a space character. */ void -print_list_contents( FILE * output, struct cons_pointer pointer, +print_list_contents( URL_FILE * output, struct cons_pointer pointer, bool initial_space ) { struct cons_space_object *cell = &pointer2cell( pointer ); switch ( cell->tag.value ) { case CONSTV: if ( initial_space ) { - fputwc( btowc( ' ' ), output ); + url_fputwc( btowc( ' ' ), output ); } print( output, cell->payload.cons.car ); @@ -79,32 +77,80 @@ print_list_contents( FILE * output, struct cons_pointer pointer, case NILTV: break; default: - fwprintf( output, L" . " ); + url_fwprintf( output, L" . " ); print( output, pointer ); } } -void print_list( FILE * output, struct cons_pointer pointer ) { - if ( print_use_colours ) { - fwprintf( output, L"%s(%s", "\x1B[31m", "\x1B[39m" ); - } else { - fputws( L"(", output ); - }; - +void print_list( URL_FILE * output, struct cons_pointer pointer ) { + url_fputws( L"(", output ); print_list_contents( output, pointer, false ); - if ( print_use_colours ) { - fwprintf( output, L"%s)%s", "\x1B[31m", "\x1B[39m" ); - } else { - fputws( L")", output ); - } - + url_fputws( L")", output ); } + +void print_map( URL_FILE * output, struct cons_pointer map) { + if ( vectorpointp( map)) { + struct vector_space_object *vso = pointer_to_vso( map); + + if ( mapp( vso ) ) { + url_fputwc( btowc( '{' ), output ); + + for ( struct cons_pointer ks = keys( map); + !nilp( ks); ks = c_cdr( ks)) { + print( output, c_car( ks)); + url_fputwc( btowc( ' ' ), output ); + print( output, c_assoc( c_car( ks), map)); + + if ( !nilp( c_cdr( ks))) { + url_fputws( L", ", output ); + } + } + + url_fputwc( btowc( '}' ), output ); + } + } +} + + +void print_vso( URL_FILE * output, struct cons_pointer pointer) { + struct vector_space_object *vso = + pointer2cell( pointer ).payload.vectorp.address; + + switch ( vso->header.tag.value) { + case MAPTV: + print_map( output, pointer); + break; + // \todo: others. + } +} + +/** + * stolen from https://stackoverflow.com/questions/11656241/how-to-print-uint128-t-number-using-gcc + */ +void print_128bit( URL_FILE * output, __int128_t n ) { + if ( n == 0 ) { + fwprintf( stderr, L"0" ); + } else { + char str[40] = { 0 }; // log10(1 << 128) + '\0' + char *s = str + sizeof( str ) - 1; // start at the end + while ( n != 0 ) { + if ( s == str ) + return; // never happens + + *--s = "0123456789"[n % 10]; // save last digit + n /= 10; // drop it + } + url_fwprintf( output, L"%s", s ); + } +} + + /** * Print the cons-space object indicated by `pointer` to the stream indicated * by `output`. */ -struct cons_pointer print( FILE * output, struct cons_pointer pointer ) { +struct cons_pointer print( URL_FILE * output, struct cons_pointer pointer ) { struct cons_space_object cell = pointer2cell( pointer ); char *buffer; @@ -117,23 +163,25 @@ struct cons_pointer print( FILE * output, struct cons_pointer pointer ) { print_list( output, pointer ); break; case EXCEPTIONTV: - fwprintf( output, L"\n%sException: ", - print_use_colours ? "\x1B[31m" : "" ); + url_fwuts( L"\nException: ", output ); dump_stack_trace( output, pointer ); break; case FUNCTIONTV: - fwprintf( output, L"" ); + url_fputws( L"', output); break; case INTEGERTV:{ struct cons_pointer s = integer_to_string( pointer, 10 ); inc_ref( s ); - if ( print_use_colours ) { - fputws( L"\x1B[34m", output ); - } print_string_contents( output, s ); dec_ref( s ); } break; + case KEYTV: + url_fputws( L":", output ); + print_string_contents( output, pointer ); + break; case LAMBDATV:{ struct cons_pointer to_print = make_cons( c_string_to_lisp_symbol( L"lambda" ), @@ -147,7 +195,7 @@ struct cons_pointer print( FILE * output, struct cons_pointer pointer ) { } break; case NILTV: - fwprintf( output, L"nil" ); + url_fwprintf( output, L"nil" ); break; case NLAMBDATV:{ struct cons_pointer to_print = @@ -163,11 +211,13 @@ struct cons_pointer print( FILE * output, struct cons_pointer pointer ) { break; case RATIOTV: print( output, cell.payload.ratio.dividend ); - fputws( L"/", output ); + url_fputws( L"/", output ); print( output, cell.payload.ratio.divisor ); break; case READTV: - fwprintf( output, L"" ); + url_fwprintf( output, L"', output); break; case REALTV: /* \todo using the C heap is a bad plan because it will fragment. @@ -182,49 +232,48 @@ struct cons_pointer print( FILE * output, struct cons_pointer pointer ) { buffer[i] = '\0'; } } - if ( print_use_colours ) { - fputws( L"\x1B[34m", output ); - } - fwprintf( output, L"%s", buffer ); + url_fwprintf( output, L"%s", buffer ); free( buffer ); break; case STRINGTV: - if ( print_use_colours ) { - fputws( L"\x1B[36m", output ); - } print_string( output, pointer ); break; case SYMBOLTV: - if ( print_use_colours ) { - fputws( L"\x1B[1;33m", output ); - } print_string_contents( output, pointer ); break; case SPECIALTV: - fwprintf( output, L"" ); + url_fwprintf( output, L"', output); + break; + case TIMETV: + url_fwprintf( output, L"', output); break; case TRUETV: - fwprintf( output, L"t" ); + url_fwprintf( output, L"t" ); + break; + case VECTORPOINTTV: + print_vso( output, pointer); break; case WRITETV: - fwprintf( output, L"" ); + url_fwprintf( output, L"', output); break; default: fwprintf( stderr, - L"%sError: Unrecognised tag value %d (%c%c%c%c)\n", - print_use_colours ? "\x1B[31m" : "", - cell.tag.value, cell.tag.bytes[0], cell.tag.bytes[1], - cell.tag.bytes[2], cell.tag.bytes[3] ); + L"Error: Unrecognised tag value %d (%4.4s)\n", + cell.tag.value, &cell.tag.bytes[0] ); break; } - if ( print_use_colours ) { - fputws( L"\x1B[39m", output ); - } - return pointer; } -void println( FILE * output ) { - fputws( L"\n", output ); +void println( URL_FILE * output ) { + url_fputws( L"\n", output ); } diff --git a/src/ops/print.h b/src/io/print.h similarity index 67% rename from src/ops/print.h rename to src/io/print.h index 2751032..006ef80 100644 --- a/src/ops/print.h +++ b/src/io/print.h @@ -14,8 +14,7 @@ #ifndef __print_h #define __print_h -struct cons_pointer print( FILE * output, struct cons_pointer pointer ); -void println( FILE * output ); -extern int print_use_colours; +struct cons_pointer print( URL_FILE * output, struct cons_pointer pointer ); +void println( URL_FILE * output ); #endif diff --git a/src/ops/read.c b/src/io/read.c similarity index 65% rename from src/ops/read.c rename to src/io/read.c index 4006c99..4f3ed0a 100644 --- a/src/ops/read.c +++ b/src/io/read.c @@ -22,7 +22,9 @@ #include "dump.h" #include "integer.h" #include "intern.h" +#include "io.h" #include "lispops.h" +#include "map.h" #include "peano.h" #include "print.h" #include "ratio.h" @@ -38,13 +40,17 @@ struct cons_pointer read_number( struct stack_frame *frame, struct cons_pointer frame_pointer, - FILE * input, wint_t initial, + URL_FILE * input, wint_t initial, bool seen_period ); struct cons_pointer read_list( struct stack_frame *frame, - struct cons_pointer frame_pointer, FILE * input, - wint_t initial ); -struct cons_pointer read_string( FILE * input, wint_t initial ); -struct cons_pointer read_symbol( FILE * input, wint_t initial ); + struct cons_pointer frame_pointer, + URL_FILE * input, wint_t initial ); +struct cons_pointer read_map( struct stack_frame *frame, + struct cons_pointer frame_pointer, + URL_FILE * input, wint_t initial ); +struct cons_pointer read_string( URL_FILE * input, wint_t initial ); +struct cons_pointer read_symbol_or_key( URL_FILE * input, char *tag, + wint_t initial ); /** * quote reader macro in C (!) @@ -61,23 +67,25 @@ struct cons_pointer c_quote( struct cons_pointer arg ) { */ struct cons_pointer read_continuation( struct stack_frame *frame, struct cons_pointer frame_pointer, - FILE * input, wint_t initial ) { + URL_FILE * input, wint_t initial ) { debug_print( L"entering read_continuation\n", DEBUG_IO ); struct cons_pointer result = NIL; wint_t c; for ( c = initial; - c == '\0' || iswblank( c ) || iswcntrl( c ); c = fgetwc( input ) ); + c == '\0' || iswblank( c ) || iswcntrl( c ); + c = url_fgetwc( input ) ); - if ( feof( input ) ) { + if ( url_feof( input ) ) { result = throw_exception( c_string_to_lisp_string ( L"End of file while reading" ), frame_pointer ); } else { switch ( c ) { case ';': - for ( c = fgetwc( input ); c != '\n'; c = fgetwc( input ) ); + for ( c = url_fgetwc( input ); c != '\n'; + c = url_fgetwc( input ) ); /* skip all characters from semi-colon to the end of the line */ break; case EOF: @@ -89,52 +97,62 @@ struct cons_pointer read_continuation( struct stack_frame *frame, result = c_quote( read_continuation ( frame, frame_pointer, input, - fgetwc( input ) ) ); + url_fgetwc( input ) ) ); break; case '(': result = - read_list( frame, frame_pointer, input, fgetwc( input ) ); + read_list( frame, frame_pointer, input, + url_fgetwc( input ) ); + break; + case '{': + result = read_map( frame, frame_pointer, input, + url_fgetwc( input ) ); break; case '"': - result = read_string( input, fgetwc( input ) ); + result = read_string( input, url_fgetwc( input ) ); break; case '-':{ - wint_t next = fgetwc( input ); - ungetwc( next, input ); + wint_t next = url_fgetwc( input ); + url_ungetwc( next, input ); if ( iswdigit( next ) ) { result = read_number( frame, frame_pointer, input, c, false ); } else { - result = read_symbol( input, c ); + result = read_symbol_or_key( input, SYMBOLTAG, c ); } } break; case '.': { - wint_t next = fgetwc( input ); + wint_t next = url_fgetwc( input ); if ( iswdigit( next ) ) { - ungetwc( next, input ); + url_ungetwc( next, input ); result = read_number( frame, frame_pointer, input, c, true ); } else if ( iswblank( next ) ) { /* dotted pair. \todo this isn't right, we * really need to backtrack up a level. */ - result = - read_continuation( frame, frame_pointer, input, - fgetwc( input ) ); + result = read_continuation( frame, frame_pointer, input, + url_fgetwc( input ) ); + debug_print( L"read_continuation: dotted pair; read cdr ", + DEBUG_IO); } else { - read_symbol( input, c ); + read_symbol_or_key( input, SYMBOLTAG, c ); } } break; + case ':': + result = + read_symbol_or_key( input, KEYTAG, url_fgetwc( input ) ); + break; default: if ( iswdigit( c ) ) { result = read_number( frame, frame_pointer, input, c, false ); } else if ( iswprint( c ) ) { - result = read_symbol( input, c ); + result = read_symbol_or_key( input, SYMBOLTAG, c ); } else { result = throw_exception( make_cons( c_string_to_lisp_string @@ -158,7 +176,7 @@ struct cons_pointer read_continuation( struct stack_frame *frame, */ struct cons_pointer read_number( struct stack_frame *frame, struct cons_pointer frame_pointer, - FILE * input, + URL_FILE * input, wint_t initial, bool seen_period ) { debug_print( L"entering read_number\n", DEBUG_IO ); @@ -172,14 +190,14 @@ struct cons_pointer read_number( struct stack_frame *frame, bool neg = initial == btowc( '-' ); if ( neg ) { - initial = fgetwc( input ); + initial = url_fgetwc( input ); } debug_printf( DEBUG_IO, L"read_number starting '%c' (%d)\n", initial, initial ); for ( c = initial; iswdigit( c ) - || c == L'.' || c == L'/' || c == L','; c = fgetwc( input ) ) { + || c == L'.' || c == L'/' || c == L','; c = url_fgetwc( input ) ) { switch ( c ) { case L'.': if ( seen_period || !nilp( dividend ) ) { @@ -228,7 +246,7 @@ struct cons_pointer read_number( struct stack_frame *frame, /* * push back the character read which was not a digit */ - ungetwc( c, input ); + url_ungetwc( c, input ); if ( seen_period ) { debug_print( L"read_number: converting result to real\n", DEBUG_IO ); @@ -266,19 +284,38 @@ struct cons_pointer read_number( struct stack_frame *frame, * left parenthesis. */ struct cons_pointer read_list( struct stack_frame *frame, - struct cons_pointer frame_pointer, - FILE * input, wint_t initial ) { + struct cons_pointer frame_pointer, + URL_FILE * input, wint_t initial ) { struct cons_pointer result = NIL; + wint_t c; + if ( initial != ')' ) { debug_printf( DEBUG_IO, - L"read_list starting '%C' (%d)\n", initial, initial ); + L"read_list starting '%C' (%d)\n", initial, initial ); struct cons_pointer car = read_continuation( frame, frame_pointer, input, - initial ); - result = - make_cons( car, - read_list( frame, frame_pointer, input, - fgetwc( input ) ) ); + initial ); + + /* skip whitespace */ + for (c = url_fgetwc( input ); + iswblank( c ) || iswcntrl( c ); + c = url_fgetwc( input )); + + if ( c == L'.') { + /* might be a dotted pair; indeed, if we rule out numbers with + * initial periods, it must be a dotted pair. \todo Ought to check, + * howerver, that there's only one form after the period. */ + result = + make_cons( car, + c_car( read_list( frame, + frame_pointer, + input, + url_fgetwc( input ) ) ) ); + } else { + result = + make_cons( car, + read_list( frame, frame_pointer, input, c ) ); + } } else { debug_print( L"End of list detected\n", DEBUG_IO ); } @@ -286,6 +323,37 @@ struct cons_pointer read_list( struct stack_frame *frame, return result; } + +struct cons_pointer read_map( struct stack_frame *frame, + struct cons_pointer frame_pointer, + URL_FILE * input, wint_t initial ) { + struct cons_pointer result = make_empty_map( NIL); + wint_t c = initial; + + while ( c != L'}' ) { + struct cons_pointer key = + read_continuation( frame, frame_pointer, input, c ); + + /* skip whitespace */ + for (c = url_fgetwc( input ); + iswblank( c ) || iswcntrl( c ); + c = url_fgetwc( input )); + + struct cons_pointer value = + read_continuation( frame, frame_pointer, input, c ); + + /* skip commaa and whitespace at this point. */ + for (c = url_fgetwc( input ); + c == L',' || iswblank( c ) || iswcntrl( c ); + c = url_fgetwc( input )); + + result = merge_into_map( result, make_cons( make_cons( key, value), NIL)); + } + + return result; +} + + /** * Read a string. This means either a string delimited by double quotes * (is_quoted == true), in which case it may contain whitespace but may @@ -293,7 +361,7 @@ struct cons_pointer read_list( struct stack_frame *frame, * so delimited in which case it may not contain whitespace (unless escaped) * but may contain a double quote character (probably not a good idea!) */ -struct cons_pointer read_string( FILE * input, wint_t initial ) { +struct cons_pointer read_string( URL_FILE * input, wint_t initial ) { struct cons_pointer cdr = NIL; struct cons_pointer result; switch ( initial ) { @@ -308,54 +376,57 @@ struct cons_pointer read_string( FILE * input, wint_t initial ) { break; default: result = - make_string( initial, read_string( input, fgetwc( input ) ) ); + make_string( initial, + read_string( input, url_fgetwc( input ) ) ); break; } return result; } -struct cons_pointer read_symbol( FILE * input, wint_t initial ) { +struct cons_pointer read_symbol_or_key( URL_FILE * input, char *tag, + wint_t initial ) { struct cons_pointer cdr = NIL; struct cons_pointer result; switch ( initial ) { case '\0': - result = make_symbol( initial, NIL ); + result = make_symbol_or_key( initial, NIL, tag ); break; case '"': - /* - * THIS IS NOT A GOOD IDEA, but is legal - */ - result = - make_symbol( initial, read_symbol( input, fgetwc( input ) ) ); - break; + case '\'': + /* unwise to allow embedded quotation marks in symbols */ case ')': + case ':': /* - * symbols may not include right-parenthesis + * symbols and keywords may not include right-parenthesis + * or colons. */ result = NIL; /* * push back the character read */ - ungetwc( initial, input ); + url_ungetwc( initial, input ); break; default: if ( iswprint( initial ) && !iswblank( initial ) ) { result = - make_symbol( initial, - read_symbol( input, fgetwc( input ) ) ); + make_symbol_or_key( initial, + read_symbol_or_key( input, + tag, + url_fgetwc + ( input ) ), tag ); } else { result = NIL; /* * push back the character read */ - ungetwc( initial, input ); + url_ungetwc( initial, input ); } break; } - debug_print( L"read_symbol returning\n", DEBUG_IO ); + debug_print( L"read_symbol_or_key returning\n", DEBUG_IO ); debug_dump_object( result, DEBUG_IO ); return result; @@ -367,6 +438,7 @@ struct cons_pointer read_symbol( FILE * input, wint_t initial ) { struct cons_pointer read( struct stack_frame *frame, struct cons_pointer frame_pointer, - FILE * input ) { - return read_continuation( frame, frame_pointer, input, fgetwc( input ) ); + URL_FILE * input ) { + return read_continuation( frame, frame_pointer, input, + url_fgetwc( input ) ); } diff --git a/src/ops/read.h b/src/io/read.h similarity index 77% rename from src/ops/read.h rename to src/io/read.h index c6dbba3..64f36b0 100644 --- a/src/ops/read.h +++ b/src/io/read.h @@ -15,6 +15,7 @@ * read the next object on this input stream and return a cons_pointer to it. */ struct cons_pointer read( struct stack_frame *frame, - struct cons_pointer frame_pointer, FILE * input ); + struct cons_pointer frame_pointer, + URL_FILE * input ); #endif diff --git a/src/memory/conspage.c b/src/memory/conspage.c index f3c1760..2d0958d 100644 --- a/src/memory/conspage.c +++ b/src/memory/conspage.c @@ -115,9 +115,9 @@ void make_cons_page( ) { /** * dump the allocated pages to this `output` stream. */ -void dump_pages( FILE * output ) { +void dump_pages( URL_FILE * output ) { for ( int i = 0; i < initialised_cons_pages; i++ ) { - fwprintf( output, L"\nDUMPING PAGE %d\n", i ); + url_fwprintf( output, L"\nDUMPING PAGE %d\n", i ); for ( int j = 0; j < CONSPAGESIZE; j++ ) { dump_object( output, ( struct cons_pointer ) { @@ -152,7 +152,7 @@ void free_cell( struct cons_pointer pointer ) { dec_ref( cell->payload.exception.frame ); break; case FUNCTIONTV: - dec_ref( cell->payload.function.source ); + dec_ref( cell->payload.function.meta ); break; case INTEGERTV: dec_ref( cell->payload.integer.more ); @@ -166,8 +166,13 @@ void free_cell( struct cons_pointer pointer ) { dec_ref( cell->payload.ratio.dividend ); dec_ref( cell->payload.ratio.divisor ); break; + case READTV: + case WRITETV: + dec_ref( cell->payload.stream.meta ); + url_fclose( cell->payload.stream.stream ); + break; case SPECIALTV: - dec_ref( cell->payload.special.source ); + dec_ref( cell->payload.special.meta ); break; case STRINGTV: case SYMBOLTV: diff --git a/src/memory/conspage.h b/src/memory/conspage.h index ab04d6d..f13a46b 100644 --- a/src/memory/conspage.h +++ b/src/memory/conspage.h @@ -1,7 +1,19 @@ -#include "consspaceobject.h" +/* + * conspage.h + * + * Setup and tear down cons pages, and (FOR NOW) do primitive + * allocation/deallocation of cells. + * NOTE THAT before we go multi-threaded, these functions must be + * aggressively + * thread safe. + * + * (c) 2017 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ +#ifndef __psse_conspage_h +#define __psse_conspage_h -#ifndef __conspage_h -#define __conspage_h +#include "consspaceobject.h" /** * the number of cons cells on a cons page. The maximum value this can @@ -47,6 +59,6 @@ struct cons_pointer allocate_cell( char *tag ); void initialise_cons_pages( ); -void dump_pages( FILE * output ); +void dump_pages( URL_FILE * output ); #endif diff --git a/src/memory/consspaceobject.c b/src/memory/consspaceobject.c index 6a7e2bd..344f4ae 100644 --- a/src/memory/consspaceobject.c +++ b/src/memory/consspaceobject.c @@ -21,6 +21,7 @@ #include "conspage.h" #include "consspaceobject.h" #include "debug.h" +#include "intern.h" #include "print.h" #include "stack.h" @@ -65,6 +66,71 @@ void dec_ref( struct cons_pointer pointer ) { } +/** + * Get the Lisp type of the single argument. + * @param pointer a pointer to the object whose type is requested. + * @return As a Lisp string, the tag of the object which is at that pointer. + */ +struct cons_pointer c_type( struct cons_pointer pointer ) { + struct cons_pointer result = NIL; + struct cons_space_object cell = pointer2cell( pointer ); + + for ( int i = TAGLENGTH - 1; i >= 0; i-- ) { + result = make_string( ( wchar_t ) cell.tag.bytes[i], result ); + } + + return result; +} + +/** + * Implementation of car in C. If arg is not a cons, does not error but returns nil. + */ +struct cons_pointer c_car( struct cons_pointer arg ) { + struct cons_pointer result = NIL; + + if ( consp( arg ) ) { + result = pointer2cell( arg ).payload.cons.car; + } + + return result; +} + +/** + * Implementation of cdr in C. If arg is not a sequence, does not error but returns nil. + */ +struct cons_pointer c_cdr( struct cons_pointer arg ) { + struct cons_pointer result = NIL; + + struct cons_space_object cell = pointer2cell( arg ); + + switch (cell.tag.value) { + case CONSTV: + result = pointer2cell( arg ).payload.cons.cdr; + break; + case KEYTV: + case STRINGTV: + case SYMBOLTV: + result = pointer2cell( arg ).payload.string.cdr; + break; + } + + return result; +} + +/** + * Implementation of `length` in C. If arg is not a cons, does not error but returns 0. + */ +int c_length( struct cons_pointer arg) { + int result = 0; + + for (struct cons_pointer c = arg; !nilp(c); c = c_cdr(c)) { + result ++; + } + + return result; +} + + /** * Construct a cons cell from this pair of pointers. */ @@ -95,8 +161,6 @@ struct cons_pointer make_exception( struct cons_pointer message, struct cons_pointer pointer = allocate_cell( EXCEPTIONTAG ); struct cons_space_object *cell = &pointer2cell( pointer ); -// inc_ref( pointer ); /* this is a hack; I don't know why it's necessary to do this, but if I don't the cell gets freed */ - inc_ref( message ); inc_ref( frame_pointer ); cell->payload.exception.message = message; @@ -109,16 +173,17 @@ struct cons_pointer make_exception( struct cons_pointer message, /** - * Construct a cell which points to an executable Lisp special form. + * Construct a cell which points to an executable Lisp function. */ struct cons_pointer -make_function( struct cons_pointer src, struct cons_pointer ( *executable ) +make_function( struct cons_pointer meta, struct cons_pointer ( *executable ) ( struct stack_frame *, struct cons_pointer, struct cons_pointer ) ) { struct cons_pointer pointer = allocate_cell( FUNCTIONTAG ); struct cons_space_object *cell = &pointer2cell( pointer ); + inc_ref( meta ); - cell->payload.function.source = src; + cell->payload.function.meta = meta; cell->payload.function.executable = executable; return pointer; @@ -205,27 +270,42 @@ struct cons_pointer make_string( wint_t c, struct cons_pointer tail ) { } /** - * Construct a symbol from the character `c` and this `tail`. A symbol is - * internally identical to a string except for having a different tag. + * Construct a symbol or keyword from the character `c` and this `tail`. + * Each is internally identical to a string except for having a different tag. * * @param c the character to add (prepend); * @param tail the symbol which is being built. + * @param tag the tag to use: expected to be "SYMB" or "KEYW" */ -struct cons_pointer make_symbol( wint_t c, struct cons_pointer tail ) { - return make_string_like_thing( c, tail, SYMBOLTAG ); +struct cons_pointer make_symbol_or_key( wint_t c, struct cons_pointer tail, + char *tag ) { + struct cons_pointer result = make_string_like_thing( c, tail, tag ); + + if ( strncmp( tag, KEYTAG, 4 ) == 0 ) { + struct cons_pointer r = internedp( result, oblist ); + + if ( nilp( r ) ) { + intern( result, oblist ); + } else { + result = r; + } + } + + return result; } /** * Construct a cell which points to an executable Lisp special form. */ struct cons_pointer -make_special( struct cons_pointer src, struct cons_pointer ( *executable ) +make_special( struct cons_pointer meta, struct cons_pointer ( *executable ) ( struct stack_frame * frame, struct cons_pointer, struct cons_pointer env ) ) { struct cons_pointer pointer = allocate_cell( SPECIALTAG ); struct cons_space_object *cell = &pointer2cell( pointer ); + inc_ref( meta ); - cell->payload.special.source = src; + cell->payload.special.meta = meta; cell->payload.special.executable = executable; return pointer; @@ -234,12 +314,16 @@ make_special( struct cons_pointer src, struct cons_pointer ( *executable ) /** * Construct a cell which points to a stream open for reading. * @param input the C stream to wrap. + * @param metadata a pointer to an associaton containing metadata on the stream. + * @return a pointer to the new read stream. */ -struct cons_pointer make_read_stream( FILE * input ) { +struct cons_pointer make_read_stream( URL_FILE * input, + struct cons_pointer metadata ) { struct cons_pointer pointer = allocate_cell( READTAG ); struct cons_space_object *cell = &pointer2cell( pointer ); cell->payload.stream.stream = input; + cell->payload.stream.meta = metadata; return pointer; } @@ -247,24 +331,48 @@ struct cons_pointer make_read_stream( FILE * input ) { /** * Construct a cell which points to a stream open for writing. * @param output the C stream to wrap. + * @param metadata a pointer to an associaton containing metadata on the stream. + * @return a pointer to the new read stream. */ -struct cons_pointer make_write_stream( FILE * output ) { +struct cons_pointer make_write_stream( URL_FILE * output, + struct cons_pointer metadata ) { struct cons_pointer pointer = allocate_cell( WRITETAG ); struct cons_space_object *cell = &pointer2cell( pointer ); cell->payload.stream.stream = output; + cell->payload.stream.meta = metadata; return pointer; } +/** + * Return a lisp keyword representation of this wide character string. In keywords, + * I am accepting only lower case characters and numbers. + */ +struct cons_pointer c_string_to_lisp_keyword( wchar_t *symbol ) { + struct cons_pointer result = NIL; + + for ( int i = wcslen( symbol ) -1; i >= 0; i-- ) { + wchar_t c = towlower(symbol[i]); + + if (iswalnum(c) || c == L'-') { + result = make_keyword( c, result ); + } + } + + return result; +} + /** * Return a lisp string representation of this wide character string. */ struct cons_pointer c_string_to_lisp_string( wchar_t *string ) { struct cons_pointer result = NIL; - for ( int i = wcslen( string ); i > 0; i-- ) { - result = make_string( string[i - 1], result ); + for ( int i = wcslen( string ) - 1; i >= 0; i-- ) { + if (iswprint(string[i]) && string[i] != '"') { + result = make_string( string[i], result ); + } } return result; diff --git a/src/memory/consspaceobject.h b/src/memory/consspaceobject.h index acc36df..9197172 100644 --- a/src/memory/consspaceobject.h +++ b/src/memory/consspaceobject.h @@ -8,6 +8,9 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ +#ifndef __psse_consspaceobject_h +#define __psse_consspaceobject_h + #include #include #include @@ -17,8 +20,8 @@ #include #include -#ifndef __consspaceobject_h -#define __consspaceobject_h +#include "fopen.h" + /** * The length of a tag, in bytes. @@ -37,6 +40,7 @@ /** * The string `CONS`, considered as an `unsigned int`. + * @todo tag values should be collected into an enum. */ #define CONSTV 1397641027 @@ -83,6 +87,16 @@ */ #define INTEGERTV 1381256777 +/** + * A keyword - an interned, self-evaluating string. + */ +#define KEYTAG "KEYW" + +/** + * The string `KEYW`, considered as an `unsigned int`. + */ +#define KEYTV 1465468235 + /** * A lambda cell. Lambdas are the interpretable (source) versions of functions. * \see FUNCTIONTAG. @@ -179,6 +193,16 @@ */ #define SYMBOLTV 1112365395 +/** + * A time stamp. + */ +#define TIMETAG "TIME" + +/** + * The string `TIME`, considered as an `unsigned int`. + */ +#define TIMETV 1162692948 + /** * The special cons cell at address {0,1} which is canonically different * from NIL. @@ -256,6 +280,11 @@ */ #define functionp(conspoint) (check_tag(conspoint,FUNCTIONTAG)) +/** + * true if `conspoint` points to a keyword, else false + */ +#define keywordp(conspoint) (check_tag(conspoint,KEYTAG)) + /** * true if `conspoint` points to a special Lambda cell, else false */ @@ -318,18 +347,25 @@ */ #define writep(conspoint) (check_tag(conspoint,WRITETAG)) +#define streamp(conspoint) (check_tag(conspoint,READTAG)||check_tag(conspoint,WRITETAG)) + /** * true if `conspoint` points to a true cell, else false * (there should only be one of these so it's slightly redundant). * Also note that anything that is not NIL is truthy. */ -#define tp(conspoint) (checktag(conspoint,TRUETAG)) +#define tp(conspoint) (check_tag(conspoint,TRUETAG)) + +/** + * true if `conspoint` points to a time cell, else false. + */ +#define timep(conspoint) (check_tag(conspoint,TIMETAG)) /** * true if `conspoint` points to something that is truthy, i.e. * anything but NIL. */ -#define truep(conspoint) (!checktag(conspoint,NILTAG)) +#define truep(conspoint) (!check_tag(conspoint,NILTAG)) /** * An indirect pointer to a cons cell @@ -395,10 +431,9 @@ struct exception_payload { */ struct function_payload { /** - * pointer to the source from which the function was compiled, or NIL - * if it is a primitive. + * pointer to metadata (e.g. the source from which the function was compiled). */ - struct cons_pointer source; + struct cons_pointer meta; /** pointer to a function which takes a cons pointer (representing * its argument list) and a cons pointer (representing its environment) and a * stack frame (representing the previous stack frame) as arguments and returns @@ -473,7 +508,7 @@ struct special_payload { * pointer to the source from which the special form was compiled, or NIL * if it is a primitive. */ - struct cons_pointer source; + struct cons_pointer meta; /** pointer to a function which takes a cons pointer (representing * its argument list) and a cons pointer (representing its environment) and a * stack frame (representing the previous stack frame) as arguments and returns @@ -488,14 +523,19 @@ struct special_payload { */ struct stream_payload { /** the stream to read from or write to. */ - FILE *stream; + URL_FILE *stream; + /** metadata on the stream (e.g. its file attributes if a file, its HTTP + * headers if a URL, etc). Expected to be an association, or nil. Not yet + * implemented. */ + struct cons_pointer meta; }; /** * payload of a string cell. At least at first, only one UTF character will * be stored in each cell. The doctrine that 'a symbol is just a string' - * didn't work; however, the payload of a symbol cell is identical to the - * payload of a string cell. + * didn't work; however, the payload of a symbol or keyword cell is identical + * to the payload of a string cell, except that a keyword may store a hash + * of its own value in the padding. */ struct string_payload { /** the actual character stored in this cell */ @@ -506,6 +546,15 @@ struct string_payload { struct cons_pointer cdr; }; +/** + * The payload of a time cell: an unsigned 128 bit value representing micro- + * seconds since the estimated date of the Big Bang (actually, for + * convenience, 14Bn years before 1st Jan 1970 (the UNIX epoch)) + */ +struct time_payload { + unsigned __int128 value; +}; + /** * payload of a vector pointer cell. */ @@ -591,6 +640,10 @@ struct cons_space_object { * if tag == STRINGTAG || tag == SYMBOLTAG */ struct string_payload string; + /** + * if tag == TIMETAG + */ + struct time_payload time; /** * if tag == TRUETAG; we'll treat the special cell T as just a cons */ @@ -608,6 +661,14 @@ void inc_ref( struct cons_pointer pointer ); void dec_ref( struct cons_pointer pointer ); +struct cons_pointer c_type( struct cons_pointer pointer ); + +struct cons_pointer c_car( struct cons_pointer arg ); + +struct cons_pointer c_cdr( struct cons_pointer arg ); + +int c_length( struct cons_pointer arg); + struct cons_pointer make_cons( struct cons_pointer car, struct cons_pointer cdr ); @@ -620,6 +681,8 @@ struct cons_pointer make_function( struct cons_pointer src, struct cons_pointer, struct cons_pointer ) ); +struct cons_pointer c_string_to_lisp_keyword( wchar_t *symbol ); + struct cons_pointer make_lambda( struct cons_pointer args, struct cons_pointer body ); @@ -634,11 +697,18 @@ struct cons_pointer make_special( struct cons_pointer src, struct cons_pointer make_string( wint_t c, struct cons_pointer tail ); -struct cons_pointer make_symbol( wint_t c, struct cons_pointer tail ); +struct cons_pointer make_symbol_or_key( wint_t c, struct cons_pointer tail, + char *tag ); -struct cons_pointer make_read_stream( FILE * input ); +#define make_symbol(c, t) (make_symbol_or_key( c, t, SYMBOLTAG)) -struct cons_pointer make_write_stream( FILE * output ); +#define make_keyword(c, t) (make_symbol_or_key( c, t, KEYTAG)) + +struct cons_pointer make_read_stream( URL_FILE * input, + struct cons_pointer metadata ); + +struct cons_pointer make_write_stream( URL_FILE * output, + struct cons_pointer metadata ); struct cons_pointer c_string_to_lisp_string( wchar_t *string ); diff --git a/src/memory/dump.c b/src/memory/dump.c index 7ec2631..074d1c4 100644 --- a/src/memory/dump.c +++ b/src/memory/dump.c @@ -21,103 +21,108 @@ #include "conspage.h" #include "consspaceobject.h" #include "debug.h" +#include "intern.h" +#include "map.h" #include "print.h" #include "stack.h" #include "vectorspace.h" -void dump_string_cell( FILE * output, wchar_t *prefix, +void dump_string_cell( URL_FILE * output, wchar_t *prefix, struct cons_pointer pointer ) { struct cons_space_object cell = pointer2cell( pointer ); if ( cell.payload.string.character == 0 ) { - fwprintf( output, - L"\t\t%ls cell: termination; next at page %d offset %d, count %u\n", - prefix, - cell.payload.string.cdr.page, cell.payload.string.cdr.offset, - cell.count ); + url_fwprintf( output, + L"\t\t%ls cell: termination; next at page %d offset %d, count %u\n", + prefix, + cell.payload.string.cdr.page, + cell.payload.string.cdr.offset, cell.count ); } else { - fwprintf( output, - L"\t\t%ls cell: character '%lc' (%d) next at page %d offset %d, count %u\n", - prefix, - ( wint_t ) cell.payload.string.character, - cell.payload.string.character, - cell.payload.string.cdr.page, - cell.payload.string.cdr.offset, cell.count ); - fwprintf( output, L"\t\t value: " ); + url_fwprintf( output, + L"\t\t%ls cell: character '%lc' (%d) next at page %d offset %d, count %u\n", + prefix, + ( wint_t ) cell.payload.string.character, + cell.payload.string.character, + cell.payload.string.cdr.page, + cell.payload.string.cdr.offset, cell.count ); + url_fwprintf( output, L"\t\t value: " ); print( output, pointer ); - fwprintf( output, L"\n" ); + url_fwprintf( output, L"\n" ); } } /** * dump the object at this cons_pointer to this output stream. */ -void dump_object( FILE * output, struct cons_pointer pointer ) { +void dump_object( URL_FILE * output, struct cons_pointer pointer ) { struct cons_space_object cell = pointer2cell( pointer ); - fwprintf( output, - L"\t%4.4s (%d) at page %d, offset %d count %u\n", - cell.tag.bytes, - cell.tag.value, pointer.page, pointer.offset, cell.count ); + url_fwprintf( output, + L"\t%4.4s (%d) at page %d, offset %d count %u\n", + cell.tag.bytes, + cell.tag.value, pointer.page, pointer.offset, cell.count ); switch ( cell.tag.value ) { case CONSTV: - fwprintf( output, - L"\t\tCons cell: car at page %d offset %d, cdr at page %d offset %d, count %u :", - cell.payload.cons.car.page, - cell.payload.cons.car.offset, - cell.payload.cons.cdr.page, - cell.payload.cons.cdr.offset, cell.count ); + url_fwprintf( output, + L"\t\tCons cell: car at page %d offset %d, cdr at page %d offset %d, count %u :", + cell.payload.cons.car.page, + cell.payload.cons.car.offset, + cell.payload.cons.cdr.page, + cell.payload.cons.cdr.offset, cell.count ); print( output, pointer ); - fputws( L"\n", output ); + url_fputws( L"\n", output ); break; case EXCEPTIONTV: - fwprintf( output, L"\t\tException cell: " ); + url_fwprintf( output, L"\t\tException cell: " ); dump_stack_trace( output, pointer ); break; case FREETV: - fwprintf( output, L"\t\tFree cell: next at page %d offset %d\n", - cell.payload.cons.cdr.page, - cell.payload.cons.cdr.offset ); + url_fwprintf( output, + L"\t\tFree cell: next at page %d offset %d\n", + cell.payload.cons.cdr.page, + cell.payload.cons.cdr.offset ); break; case INTEGERTV: - fwprintf( output, - L"\t\tInteger cell: value %ld, count %u\n", - cell.payload.integer.value, cell.count ); + url_fwprintf( output, + L"\t\tInteger cell: value %ld, count %u\n", + cell.payload.integer.value, cell.count ); if ( !nilp( cell.payload.integer.more ) ) { - fputws( L"\t\tBIGNUM! More at:\n", output ); + url_fputws( L"\t\tBIGNUM! More at:\n", output ); dump_object( output, cell.payload.integer.more ); } break; case LAMBDATV: - fwprintf( output, L"\t\tLambda cell;\n\t\t args: " ); + url_fwprintf( output, L"\t\t\u03bb cell;\n\t\t args: " ); print( output, cell.payload.lambda.args ); - fwprintf( output, L";\n\t\t\tbody: " ); + url_fwprintf( output, L";\n\t\t\tbody: " ); print( output, cell.payload.lambda.body ); - fputws( L"\n", output ); + url_fputws( L"\n", output ); break; case NILTV: break; case NLAMBDATV: - fwprintf( output, L"\t\tNlambda cell; \n\t\targs: " ); + url_fwprintf( output, L"\t\tn\u03bb cell; \n\t\targs: " ); print( output, cell.payload.lambda.args ); - fwprintf( output, L";\n\t\t\tbody: " ); + url_fwprintf( output, L";\n\t\t\tbody: " ); print( output, cell.payload.lambda.body ); - fputws( L"\n", output ); + url_fputws( L"\n", output ); break; case RATIOTV: - fwprintf( output, - L"\t\tRational cell: value %ld/%ld, count %u\n", - pointer2cell( cell.payload.ratio.dividend ).payload. - integer.value, - pointer2cell( cell.payload.ratio.divisor ).payload. - integer.value, cell.count ); + url_fwprintf( output, + L"\t\tRational cell: value %ld/%ld, count %u\n", + pointer2cell( cell.payload.ratio.dividend ). + payload.integer.value, + pointer2cell( cell.payload.ratio.divisor ). + payload.integer.value, cell.count ); break; case READTV: - fwprintf( output, L"\t\tInput stream\n" ); + url_fputws( L"\t\tInput stream; metadata: ", output ); + print( output, cell.payload.stream.meta ); + url_fputws( L"\n", output ); break; case REALTV: - fwprintf( output, L"\t\tReal cell: value %Lf, count %u\n", - cell.payload.real.value, cell.count ); + url_fwprintf( output, L"\t\tReal cell: value %Lf, count %u\n", + cell.payload.real.value, cell.count ); break; case STRINGTV: dump_string_cell( output, L"String", pointer ); @@ -128,14 +133,14 @@ void dump_object( FILE * output, struct cons_pointer pointer ) { case TRUETV: break; case VECTORPOINTTV:{ - fwprintf( output, - L"\t\tPointer to vector-space object at %p\n", - cell.payload.vectorp.address ); + url_fwprintf( output, + L"\t\tPointer to vector-space object at %p\n", + cell.payload.vectorp.address ); struct vector_space_object *vso = cell.payload.vectorp.address; - fwprintf( output, - L"\t\tVector space object of type %4.4s (%d), payload size %d bytes\n", - &vso->header.tag.bytes, vso->header.tag.value, - vso->header.size ); + url_fwprintf( output, + L"\t\tVector space object of type %4.4s (%d), payload size %d bytes\n", + &vso->header.tag.bytes, vso->header.tag.value, + vso->header.size ); if ( stackframep( vso ) ) { dump_frame( output, pointer ); } @@ -143,11 +148,16 @@ void dump_object( FILE * output, struct cons_pointer pointer ) { case STACKFRAMETV: dump_frame( output, pointer ); break; + case MAPTV: + dump_map( output, pointer); + break; } } break; case WRITETV: - fwprintf( output, L"\t\tOutput stream\n" ); + url_fputws( L"\t\tOutput stream; metadata: ", output ); + print( output, cell.payload.stream.meta ); + url_fputws( L"\n", output ); break; } } diff --git a/src/memory/dump.h b/src/memory/dump.h index ec8928e..f8ef75f 100644 --- a/src/memory/dump.h +++ b/src/memory/dump.h @@ -20,6 +20,6 @@ #define __dump_h -void dump_object( FILE * output, struct cons_pointer pointer ); +void dump_object( URL_FILE * output, struct cons_pointer pointer ); #endif diff --git a/src/memory/lookup3.c b/src/memory/lookup3.c new file mode 100644 index 0000000..006d513 --- /dev/null +++ b/src/memory/lookup3.c @@ -0,0 +1,1001 @@ +/* +------------------------------------------------------------------------------- +lookup3.c, by Bob Jenkins, May 2006, Public Domain. + +These are functions for producing 32-bit hashes for hash table lookup. +hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() +are externally useful functions. Routines to test the hash are included +if SELF_TEST is defined. You can use this free for any purpose. It's in +the public domain. It has no warranty. + +You probably want to use hashlittle(). hashlittle() and hashbig() +hash byte arrays. hashlittle() is is faster than hashbig() on +little-endian machines. Intel and AMD are little-endian machines. +On second thought, you probably want hashlittle2(), which is identical to +hashlittle() except it returns two 32-bit hashes for the price of one. +You could implement hashbig2() if you wanted but I haven't bothered here. + +If you want to find a hash of, say, exactly 7 integers, do + a = i1; b = i2; c = i3; + mix(a,b,c); + a += i4; b += i5; c += i6; + mix(a,b,c); + a += i7; + final(a,b,c); +then use c as the hash value. If you have a variable length array of +4-byte integers to hash, use hashword(). If you have a byte array (like +a character string), use hashlittle(). If you have several byte arrays, or +a mix of things, see the comments above hashlittle(). + +Why is this so big? I read 12 bytes at a time into 3 4-byte integers, +then mix those integers. This is fast (you can do a lot more thorough +mixing with 12*3 instructions on 3 integers than you can with 3 instructions +on 1 byte), but shoehorning those bytes into integers efficiently is messy. +------------------------------------------------------------------------------- +*/ +// #define SELF_TEST 1 + +#include /* defines printf for tests */ +#include /* defines time_t for timings in the test */ +#include /* defines uint32_t etc */ +#include /* attempt to define endianness */ +#ifdef linux +# include /* attempt to define endianness */ +#endif + +/* + * My best guess at if you are big-endian or little-endian. This may + * need adjustment. + */ +#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ + __BYTE_ORDER == __LITTLE_ENDIAN) || \ + (defined(i386) || defined(__i386__) || defined(__i486__) || \ + defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL)) +# define HASH_LITTLE_ENDIAN 1 +# define HASH_BIG_ENDIAN 0 +#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ + __BYTE_ORDER == __BIG_ENDIAN) || \ + (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel)) +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 1 +#else +# define HASH_LITTLE_ENDIAN 0 +# define HASH_BIG_ENDIAN 0 +#endif + +#define hashsize(n) ((uint32_t)1<<(n)) +#define hashmask(n) (hashsize(n)-1) +#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k)))) + +/* +------------------------------------------------------------------------------- +mix -- mix 3 32-bit values reversibly. + +This is reversible, so any information in (a,b,c) before mix() is +still in (a,b,c) after mix(). + +If four pairs of (a,b,c) inputs are run through mix(), or through +mix() in reverse, there are at least 32 bits of the output that +are sometimes the same for one pair and different for another pair. +This was tested for: +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that +satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 +Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing +for "differ" defined as + with a one-bit base and a two-bit delta. I +used http://burtleburtle.net/bob/hash/avalanche.html to choose +the operations, constants, and arrangements of the variables. + +This does not achieve avalanche. There are input bits of (a,b,c) +that fail to affect some output bits of (a,b,c), especially of a. The +most thoroughly mixed value is c, but it doesn't really even achieve +avalanche in c. + +This allows some parallelism. Read-after-writes are good at doubling +the number of bits affected, so the goal of mixing pulls in the opposite +direction as the goal of parallelism. I did what I could. Rotates +seem to cost as much as shifts on every machine I could lay my hands +on, and rotates are much kinder to the top and bottom bits, so I used +rotates. +------------------------------------------------------------------------------- +*/ +#define mix(a,b,c) \ +{ \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c,16); c += b; \ + b -= a; b ^= rot(a,19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} + +/* +------------------------------------------------------------------------------- +final -- final mixing of 3 32-bit values (a,b,c) into c + +Pairs of (a,b,c) values differing in only a few bits will usually +produce values of c that look totally different. This was tested for +* pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). +* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. +* the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + +These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 +and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 +------------------------------------------------------------------------------- +*/ +#define final(a,b,c) \ +{ \ + c ^= b; c -= rot(b,14); \ + a ^= c; a -= rot(c,11); \ + b ^= a; b -= rot(a,25); \ + c ^= b; c -= rot(b,16); \ + a ^= c; a -= rot(c,4); \ + b ^= a; b -= rot(a,14); \ + c ^= b; c -= rot(b,24); \ +} + +/* +-------------------------------------------------------------------- + This works on all machines. To be useful, it requires + -- that the key be an array of uint32_t's, and + -- that the length be the number of uint32_t's in the key + + The function hashword() is identical to hashlittle() on little-endian + machines, and identical to hashbig() on big-endian machines, + except that the length has to be measured in uint32_ts rather than in + bytes. hashlittle() is more complicated than hashword() only because + hashlittle() has to dance around fitting the key bytes into registers. +-------------------------------------------------------------------- +*/ +uint32_t hashword( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t initval) /* the previous hash, or an arbitrary value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + return c; +} + + +/* +-------------------------------------------------------------------- +hashword2() -- same as hashword(), but take two seeds and return two +32-bit values. pc and pb must both be nonnull, and *pc and *pb must +both be initialized with seeds. If you pass in (*pb)==0, the output +(*pc) will be the same as the return value from hashword(). +-------------------------------------------------------------------- +*/ +void hashword2 ( +const uint32_t *k, /* the key, an array of uint32_t values */ +size_t length, /* the length of the key, in uint32_ts */ +uint32_t *pc, /* IN: seed OUT: primary hash value */ +uint32_t *pb) /* IN: more seed OUT: secondary hash value */ +{ + uint32_t a,b,c; + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc; + c += *pb; + + /*------------------------------------------------- handle most of the key */ + while (length > 3) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 3; + k += 3; + } + + /*------------------------------------------- handle the last 3 uint32_t's */ + switch(length) /* all the case statements fall through */ + { + case 3 : c+=k[2]; + case 2 : b+=k[1]; + case 1 : a+=k[0]; + final(a,b,c); + case 0: /* case 0: nothing left to add */ + break; + } + /*------------------------------------------------------ report the result */ + *pc=c; *pb=b; +} + + +/* +------------------------------------------------------------------------------- +hashlittle() -- hash a variable-length key into a 32-bit value + k : the key (the unaligned variable-length array of bytes) + length : the length of the key, counting by bytes + initval : can be any 4-byte value +Returns a 32-bit value. Every bit of the key affects every bit of +the return value. Two keys differing by one or two bits will have +totally different hash values. + +The best hash table sizes are powers of 2. There is no need to do +mod a prime (mod is sooo slow!). If you need less than 32 bits, +use a bitmask. For example, if you need only 10 bits, do + h = (h & hashmask(10)); +In which case, the hash table should have hashsize(10) elements. + +If you are hashing n strings (uint8_t **)k, do it like this: + for (i=0, h=0; i 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : return c; + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : return c; /* zero length requires no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + + +/* + * hashlittle2: return 2 32-bit hash values + * + * This is identical to hashlittle(), except it returns two 32-bit hash + * values instead of just one. This is good enough for hash table + * lookup with 2^^64 buckets, or if you want a second hash if you're not + * happy with the first, or if you want a probably-unique 64-bit ID for + * the key. *pc is better mixed than *pb, so use *pc first. If you want + * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)". + */ +void hashlittle2( + const void *key, /* the key to hash */ + size_t length, /* length of the key */ + uint32_t *pc, /* IN: primary initval, OUT: primary hash */ + uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */ +{ + uint32_t a,b,c; /* internal state */ + union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc; + c += *pb; + + u.ptr = key; + if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + const uint8_t *k8; + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]&0xffffff" actually reads beyond the end of the string, but + * then masks off the part it's not allowed to read. Because the + * string is aligned, the masked-off tail is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff; a+=k[0]; break; + case 6 : b+=k[1]&0xffff; a+=k[0]; break; + case 5 : b+=k[1]&0xff; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff; break; + case 2 : a+=k[0]&0xffff; break; + case 1 : a+=k[0]&0xff; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<8; /* fall through */ + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */ + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */ + case 1 : a+=k8[0]; break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + +#endif /* !valgrind */ + + } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) { + const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */ + const uint8_t *k8; + + /*--------------- all but last block: aligned reads and different mixing */ + while (length > 12) + { + a += k[0] + (((uint32_t)k[1])<<16); + b += k[2] + (((uint32_t)k[3])<<16); + c += k[4] + (((uint32_t)k[5])<<16); + mix(a,b,c); + length -= 12; + k += 6; + } + + /*----------------------------- handle the last (probably partial) block */ + k8 = (const uint8_t *)k; + switch(length) + { + case 12: c+=k[4]+(((uint32_t)k[5])<<16); + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 11: c+=((uint32_t)k8[10])<<16; /* fall through */ + case 10: c+=k[4]; + b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 9 : c+=k8[8]; /* fall through */ + case 8 : b+=k[2]+(((uint32_t)k[3])<<16); + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */ + case 6 : b+=k[2]; + a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 5 : b+=k8[4]; /* fall through */ + case 4 : a+=k[0]+(((uint32_t)k[1])<<16); + break; + case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */ + case 2 : a+=k[0]; + break; + case 1 : a+=k8[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + a += ((uint32_t)k[1])<<8; + a += ((uint32_t)k[2])<<16; + a += ((uint32_t)k[3])<<24; + b += k[4]; + b += ((uint32_t)k[5])<<8; + b += ((uint32_t)k[6])<<16; + b += ((uint32_t)k[7])<<24; + c += k[8]; + c += ((uint32_t)k[9])<<8; + c += ((uint32_t)k[10])<<16; + c += ((uint32_t)k[11])<<24; + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=((uint32_t)k[11])<<24; + case 11: c+=((uint32_t)k[10])<<16; + case 10: c+=((uint32_t)k[9])<<8; + case 9 : c+=k[8]; + case 8 : b+=((uint32_t)k[7])<<24; + case 7 : b+=((uint32_t)k[6])<<16; + case 6 : b+=((uint32_t)k[5])<<8; + case 5 : b+=k[4]; + case 4 : a+=((uint32_t)k[3])<<24; + case 3 : a+=((uint32_t)k[2])<<16; + case 2 : a+=((uint32_t)k[1])<<8; + case 1 : a+=k[0]; + break; + case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */ + } + } + + final(a,b,c); + *pc=c; *pb=b; +} + + + +/* + * hashbig(): + * This is the same as hashword() on big-endian machines. It is different + * from hashlittle() on all machines. hashbig() takes advantage of + * big-endian byte ordering. + */ +uint32_t hashbig( const void *key, size_t length, uint32_t initval) +{ + uint32_t a,b,c; + union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */ + + /* Set up the internal state */ + a = b = c = 0xdeadbeef + ((uint32_t)length) + initval; + + u.ptr = key; + if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) { + const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */ + const uint8_t *k8; + + /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */ + while (length > 12) + { + a += k[0]; + b += k[1]; + c += k[2]; + mix(a,b,c); + length -= 12; + k += 3; + } + + /*----------------------------- handle the last (probably partial) block */ + /* + * "k[2]<<8" actually reads beyond the end of the string, but + * then shifts out the part it's not allowed to read. Because the + * string is aligned, the illegal read is in the same word as the + * rest of the string. Every machine with memory protection I've seen + * does it on word boundaries, so is OK with this. But VALGRIND will + * still catch it and complain. The masking trick does make the hash + * noticably faster for short strings (like English words). + */ +#ifndef VALGRIND + + switch(length) + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break; + case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break; + case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break; + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=k[1]&0xffffff00; a+=k[0]; break; + case 6 : b+=k[1]&0xffff0000; a+=k[0]; break; + case 5 : b+=k[1]&0xff000000; a+=k[0]; break; + case 4 : a+=k[0]; break; + case 3 : a+=k[0]&0xffffff00; break; + case 2 : a+=k[0]&0xffff0000; break; + case 1 : a+=k[0]&0xff000000; break; + case 0 : return c; /* zero length strings require no mixing */ + } + +#else /* make valgrind happy */ + + k8 = (const uint8_t *)k; + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[2]; b+=k[1]; a+=k[0]; break; + case 11: c+=((uint32_t)k8[10])<<8; /* fall through */ + case 10: c+=((uint32_t)k8[9])<<16; /* fall through */ + case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */ + case 8 : b+=k[1]; a+=k[0]; break; + case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */ + case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */ + case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */ + case 4 : a+=k[0]; break; + case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */ + case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */ + case 1 : a+=((uint32_t)k8[0])<<24; break; + case 0 : return c; + } + +#endif /* !VALGRIND */ + + } else { /* need to read the key one byte at a time */ + const uint8_t *k = (const uint8_t *)key; + + /*--------------- all but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) + { + a += ((uint32_t)k[0])<<24; + a += ((uint32_t)k[1])<<16; + a += ((uint32_t)k[2])<<8; + a += ((uint32_t)k[3]); + b += ((uint32_t)k[4])<<24; + b += ((uint32_t)k[5])<<16; + b += ((uint32_t)k[6])<<8; + b += ((uint32_t)k[7]); + c += ((uint32_t)k[8])<<24; + c += ((uint32_t)k[9])<<16; + c += ((uint32_t)k[10])<<8; + c += ((uint32_t)k[11]); + mix(a,b,c); + length -= 12; + k += 12; + } + + /*-------------------------------- last block: affect all 32 bits of (c) */ + switch(length) /* all the case statements fall through */ + { + case 12: c+=k[11]; + case 11: c+=((uint32_t)k[10])<<8; + case 10: c+=((uint32_t)k[9])<<16; + case 9 : c+=((uint32_t)k[8])<<24; + case 8 : b+=k[7]; + case 7 : b+=((uint32_t)k[6])<<8; + case 6 : b+=((uint32_t)k[5])<<16; + case 5 : b+=((uint32_t)k[4])<<24; + case 4 : a+=k[3]; + case 3 : a+=((uint32_t)k[2])<<8; + case 2 : a+=((uint32_t)k[1])<<16; + case 1 : a+=((uint32_t)k[0])<<24; + break; + case 0 : return c; + } + } + + final(a,b,c); + return c; +} + + +#ifdef SELF_TEST + +/* used for timings */ +void driver1() +{ + uint8_t buf[256]; + uint32_t i; + uint32_t h=0; + time_t a,z; + + time(&a); + for (i=0; i<256; ++i) buf[i] = 'x'; + for (i=0; i<1; ++i) + { + h = hashlittle(&buf[0],1,h); + } + time(&z); + if (z-a > 0) printf("time %d %.8x\n", z-a, h); +} + +/* check that every input bit changes every output bit half the time */ +#define HASHSTATE 1 +#define HASHLEN 1 +#define MAXPAIR 60 +#define MAXLEN 70 +void driver2() +{ + uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1]; + uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z; + uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE]; + uint32_t x[HASHSTATE],y[HASHSTATE]; + uint32_t hlen; + + printf("No more than %d trials should ever be needed \n",MAXPAIR/2); + for (hlen=0; hlen < MAXLEN; ++hlen) + { + z=0; + for (i=0; i>(8-j)); + c[0] = hashlittle(a, hlen, m); + b[i] ^= ((k+1)<>(8-j)); + d[0] = hashlittle(b, hlen, m); + /* check every bit is 1, 0, set, and not set at least once */ + for (l=0; lz) z=k; + if (k==MAXPAIR) + { + printf("Some bit didn't change: "); + printf("%.8x %.8x %.8x %.8x %.8x %.8x ", + e[0],f[0],g[0],h[0],x[0],y[0]); + printf("i %d j %d m %d len %d\n", i, j, m, hlen); + } + if (z==MAXPAIR) goto done; + } + } + } + done: + if (z < MAXPAIR) + { + printf("Mix success %2d bytes %2d initvals ",i,m); + printf("required %d trials\n", z/2); + } + } + printf("\n"); +} + +/* Check for reading beyond the end of the buffer and alignment problems */ +void driver3() +{ + uint8_t buf[MAXLEN+20], *b; + uint32_t len; + uint8_t q[] = "This is the time for all good men to come to the aid of their country..."; + uint32_t h; + uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country..."; + uint32_t i; + uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country..."; + uint32_t j; + uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country..."; + uint32_t ref,x,y; + uint8_t *p; + + printf("Endianness. These lines should all be the same (for values filled in):\n"); + printf("%.8x %.8x %.8x\n", + hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13), + hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13), + hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13)); + p = q; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qq[1]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqq[2]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + p = &qqqq[3]; + printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n", + hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13), + hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13), + hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13), + hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13), + hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13), + hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13)); + printf("\n"); + + /* check that hashlittle2 and hashlittle produce the same results */ + i=47; j=0; + hashlittle2(q, sizeof(q), &i, &j); + if (hashlittle(q, sizeof(q), 47) != i) + printf("hashlittle2 and hashlittle mismatch\n"); + + /* check that hashword2 and hashword produce the same results */ + len = 0xdeadbeef; + i=47, j=0; + hashword2(&len, 1, &i, &j); + if (hashword(&len, 1, 47) != i) + printf("hashword2 and hashword mismatch %x %x\n", + i, hashword(&len, 1, 47)); + + /* check hashlittle doesn't read before or after the ends of the string */ + for (h=0, b=buf+1; h<8; ++h, ++b) + { + for (i=0; i + * Public domain. + */ + +#ifndef __lookup3_h +#define __lookup3_h + +uint32_t hashword( +const uint32_t *k, +size_t length, +uint32_t initval); + +#endif diff --git a/src/memory/map.c b/src/memory/map.c new file mode 100644 index 0000000..cbad3df --- /dev/null +++ b/src/memory/map.c @@ -0,0 +1,289 @@ +/* + * map.c + * + * An immutable hashmap in vector space. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "consspaceobject.h" +#include "conspage.h" +#include "debug.h" +#include "dump.h" +#include "fopen.h" +#include "intern.h" +#include "io.h" +#include "lookup3.h" +#include "map.h" +#include "print.h" +#include "vectorspace.h" + +/* \todo: a lot of this will be inherited by namespaces, regularities and + * homogeneities. Exactly how I don't yet know. */ + +/** + * Get a hash value for this key. + */ +uint32_t get_hash_32(struct cons_pointer f, struct cons_pointer key) { + uint32_t result = 0; + int l = c_length(key); + + if (keywordp(key) || stringp(key) || symbolp( key)) { + if ( l > 0) { + uint32_t buffer[l]; + + if (!nilp(f)) { + fputws(L"Custom hashing functions are not yet implemented.\n", stderr); + } + for (int i = 0; i < l; i++) { + buffer[i] = (uint32_t)pointer2cell(key).payload.string.character; + } + + result = hashword( buffer, l, 0); + } + } else { + fputws(L"Hashing is thus far implemented only for keys, strings and symbols.\n", stderr); + } + + return result; +} + +/** + * get the actual map object from this `pointer`, or NULL if + * `pointer` is not a map pointer. + */ +struct map_payload *get_map_payload( struct cons_pointer pointer ) { + struct map_payload *result = NULL; + struct vector_space_object *vso = + pointer2cell( pointer ).payload.vectorp.address; + + if (vectorpointp(pointer) && mapp( vso ) ) { + result = ( struct map_payload * ) &( vso->payload ); + debug_printf( DEBUG_BIND, + L"get_map_payload: all good, returning %p\n", result ); + } else { + debug_print( L"get_map_payload: fail, returning NULL\n", DEBUG_BIND ); + } + + return result; +} + + +/** + * Make an empty immutable map, and return it. + * + * @param hash_function a pointer to a function of one argument, which + * returns an integer; or (more usually) `nil`. + * @return the new map, or NULL if memory is exhausted. + */ +struct cons_pointer make_empty_map( struct cons_pointer hash_function ) { + debug_print( L"Entering make_empty_map\n", DEBUG_BIND ); + struct cons_pointer result = + make_vso( MAPTAG, sizeof( struct map_payload ) ); + + if ( !nilp( result ) ) { + struct map_payload *payload = get_map_payload( result ); + + payload->hash_function = functionp( hash_function) ? hash_function : NIL; + inc_ref(hash_function); + + for ( int i = 0; i < BUCKETSINMAP; i++) { + payload->buckets[i] = NIL; + } + } + + debug_print( L"Leaving make_empty_map\n", DEBUG_BIND ); + return result; +} + + +struct cons_pointer make_duplicate_map( struct cons_pointer parent) { + struct cons_pointer result = NIL; + struct map_payload * parent_payload = get_map_payload(parent); + + if (parent_payload != NULL) { + result = + make_vso( MAPTAG, sizeof( struct map_payload ) ); + + if ( !nilp( result ) ) { + struct map_payload *payload = get_map_payload( result ); + + payload->hash_function = parent_payload->hash_function; + inc_ref(payload->hash_function); + + for ( int i = 0; i < BUCKETSINMAP; i++) { + payload->buckets[i] = parent_payload->buckets[i]; + inc_ref(payload->buckets[i]); + } + } + } + + return result; +} + + +struct cons_pointer bind_in_map( struct cons_pointer parent, + struct cons_pointer key, + struct cons_pointer value) { + struct cons_pointer result = make_duplicate_map(parent); + + if ( !nilp( result)) { + struct map_payload * payload = get_map_payload( result ); + int bucket = get_hash_32(payload->hash_function, key) % BUCKETSINMAP; + + payload->buckets[bucket] = make_cons( + make_cons(key, value), payload->buckets[bucket]); + + inc_ref(payload->buckets[bucket]); + } + + return result; +} + + +struct cons_pointer keys( struct cons_pointer store) { + debug_print( L"Entering keys\n", DEBUG_BIND ); + struct cons_pointer result = NIL; + + struct cons_space_object cell = pointer2cell( store ); + + switch (pointer2cell( store ).tag.value) { + case CONSTV: + for (struct cons_pointer c = store; !nilp(c); c = c_cdr(c)) { + result = make_cons( c_car( c_car( c)), result); + } + break; + case VECTORPOINTTV: { + struct vector_space_object *vso = + pointer2cell( store ).payload.vectorp.address; + + if ( mapp( vso ) ) { + struct map_payload * payload = get_map_payload( store ); + + for (int bucket = 0; bucket < BUCKETSINMAP; bucket++) { + for (struct cons_pointer c = payload->buckets[bucket]; + !nilp(c); c = c_cdr(c)) { + debug_print( L"keys: c is ", DEBUG_BIND); + debug_print_object( c, DEBUG_BIND); + + result = make_cons( c_car( c_car( c)), result); + debug_print( L"; result is ", DEBUG_BIND); + debug_print_object( result, DEBUG_BIND); + debug_println( DEBUG_BIND); + } + } + } + } + break; + } + debug_print( L"keys returning ", DEBUG_BIND ); + debug_print_object( result, DEBUG_BIND ); + debug_println( DEBUG_BIND); + + return result; +} + +/** + * Return a new map which represents the merger of `to_merge` into + * `parent`. `parent` must be a map, but `to_merge` may be a map or + * an assoc list. + * + * @param parent a map; + * @param to_merge an association from which key/value pairs will be merged. + * @result a new map, containing all key/value pairs from `to_merge` + * together with those key/value pairs from `parent` whose keys did not + * collide. + */ +struct cons_pointer merge_into_map( struct cons_pointer parent, + struct cons_pointer to_merge) { + debug_print( L"Entering merge_into_map\n", DEBUG_BIND ); + struct cons_pointer result = make_duplicate_map(parent); + + if (!nilp(result)) { + struct map_payload *payload = get_map_payload( result ); + for (struct cons_pointer c = keys(to_merge); + !nilp(c); c = c_cdr(c)) { + struct cons_pointer key = c_car( c); + int bucket = get_hash_32(payload->hash_function, key) % BUCKETSINMAP; + + payload->buckets[bucket] = make_cons( + make_cons( key, c_assoc( key, to_merge)), + payload->buckets[bucket]); + } + } + + debug_print( L"Leaving merge_into_map\n", DEBUG_BIND ); + + return result; +} + + +struct cons_pointer assoc_in_map( struct cons_pointer key, + struct cons_pointer map) { + debug_print( L"Entering assoc_in_map\n", DEBUG_BIND ); + struct cons_pointer result = NIL; + struct map_payload *payload = get_map_payload( map ); + + if (payload != NULL) { + int bucket = get_hash_32(payload->hash_function, key) % BUCKETSINMAP; + result = c_assoc(key, payload->buckets[bucket]); + } + + debug_print( L"assoc_in_map returning ", DEBUG_BIND ); + debug_print_object( result, DEBUG_BIND); + debug_println( DEBUG_BIND); + + return result; +} + + +/** + * Function: create a map initialised with key/value pairs from my + * first argument. + * + * * (make-map) + * * (make-map store) + * + * @param frame the stack frame in which the expression is to be interpreted; + * @param frame_pointer a pointer to my stack_frame. + * @param env the environment in which it is to be intepreted. + * @return a new containing all the key/value pairs from store. + */ +struct cons_pointer +lisp_make_map( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + return merge_into_map( make_empty_map( NIL), frame->arg[0]); +} + +/** + * Dump a map to this stream for debugging + * @param output the stream + * @param map_pointer the pointer to the frame + */ +void dump_map( URL_FILE * output, struct cons_pointer map_pointer ) { + struct vector_space_object *vso = + pointer2cell( map_pointer ).payload.vectorp.address; + + if (vectorpointp(map_pointer) && mapp( vso ) ) { + struct map_payload *payload = get_map_payload( map_pointer ); + + if ( payload != NULL ) { + url_fputws( L"Immutable map; hash function: ", output ); + + if (nilp(payload->hash_function)) { + url_fputws( L"default", output); + } else { + dump_object( output, payload->hash_function); + } + + for (int i = 0; i < BUCKETSINMAP; i++) { + url_fwprintf(output, L"\n\tBucket %d: ", i); + print( output, payload->buckets[i]); + } + } + } +} + diff --git a/src/memory/map.h b/src/memory/map.h new file mode 100644 index 0000000..c9b5cfc --- /dev/null +++ b/src/memory/map.h @@ -0,0 +1,96 @@ +/* + * map.h + * + * An immutable hashmap in vector space. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_map_h +#define __psse_map_h + +#include "consspaceobject.h" +#include "conspage.h" + +/** + * macros for the tag of a mutable map. + */ +#define MAPTAG "IMAP" +#define MAPTV 1346456905 + +/** + * Number of buckets in a single tier map. + */ +#define BUCKETSINMAP 256 + +/** + * Maximum number of entries in an association-list bucket. + */ +#define MAXENTRIESINASSOC 16 + +/** + * true if this vector_space_object is a map, else false. + */ +#define mapp( vso) (((struct vector_space_object *)vso)->header.tag.value == MAPTV) + +/** + * The vector-space payload of a map object. Essentially a vector of + * `BUCKETSINMAP` + 1 `cons_pointer`s, but the first one is considered + * special. + */ +struct map_payload { + /** + * There is a default hash function, which is used if `hash_function` is + * `nil` (which it normally should be); and keywords will probably carry + * their own hash values. But it will be possible to override the hash + * function by putting a function of one argument returning an integer + * here. */ + struct cons_pointer hash_function; + + /** + * Obviously the number of buckets in a map is a trade off, and this may need + * tuning - or it may even be necessary to have different sized base maps. The + * idea here is that the value of a bucket is + * + * 1. `nil`; or + * 2. an association list; or + * 3. a map. + * + * All buckets are initially `nil`. Adding a value to a `nil` bucket returns + * a map with a new bucket in the form of an assoc list. Subsequent additions + * cons new key/value pairs onto the assoc list, until there are + * `MAXENTRIESINASSOC` pairs, at which point if a further value is added to + * the same bucket the bucket returned will be in the form of a second level + * map. My plan is that buckets the first level map will be indexed on the + * first sixteen bits of the hash value, those in the second on the second + * sixteen, and, potentially, so on. + */ + struct cons_pointer buckets[BUCKETSINMAP]; +}; + +uint32_t get_hash_32(struct cons_pointer f, struct cons_pointer key); + +struct map_payload *get_map_payload( struct cons_pointer pointer ); + +struct cons_pointer make_empty_map( struct cons_pointer hash_function ); + +struct cons_pointer bind_in_map( struct cons_pointer parent, + struct cons_pointer key, + struct cons_pointer value); + +struct cons_pointer keys( struct cons_pointer store); + +struct cons_pointer merge_into_map( struct cons_pointer parent, + struct cons_pointer to_merge); + +struct cons_pointer assoc_in_map( struct cons_pointer key, + struct cons_pointer map); + +struct cons_pointer lisp_make_map( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); + +void dump_map( URL_FILE * output, struct cons_pointer map_pointer ); + +#endif diff --git a/src/memory/stack.c b/src/memory/stack.c index cf68701..3f4a271 100644 --- a/src/memory/stack.c +++ b/src/memory/stack.c @@ -34,9 +34,9 @@ void set_reg( struct stack_frame *frame, int reg, struct cons_pointer value ) { debug_printf( DEBUG_STACK, L"Setting register %d to ", reg ); debug_print_object( value, DEBUG_STACK ); debug_println( DEBUG_STACK ); - dec_ref(frame->arg[reg]); /* if there was anything in that slot - * previously other than NIL, we need to decrement it; - * NIL won't be decremented as it is locked. */ + dec_ref( frame->arg[reg] ); /* if there was anything in that slot + * previously other than NIL, we need to decrement it; + * NIL won't be decremented as it is locked. */ frame->arg[reg] = value; inc_ref( value ); @@ -241,34 +241,34 @@ void free_stack_frame( struct stack_frame *frame ) { * @param output the stream * @param frame_pointer the pointer to the frame */ -void dump_frame( FILE * output, struct cons_pointer frame_pointer ) { +void dump_frame( URL_FILE * output, struct cons_pointer frame_pointer ) { struct stack_frame *frame = get_stack_frame( frame_pointer ); if ( frame != NULL ) { - fwprintf( output, L"Stack frame with %d arguments:\n", frame->args ); + url_fwprintf( output, L"Stack frame with %d arguments:\n", + frame->args ); for ( int arg = 0; arg < frame->args; arg++ ) { struct cons_space_object cell = pointer2cell( frame->arg[arg] ); - fwprintf( output, L"Arg %d:\t%c%c%c%c\tcount: %10u\tvalue: ", arg, - cell.tag.bytes[0], - cell.tag.bytes[1], cell.tag.bytes[2], cell.tag.bytes[3], - cell.count ); + url_fwprintf( output, L"Arg %d:\t%c%c%c%c\tcount: %10u\tvalue: ", + arg, cell.tag.bytes[0], cell.tag.bytes[1], + cell.tag.bytes[2], cell.tag.bytes[3], cell.count ); print( output, frame->arg[arg] ); - fputws( L"\n", output ); + url_fputws( L"\n", output ); } if ( !nilp( frame->more ) ) { - fputws( L"More: \t", output ); + url_fputws( L"More: \t", output ); print( output, frame->more ); - fputws( L"\n", output ); + url_fputws( L"\n", output ); } } } -void dump_stack_trace( FILE * output, struct cons_pointer pointer ) { +void dump_stack_trace( URL_FILE * output, struct cons_pointer pointer ) { if ( exceptionp( pointer ) ) { print( output, pointer2cell( pointer ).payload.exception.message ); - fputws( L"\n", output ); + url_fputws( L"\n", output ); dump_stack_trace( output, pointer2cell( pointer ).payload.exception.frame ); } else { diff --git a/src/memory/stack.h b/src/memory/stack.h index 11763b2..f132c69 100644 --- a/src/memory/stack.h +++ b/src/memory/stack.h @@ -18,12 +18,12 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ +#ifndef __psse_stack_h +#define __psse_stack_h + #include "consspaceobject.h" #include "conspage.h" -#ifndef __stack_h -#define __stack_h - /** * macros for the tag of a stack frame. */ @@ -47,9 +47,9 @@ struct cons_pointer make_stack_frame( struct cons_pointer previous, void free_stack_frame( struct stack_frame *frame ); -void dump_frame( FILE * output, struct cons_pointer pointer ); +void dump_frame( URL_FILE * output, struct cons_pointer pointer ); -void dump_stack_trace( FILE * output, struct cons_pointer frame_pointer ); +void dump_stack_trace( URL_FILE * output, struct cons_pointer frame_pointer ); struct cons_pointer fetch_arg( struct stack_frame *frame, unsigned int n ); diff --git a/src/memory/vectorspace.c b/src/memory/vectorspace.c index 9d98a77..480effb 100644 --- a/src/memory/vectorspace.c +++ b/src/memory/vectorspace.c @@ -36,7 +36,8 @@ * @return a cons_pointer to the object, or NIL if the object could not be * allocated due to memory exhaustion. */ -struct cons_pointer make_vec_pointer( struct vector_space_object *address, char *tag ) { +struct cons_pointer make_vec_pointer( struct vector_space_object *address, + char *tag ) { debug_print( L"Entered make_vec_pointer\n", DEBUG_ALLOC ); struct cons_pointer pointer = allocate_cell( VECTORPOINTTAG ); struct cons_space_object *cell = &pointer2cell( pointer ); @@ -46,7 +47,7 @@ struct cons_pointer make_vec_pointer( struct vector_space_object *address, char address ); cell->payload.vectorp.address = address; - strncpy(&cell->payload.vectorp.tag.bytes[0], tag, TAGLENGTH); + strncpy( &cell->payload.vectorp.tag.bytes[0], tag, TAGLENGTH ); debug_printf( DEBUG_ALLOC, L"make_vec_pointer: all good, returning pointer to %p\n", diff --git a/src/ops/equal.c b/src/ops/equal.c index 0c01a81..c4d7f54 100644 --- a/src/ops/equal.c +++ b/src/ops/equal.c @@ -67,6 +67,7 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) { && equal( cell_a->payload.cons.cdr, cell_b->payload.cons.cdr ); break; + case KEYTV: case STRINGTV: case SYMBOLTV: /* diff --git a/src/ops/intern.c b/src/ops/intern.c index 1e32a36..cf86e6b 100644 --- a/src/ops/intern.c +++ b/src/ops/intern.c @@ -24,6 +24,7 @@ #include "debug.h" #include "equal.h" #include "lispops.h" +#include "map.h" #include "print.h" /** @@ -51,7 +52,7 @@ struct cons_pointer internedp( struct cons_pointer key, struct cons_pointer store ) { struct cons_pointer result = NIL; - if ( symbolp( key ) ) { + if ( symbolp( key ) || keywordp( key ) ) { for ( struct cons_pointer next = store; nilp( result ) && consp( next ); next = pointer2cell( next ).payload.cons.cdr ) { @@ -73,7 +74,7 @@ internedp( struct cons_pointer key, struct cons_pointer store ) { debug_print_object( key, DEBUG_BIND ); debug_print( L"` is a ", DEBUG_BIND ); debug_print_object( c_type( key ), DEBUG_BIND ); - debug_print( L", not a SYMB", DEBUG_BIND ); + debug_print( L", not a KEYW or SYMB", DEBUG_BIND ); } return result; @@ -88,20 +89,32 @@ internedp( struct cons_pointer key, struct cons_pointer store ) { * of that key from the store; otherwise return NIL. */ struct cons_pointer c_assoc( struct cons_pointer key, - struct cons_pointer store ) { + struct cons_pointer store ) { struct cons_pointer result = NIL; - for ( struct cons_pointer next = store; - consp( next ); next = pointer2cell( next ).payload.cons.cdr ) { - struct cons_space_object entry = - pointer2cell( pointer2cell( next ).payload.cons.car ); + debug_print( L"c_assoc; key is `", DEBUG_BIND); + debug_print_object( key, DEBUG_BIND); + debug_print( L"`\n", DEBUG_BIND); - if ( equal( key, entry.payload.cons.car ) ) { - result = entry.payload.cons.cdr; - break; + if (consp(store)) { + for ( struct cons_pointer next = store; + consp( next ); next = pointer2cell( next ).payload.cons.cdr ) { + struct cons_space_object entry = + pointer2cell( pointer2cell( next ).payload.cons.car ); + + if ( equal( key, entry.payload.cons.car ) ) { + result = entry.payload.cons.cdr; + break; + } } + } else if (vectorpointp( store)) { + result = assoc_in_map( key, store); } + debug_print( L"c_assoc returning ", DEBUG_BIND); + debug_print_object( result, DEBUG_BIND); + debug_println( DEBUG_BIND); + return result; } @@ -110,15 +123,29 @@ struct cons_pointer c_assoc( struct cons_pointer key, * with this key/value pair added to the front. */ struct cons_pointer -bind( struct cons_pointer key, struct cons_pointer value, - struct cons_pointer store ) { - debug_print( L"Binding ", DEBUG_BIND ); + set( struct cons_pointer key, struct cons_pointer value, + struct cons_pointer store ) { + struct cons_pointer result = NIL; + + debug_print( L"set: binding `", DEBUG_BIND ); debug_print_object( key, DEBUG_BIND ); - debug_print( L" to ", DEBUG_BIND ); + debug_print( L"` to `", DEBUG_BIND ); debug_print_object( value, DEBUG_BIND ); + debug_print( L"` in store ", DEBUG_BIND ); + debug_dump_object( store, DEBUG_BIND); debug_println( DEBUG_BIND ); - return make_cons( make_cons( key, value ), store ); + if (nilp( store) || consp(store)) { + result = make_cons( make_cons( key, value ), store ); + } else if (vectorpointp( store)) { + result = bind_in_map( store, key, value); + } + + debug_print( L"set returning ", DEBUG_BIND); + debug_print_object( result, DEBUG_BIND); + debug_println( DEBUG_BIND); + + return result; } /** @@ -131,11 +158,19 @@ deep_bind( struct cons_pointer key, struct cons_pointer value ) { debug_print( L"Entering deep_bind\n", DEBUG_BIND ); struct cons_pointer old = oblist; - oblist = bind( key, value, oblist ); + debug_print( L"deep_bind: binding `", DEBUG_BIND ); + debug_print_object( key, DEBUG_BIND ); + debug_print( L"` to ", DEBUG_BIND ); + debug_print_object( value, DEBUG_BIND ); + debug_println( DEBUG_BIND ); + + oblist = set( key, value, oblist ); inc_ref( oblist ); dec_ref( old ); - debug_print( L"Leaving deep_bind\n", DEBUG_BIND ); + debug_print( L"deep_bind returning ", DEBUG_BIND ); + debug_print_object( oblist, DEBUG_BIND ); + debug_println( DEBUG_BIND ); return oblist; } @@ -153,7 +188,7 @@ intern( struct cons_pointer key, struct cons_pointer environment ) { /* * not currently bound */ - result = bind( key, NIL, environment ); + result = set( key, NIL, environment ); } return result; diff --git a/src/ops/intern.h b/src/ops/intern.h index b261242..fa17563 100644 --- a/src/ops/intern.h +++ b/src/ops/intern.h @@ -28,9 +28,9 @@ struct cons_pointer c_assoc( struct cons_pointer key, struct cons_pointer internedp( struct cons_pointer key, struct cons_pointer environment ); -struct cons_pointer bind( struct cons_pointer key, - struct cons_pointer value, - struct cons_pointer store ); +struct cons_pointer set( struct cons_pointer key, + struct cons_pointer value, + struct cons_pointer store ); struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value ); diff --git a/src/ops/lispops.c b/src/ops/lispops.c index c80d965..4e2ddbf 100644 --- a/src/ops/lispops.c +++ b/src/ops/lispops.c @@ -31,10 +31,13 @@ #include "equal.h" #include "integer.h" #include "intern.h" +#include "io.h" #include "lispops.h" +#include "map.h" #include "print.h" #include "read.h" #include "stack.h" +#include "vectorspace.h" /* * also to create in this section: @@ -46,32 +49,6 @@ * and others I haven't thought of yet. */ -/** - * Implementation of car in C. If arg is not a cons, does not error but returns nil. - */ -struct cons_pointer c_car( struct cons_pointer arg ) { - struct cons_pointer result = NIL; - - if ( consp( arg ) ) { - result = pointer2cell( arg ).payload.cons.car; - } - - return result; -} - -/** - * Implementation of cdr in C. If arg is not a cons, does not error but returns nil. - */ -struct cons_pointer c_cdr( struct cons_pointer arg ) { - struct cons_pointer result = NIL; - - if ( consp( arg ) || stringp( arg ) || symbolp( arg ) ) { - result = pointer2cell( arg ).payload.cons.cdr; - } - - return result; -} - /** * Useful building block; evaluate this single form in the context of this @@ -231,7 +208,7 @@ eval_lambda( struct cons_space_object cell, struct stack_frame *frame, struct cons_pointer name = c_car( names ); struct cons_pointer val = frame->arg[i]; - new_env = bind( name, val, new_env ); + new_env = set( name, val, new_env ); log_binding( name, val ); names = c_cdr( names ); @@ -256,7 +233,7 @@ eval_lambda( struct cons_space_object cell, struct stack_frame *frame, } } - new_env = bind( names, vals, new_env ); + new_env = set( names, vals, new_env ); inc_ref( new_env ); } @@ -294,8 +271,8 @@ eval_lambda( struct cons_space_object cell, struct stack_frame *frame, * @return the result of evaluating the function with its arguments. */ struct cons_pointer -c_apply( struct stack_frame *frame, struct cons_pointer frame_pointer, - struct cons_pointer env ) { + c_apply( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { debug_print( L"Entering c_apply\n", DEBUG_EVAL ); struct cons_pointer result = NIL; @@ -310,97 +287,122 @@ c_apply( struct stack_frame *frame, struct cons_pointer frame_pointer, switch ( fn_cell.tag.value ) { case EXCEPTIONTV: - /* just pass exceptions straight back */ - result = fn_pointer; - break; - case FUNCTIONTV: - { - struct cons_pointer exep = NIL; - struct cons_pointer next_pointer = - make_stack_frame( frame_pointer, args, env ); - inc_ref( next_pointer ); - if ( exceptionp( next_pointer ) ) { - result = next_pointer; - } else { - struct stack_frame *next = - get_stack_frame( next_pointer ); + /* just pass exceptions straight back */ + result = fn_pointer; + break; - result = - ( *fn_cell.payload.function.executable ) ( next, - next_pointer, - env ); - dec_ref( next_pointer ); - } + case FUNCTIONTV: + { + struct cons_pointer exep = NIL; + struct cons_pointer next_pointer = + make_stack_frame( frame_pointer, args, env ); + inc_ref( next_pointer ); + if ( exceptionp( next_pointer ) ) { + result = next_pointer; + } else { + struct stack_frame *next = + get_stack_frame( next_pointer ); + + result = + ( *fn_cell.payload.function.executable ) ( next, + next_pointer, + env ); + dec_ref( next_pointer ); } - break; + } + break; + + case KEYTV: + result = c_assoc( fn_pointer, + eval_form(frame, + frame_pointer, + c_car( c_cdr( frame->arg[0])), + env)); + break; + case LAMBDATV: - { - struct cons_pointer exep = NIL; - struct cons_pointer next_pointer = - make_stack_frame( frame_pointer, args, env ); - inc_ref( next_pointer ); - if ( exceptionp( next_pointer ) ) { - result = next_pointer; - } else { - struct stack_frame *next = - get_stack_frame( next_pointer ); - result = - eval_lambda( fn_cell, next, next_pointer, env ); - if ( !exceptionp( result ) ) { - dec_ref( next_pointer ); - } + { + struct cons_pointer exep = NIL; + struct cons_pointer next_pointer = + make_stack_frame( frame_pointer, args, env ); + inc_ref( next_pointer ); + if ( exceptionp( next_pointer ) ) { + result = next_pointer; + } else { + struct stack_frame *next = + get_stack_frame( next_pointer ); + result = + eval_lambda( fn_cell, next, next_pointer, env ); + if ( !exceptionp( result ) ) { + dec_ref( next_pointer ); } } + } + break; + + case VECTORPOINTTV: + switch ( pointer_to_vso(fn_pointer)->header.tag.value) { + case MAPTV: + /* \todo: if arg[0] is a CONS, treat it as a path */ + result = c_assoc( eval_form(frame, + frame_pointer, + c_car( c_cdr( frame->arg[0])), + env), + fn_pointer); break; + } + break; + case NLAMBDATV: - { - struct cons_pointer next_pointer = - make_special_frame( frame_pointer, args, env ); - inc_ref( next_pointer ); - if ( exceptionp( next_pointer ) ) { - result = next_pointer; - } else { - struct stack_frame *next = - get_stack_frame( next_pointer ); - result = - eval_lambda( fn_cell, next, next_pointer, env ); - dec_ref( next_pointer ); - } + { + struct cons_pointer next_pointer = + make_special_frame( frame_pointer, args, env ); + inc_ref( next_pointer ); + if ( exceptionp( next_pointer ) ) { + result = next_pointer; + } else { + struct stack_frame *next = + get_stack_frame( next_pointer ); + result = + eval_lambda( fn_cell, next, next_pointer, env ); + dec_ref( next_pointer ); } - break; + } + break; + case SPECIALTV: - { - struct cons_pointer next_pointer = - make_special_frame( frame_pointer, args, env ); - inc_ref( next_pointer ); - if ( exceptionp( next_pointer ) ) { - result = next_pointer; - } else { - result = - ( *fn_cell.payload. - special.executable ) ( get_stack_frame - ( next_pointer ), - next_pointer, env ); - debug_print( L"Special form returning: ", DEBUG_EVAL ); - debug_print_object( result, DEBUG_EVAL ); - debug_println( DEBUG_EVAL ); - dec_ref( next_pointer ); - } + { + struct cons_pointer next_pointer = + make_special_frame( frame_pointer, args, env ); + inc_ref( next_pointer ); + if ( exceptionp( next_pointer ) ) { + result = next_pointer; + } else { + result = + ( *fn_cell.payload.special. + executable ) ( get_stack_frame( next_pointer ), + next_pointer, env ); + debug_print( L"Special form returning: ", DEBUG_EVAL ); + debug_print_object( result, DEBUG_EVAL ); + debug_println( DEBUG_EVAL ); + dec_ref( next_pointer ); } - break; + } + break; + default: - { - int bs = sizeof( wchar_t ) * 1024; - wchar_t *buffer = malloc( bs ); - memset( buffer, '\0', bs ); - swprintf( buffer, bs, - L"Unexpected cell with tag %d (%4.4s) in function position", - fn_cell.tag.value, &fn_cell.tag.bytes[0] ); - struct cons_pointer message = - c_string_to_lisp_string( buffer ); - free( buffer ); - result = throw_exception( message, frame_pointer ); - } + { + int bs = sizeof( wchar_t ) * 1024; + wchar_t *buffer = malloc( bs ); + memset( buffer, '\0', bs ); + swprintf( buffer, bs, + L"Unexpected cell with tag %d (%4.4s) in function position", + fn_cell.tag.value, &fn_cell.tag.bytes[0] ); + struct cons_pointer message = + c_string_to_lisp_string( buffer ); + free( buffer ); + result = throw_exception( message, frame_pointer ); + } } } @@ -411,24 +413,6 @@ c_apply( struct stack_frame *frame, struct cons_pointer frame_pointer, return result; } - -/** - * Get the Lisp type of the single argument. - * @param pointer a pointer to the object whose type is requested. - * @return As a Lisp string, the tag of the object which is at that pointer. - */ -struct cons_pointer c_type( struct cons_pointer pointer ) { - struct cons_pointer result = NIL; - struct cons_space_object cell = pointer2cell( pointer ); - - for ( int i = TAGLENGTH - 1; i >= 0; i-- ) { - result = make_string( ( wchar_t ) cell.tag.bytes[i], result ); - } - - return result; -} - - /** * Function; evaluate the expression which is the first argument in the frame; * further arguments are ignored. @@ -460,9 +444,7 @@ lisp_eval( struct stack_frame *frame, struct cons_pointer frame_pointer, switch ( cell.tag.value ) { case CONSTV: - { result = c_apply( frame, frame_pointer, env ); - } break; case SYMBOLTV: @@ -627,10 +609,10 @@ lisp_set_shriek( struct stack_frame *frame, struct cons_pointer frame_pointer, * @return true if `arg` represents an end of string, else false. * \todo candidate for moving to a memory/string.c file */ -bool end_of_stringp(struct cons_pointer arg) { - return nilp(arg) || - ( stringp( arg ) && - pointer2cell(arg).payload.string.character == (wint_t)'\0'); +bool end_of_stringp( struct cons_pointer arg ) { + return nilp( arg ) || + ( stringp( arg ) && + pointer2cell( arg ).payload.string.character == ( wint_t ) '\0' ); } /** @@ -656,8 +638,8 @@ lisp_cons( struct stack_frame *frame, struct cons_pointer frame_pointer, if ( nilp( car ) && nilp( cdr ) ) { return NIL; } else if ( stringp( car ) && stringp( cdr ) && - end_of_stringp( c_cdr( car)) ) { - // \todo check that car is of length 1 + end_of_stringp( c_cdr( car ) ) ) { + // \todo check that car is of length 1 result = make_string( pointer2cell( car ).payload.string.character, cdr ); } else { @@ -690,11 +672,12 @@ lisp_car( struct stack_frame *frame, struct cons_pointer frame_pointer, case CONSTV: result = cell.payload.cons.car; break; - case READTV: - result = make_string( fgetwc( cell.payload.stream.stream ), NIL ); - break; case NILTV: break; + case READTV: + result = + make_string( url_fgetwc( cell.payload.stream.stream ), NIL ); + break; case STRINGTV: result = make_string( cell.payload.string.character, NIL ); break; @@ -733,15 +716,15 @@ lisp_cdr( struct stack_frame *frame, struct cons_pointer frame_pointer, case CONSTV: result = cell.payload.cons.cdr; break; + case NILTV: + break; case READTV: - fgetwc( cell.payload.stream.stream ); + url_fgetwc( cell.payload.stream.stream ); result = frame->arg[0]; break; case STRINGTV: result = cell.payload.string.cdr; break; - case NILTV: - break; default: result = throw_exception( c_string_to_lisp_string @@ -752,6 +735,22 @@ lisp_cdr( struct stack_frame *frame, struct cons_pointer frame_pointer, return result; } +/** + * Function: return, as an integer, the length of the sequence indicated by + * the first argument, or zero if it is not a sequence. + * + * * (length any) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment (ignored). + * @return the length of `any`, if it is a sequence, or zero otherwise. + */ +struct cons_pointer lisp_length( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + return make_integer( c_length( frame->arg[0]), NIL); +} + /** * Function; look up the value of a `key` in a `store`. * @@ -839,7 +838,8 @@ lisp_read( struct stack_frame *frame, struct cons_pointer frame_pointer, #ifdef DEBUG debug_print( L"entering lisp_read\n", DEBUG_IO ); #endif - FILE *input = stdin; + URL_FILE *input; + struct cons_pointer in_stream = readp( frame->arg[0] ) ? frame->arg[0] : get_default_stream( true, env ); @@ -848,6 +848,8 @@ lisp_read( struct stack_frame *frame, struct cons_pointer frame_pointer, debug_dump_object( in_stream, DEBUG_IO ); input = pointer2cell( in_stream ).payload.stream.stream; inc_ref( in_stream ); + } else { + input = file_to_url_file( stdin ); } struct cons_pointer result = read( frame, frame_pointer, input ); @@ -856,8 +858,11 @@ lisp_read( struct stack_frame *frame, struct cons_pointer frame_pointer, if ( readp( in_stream ) ) { dec_ref( in_stream ); + } else { + free( input ); } + return result; } @@ -878,7 +883,9 @@ struct cons_pointer c_reverse( struct cons_pointer arg ) { result = make_string( o.payload.string.character, result ); break; case SYMBOLTV: - result = make_symbol( o.payload.string.character, result ); + result = + make_symbol_or_key( o.payload.string.character, result, + SYMBOLTAG ); break; } } @@ -922,7 +929,7 @@ lisp_print( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ) { debug_print( L"Entering print\n", DEBUG_IO ); struct cons_pointer result = NIL; - FILE *output = stdout; + URL_FILE *output; struct cons_pointer out_stream = writep( frame->arg[1] ) ? frame->arg[1] : get_default_stream( false, env ); @@ -931,6 +938,8 @@ lisp_print( struct stack_frame *frame, struct cons_pointer frame_pointer, debug_dump_object( out_stream, DEBUG_IO ); output = pointer2cell( out_stream ).payload.stream.stream; inc_ref( out_stream ); + } else { + output = file_to_url_file( stderr ); } debug_print( L"lisp_print: about to print\n", DEBUG_IO ); @@ -943,6 +952,8 @@ lisp_print( struct stack_frame *frame, struct cons_pointer frame_pointer, if ( writep( out_stream ) ) { dec_ref( out_stream ); + } else { + free( output ); } return result; @@ -1035,7 +1046,7 @@ lisp_progn( struct stack_frame *frame, struct cons_pointer frame_pointer, * @return the value of the last expression of the first successful `clause`. */ struct cons_pointer - lisp_cond( struct stack_frame *frame, struct cons_pointer frame_pointer, +lisp_cond( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ) { struct cons_pointer result = NIL; bool done = false; @@ -1148,7 +1159,7 @@ struct cons_pointer lisp_repl( struct stack_frame *frame, struct cons_pointer input = get_default_stream( true, env ); struct cons_pointer output = get_default_stream( false, env ); - FILE *os = pointer2cell( output ).payload.stream.stream; + URL_FILE *os = pointer2cell( output ).payload.stream.stream; struct cons_pointer prompt_name = c_string_to_lisp_symbol( L"*prompt*" ); struct cons_pointer old_oblist = oblist; struct cons_pointer new_env = env; @@ -1165,7 +1176,7 @@ struct cons_pointer lisp_repl( struct stack_frame *frame, * print as parent. */ while ( readp( input ) && writep( output ) - && !feof( pointer2cell( input ).payload.stream.stream ) ) { + && !url_feof( pointer2cell( input ).payload.stream.stream ) ) { /* OK, here's a really subtle problem: because lists are immutable, anything * bound in the oblist subsequent to this function being invoked isn't in the * environment. So, for example, changes to *prompt* or *log* made in the oblist @@ -1203,7 +1214,7 @@ struct cons_pointer lisp_repl( struct stack_frame *frame, inc_ref( expr ); if ( exceptionp( expr ) - && feof( pointer2cell( input ).payload.stream.stream ) ) { + && url_feof( pointer2cell( input ).payload.stream.stream ) ) { /* suppress printing end of stream exception */ break; } @@ -1240,13 +1251,13 @@ struct cons_pointer lisp_source( struct stack_frame *frame, struct cons_pointer env ) { struct cons_pointer result = NIL; struct cons_space_object cell = pointer2cell( frame->arg[0] ); - + struct cons_pointer source_key = c_string_to_lisp_keyword( L"source" ); switch ( cell.tag.value ) { case FUNCTIONTV: - result = cell.payload.function.source; + result = c_assoc( source_key, cell.payload.function.meta ); break; case SPECIALTV: - result = cell.payload.special.source; + result = c_assoc( source_key, cell.payload.special.meta ); break; case LAMBDATV: result = make_cons( c_string_to_lisp_symbol( L"lambda" ), @@ -1282,7 +1293,7 @@ struct cons_pointer lisp_inspect( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ) { debug_print( L"Entering print\n", DEBUG_IO ); - FILE *output = stdout; + URL_FILE *output; struct cons_pointer out_stream = writep( frame->arg[1] ) ? frame->arg[1] : get_default_stream( false, env ); @@ -1291,11 +1302,17 @@ struct cons_pointer lisp_inspect( struct stack_frame *frame, debug_dump_object( out_stream, DEBUG_IO ); output = pointer2cell( out_stream ).payload.stream.stream; inc_ref( out_stream ); + } else { + output = file_to_url_file( stdout ); } + dump_object( output, frame->arg[0] ); + url_fputws( L"\n", output ); if ( writep( out_stream ) ) { dec_ref( out_stream ); + } else { + free( output ); } return frame->arg[0]; diff --git a/src/ops/lispops.h b/src/ops/lispops.h index 1aff486..122635f 100644 --- a/src/ops/lispops.h +++ b/src/ops/lispops.h @@ -19,26 +19,13 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ +#ifndef __psse_lispops_h +#define __psse_lispops_h + /* * utilities */ -/** - * Get the Lisp type of the single argument. - * @param pointer a pointer to the object whose type is requested. - * @return As a Lisp string, the tag of the object which is at that pointer. - */ -struct cons_pointer c_type( struct cons_pointer pointer ); - -/** - * Implementation of car in C. If arg is not a cons, does not error but returns nil. - */ -struct cons_pointer c_car( struct cons_pointer arg ); - -/** - * Implementation of cdr in C. If arg is not a cons, does not error but returns nil. - */ -struct cons_pointer c_cdr( struct cons_pointer arg ); struct cons_pointer c_reverse( struct cons_pointer arg ); @@ -98,7 +85,9 @@ lisp_set_shriek( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer lisp_lambda( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ); - +struct cons_pointer lisp_length( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); /** * Construct an interpretable special form. * @@ -205,3 +194,5 @@ struct cons_pointer lisp_source( struct stack_frame *frame, struct cons_pointer lisp_inspect( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ); + +#endif diff --git a/src/ops/meta.c b/src/ops/meta.c new file mode 100644 index 0000000..a27d2af --- /dev/null +++ b/src/ops/meta.c @@ -0,0 +1,45 @@ +/* + * meta.c + * + * Get metadata from a cell which has it. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "conspage.h" +#include "debug.h" + +/** + * Function: get metadata describing my first argument. + * + * * (metadata any) + * + * @return a pointer to the metadata of my first argument, or nil if none. + */ +struct cons_pointer lisp_metadata( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ) { + debug_print( L"lisp_metadata: entered\n", DEBUG_EVAL ); + debug_dump_object( frame->arg[0], DEBUG_EVAL ); + struct cons_pointer result = NIL; + struct cons_space_object cell = pointer2cell( frame->arg[0] ); + + switch ( cell.tag.value ) { + case FUNCTIONTV: + result = cell.payload.function.meta; + break; + case SPECIALTV: + result = cell.payload.special.meta; + break; + case READTV: + case WRITETV: + result = cell.payload.stream.meta; + break; + } + + return make_cons( make_cons( c_string_to_lisp_keyword( L"type" ), + c_type( frame->arg[0] ) ), result ); + +// return result; +} diff --git a/src/ops/meta.h b/src/ops/meta.h new file mode 100644 index 0000000..f441a50 --- /dev/null +++ b/src/ops/meta.h @@ -0,0 +1,18 @@ +/* + * meta.h + * + * Get metadata from a cell which has it. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_meta_h +#define __psse_meta_h + + +struct cons_pointer lisp_metadata( struct stack_frame *frame, + struct cons_pointer frame_pointer, + struct cons_pointer env ); + +#endif diff --git a/src/time/psse_time.c b/src/time/psse_time.c new file mode 100644 index 0000000..76f52a9 --- /dev/null +++ b/src/time/psse_time.c @@ -0,0 +1,107 @@ +/* + * psse_time.c + * + * Bare bones of PSSE time. See issue #16. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include +/* + * wide characters + */ +#include +#include + +#include "conspage.h" +#include "consspaceobject.h" +#include "integer.h" +#include "psse_time.h" +#define _GNU_SOURCE + +#define seconds_per_year 31557600L + +/** + * PSSE Lisp epoch is 14 Bn years, or 441,806,400,000,000,000 seconds, before + * the UNIX epoch; the value in microseconds will break the C reader. + */ +unsigned __int128 epoch_offset = ((__int128)(seconds_per_year * 1000000000L) * + (__int128)(14L * 1000000000L)); + +/** + * Return the UNIX time value which represents this time, if it falls within + * the period representable in UNIX time, or zero otherwise. + */ +long int lisp_time_to_unix_time(struct cons_pointer t) { + long int result = 0; + + if (timep( t)) { + unsigned __int128 value = pointer2cell(t).payload.time.value; + + if (value > epoch_offset) { // \todo && value < UNIX time rollover + result = ((value - epoch_offset) / 1000000000); + } + } + + return result; +} + +unsigned __int128 unix_time_to_lisp_time( time_t t) { + unsigned __int128 result = epoch_offset + (t * 1000000000); + + return result; +} + +struct cons_pointer make_time( struct cons_pointer integer_or_nil) { + struct cons_pointer pointer = allocate_cell( TIMETAG ); + struct cons_space_object *cell = &pointer2cell( pointer ); + + if (integerp(integer_or_nil)) { + cell->payload.time.value = pointer2cell(integer_or_nil).payload.integer.value; + // \todo: if integer is a bignum, deal with it. + } else { + cell->payload.time.value = unix_time_to_lisp_time( time(NULL)); + } + + return pointer; +} + +/** + * Function; return a time representation of the first argument in the frame; + * further arguments are ignored. + * + * * (time integer_or_nil) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment. + * @return a lisp time; if `integer_or_nil` is an integer, return a time which + * is that number of microseconds after the notional big bang; else the current + * time. + */ +struct cons_pointer lisp_time( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ) { + return make_time( frame->arg[0]); +} + +/** + * This is temporary, for bootstrapping. + */ +struct cons_pointer time_to_string( struct cons_pointer pointer) { + struct cons_pointer result = NIL; + long int t = lisp_time_to_unix_time(pointer); + + if ( t != 0) { + char * bytes = ctime(&t); + int l = strlen(bytes) + 1; + wchar_t buffer[ l]; + + mbstowcs( buffer, bytes, l); + result = c_string_to_lisp_string( buffer); + } + + return result; +} diff --git a/src/time/psse_time.h b/src/time/psse_time.h new file mode 100644 index 0000000..af70966 --- /dev/null +++ b/src/time/psse_time.h @@ -0,0 +1,20 @@ +/* + * psse_time.h + * + * Bare bones of PSSE time. See issue #16. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_time_h +#define __psse_time_h + +#define _GNU_SOURCE +#include "consspaceobject.h" + +struct cons_pointer lisp_time( struct stack_frame *frame, struct cons_pointer frame_pointer, + struct cons_pointer env ); +struct cons_pointer time_to_string( struct cons_pointer pointer); + +#endif diff --git a/src/utils.c b/src/utils.c new file mode 100644 index 0000000..9919dbe --- /dev/null +++ b/src/utils.c @@ -0,0 +1,33 @@ +/* + * utils.c + * + * little generally useful functions which aren't in any way special to PSSE. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include + + +int index_of( char c, const char *s ) { + int i; + + for ( i = 0; s[i] != c && s[i] != 0; i++ ); + + return s[i] == c ? i : -1; +} + +char *trim( char *s ) { + int i; + + for ( i = strlen( s ); ( isblank( s[i] ) || iscntrl( s[i] ) ) && i >= 0; + i-- ) { + s[i] = '\0'; + } + for ( i = 0; s[i] != '\0' && ( isblank( s[i] ) || iscntrl( s[i] ) ); i++ ); + + return ( char * ) &s[i]; +} diff --git a/src/utils.h b/src/utils.h new file mode 100644 index 0000000..456e4d0 --- /dev/null +++ b/src/utils.h @@ -0,0 +1,17 @@ +/* + * utils.h + * + * little generally useful functions which aren't in any way special to PSSE. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_utils_h +#define __psse_utils_h + +int index_of( char c, const char *s ); + +char *trim( char *s ); + +#endif diff --git a/unit-tests/bignum-print.sh b/unit-tests/bignum-print.sh index 5615871..d556e71 100755 --- a/unit-tests/bignum-print.sh +++ b/unit-tests/bignum-print.sh @@ -18,17 +18,6 @@ else exit 1 fi -echo -n "checking no bignum was created: " -grep -v 'BIGNUM!' psse.log > /dev/null -if [ $? -eq "0" ] -then - echo "OK" -else - echo "Fail" - exit 1 -fi - - ##################################################################### # right on the boundary @@ -48,17 +37,6 @@ else exit 1 fi -echo -n "checking no bignum was created: " -grep -v 'BIGNUM!' psse.log > /dev/null -if [ $? -eq "0" ] -then - echo "OK" -else - echo "Fail" - exit 1 -fi - - ##################################################################### # definitely a bignum @@ -79,16 +57,10 @@ else fi -echo -n "checking a bignum was created: " -grep 'BIGNUM!' psse.log > /dev/null -if [ $? -eq "0" ] -then - echo "OK" -else - echo "Fail" - exit 1 -fi - +# Currently failing from here on, but it's failing in read because of +# the multiply bug. We know printing blows up at the 3 cell boundary +# because `lisp/scratchpad2.lisp` constructs a 3 cell bignum by +# repeated addition. ##################################################################### # Just on the three cell boundary expected='1329227995784915872903807060280344576' @@ -103,7 +75,7 @@ if [ "${expected}" = "${actual}" ] then echo "OK" else - echo "Fail: expected '${expected}', got '${actual}'" + echo "Fail: expected '${expected}', \n got '${actual}'" exit 1 fi diff --git a/unit-tests/eval-quote-symbol.sh b/unit-tests/eval-quote-symbol.sh index 7e80c48..e977461 100755 --- a/unit-tests/eval-quote-symbol.sh +++ b/unit-tests/eval-quote-symbol.sh @@ -1,6 +1,6 @@ #!/bin/bash -expected='' +expected='' actual=`echo "(eval 'cond)" | target/psse | tail -1` if [ "${expected}" = "${actual}" ] diff --git a/unit-tests/map.sh b/unit-tests/map.sh new file mode 100755 index 0000000..f40c321 --- /dev/null +++ b/unit-tests/map.sh @@ -0,0 +1,89 @@ +#!/bin/bash + +##################################################################### +# Create an empty map using map notation +expected='{}' +actual=`echo "$expected" | target/psse | tail -1` + +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi + +##################################################################### +# Create an empty map using make-map +expected='{}' +actual=`echo "(make-map)" | target/psse | tail -1` + +echo -n "Empty map using (make-map): " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi + +##################################################################### +# Create a map using map notation: order of keys in output is not +# significant at this stage, but in the long term should be sorted +# alphanumerically +expected='{:two 2, :one 1, :three 3}' +actual=`echo "{:one 1 :two 2 :three 3}" | target/psse | tail -1` + +echo -n "Map using map notation: " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi + +##################################################################### +# Create a map using make-map: order of keys in output is not +# significant at this stage, but in the long term should be sorted +# alphanumerically +expected='{:two 2, :one 1, :three 3}' +actual=`echo "(make-map '((:one . 1)(:two . 2)(:three . 3)))" | target/psse | tail -1` + +echo -n "Map using (make-map): " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi + +##################################################################### +# Keyword in function position +expected='2' +actual=`echo "(:two {:one 1 :two 2 :three 3})" | target/psse | tail -1` + +echo -n "Keyword in function position: " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi + + +##################################################################### +# Map in function position +expected='2' +actual=`echo "({:one 1 :two 2 :three 3} :two)" | target/psse | tail -1` + +echo -n "Map in function position: " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi diff --git a/unit-tests/slurp.sh b/unit-tests/slurp.sh new file mode 100755 index 0000000..0a9bc7c --- /dev/null +++ b/unit-tests/slurp.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +tmp=hi.$$ +echo "Hello, there." > ${tmp} +expected='"Hello, there.' +actual=`echo "(slurp (open \"${tmp}\"))" | target/psse | tail -2 | head -1` + +if [ "${expected}" = "${actual}" ] +then + echo "OK" + rm ${tmp} + exit 0 +else + echo "Fail: expected '$expected', got '$actual'" + exit 1 +fi diff --git a/unit-tests/string-cons.sh b/unit-tests/string-cons.sh old mode 100644 new mode 100755 diff --git a/unit-tests/wide-character.sh b/unit-tests/wide-character.sh new file mode 100755 index 0000000..d56544e --- /dev/null +++ b/unit-tests/wide-character.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +expected='"λάμ(β)δα"' +actual=`echo $expected | target/psse | tail -1` + +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + exit 1 +fi diff --git a/where-does-it-break.sh b/where-does-it-break.sh deleted file mode 100755 index 4d70041..0000000 --- a/where-does-it-break.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Not really a unit test, but a check to see where bignum addition breaks - -broken=0 -i=11529215046068469750 -# we've already proven we can successfullu get up to here -increment=1 - -while [ $broken -eq "0" ] -do - expr="(+ $i $increment)" - # Use sbcl as our reference implementation... - expected=`echo "$expr" | sbcl --noinform | grep -v '*'` - actual=`echo "$expr" | target/psse | tail -1 | sed 's/\,//g'` - - echo -n "adding $increment to $i: " - - if [ "${expected}" = "${actual}" ] - then - echo "OK" - else - echo "Fail: expected '${expected}', got '${actual}'" - broken=1 - exit 1 - fi - - i=$expected -done