From 432ccb2d44586a5a533ee40c39c28e3a7ba9869c Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Fri, 13 Jan 2017 00:45:00 +0000 Subject: [PATCH] Frustrating. I think all the conversion to 'wide' (UTF) character handling is done, and all the existing unit tests pass - but UTF characters are nevertheless not read or printed correctly. --- src/consspaceobject.c | 7 ++-- src/consspaceobject.h | 5 ++- src/init.c | 5 +-- src/intern.c | 79 ++++++++++++++++++++++++++++++++++++------- src/intern.h | 17 +++++++++- src/lispops.c | 19 ++++++----- src/print.c | 7 ++-- src/read.c | 61 ++++++++++++++++----------------- 8 files changed, 139 insertions(+), 61 deletions(-) diff --git a/src/consspaceobject.c b/src/consspaceobject.c index 0ef8ab1..ae2721f 100644 --- a/src/consspaceobject.c +++ b/src/consspaceobject.c @@ -11,6 +11,9 @@ #include #include #include +/* wide characters */ +#include +#include #include "conspage.h" #include "consspaceobject.h" @@ -87,7 +90,7 @@ void dump_object( FILE* output, struct cons_pointer pointer) { } else if ( check_tag(pointer, REALTAG)) { fprintf( output, "\t\tReal cell: value %Lf\n", cell.payload.real.value); } else if ( check_tag( pointer, STRINGTAG)) { - fprintf( output, "\t\tString cell: character '%c' next at page %d offset %d\n", + fwprintf( output, L"\t\tString cell: character '%C' next at page %d offset %d\n", cell.payload.string.character, cell.payload.string.cdr.page, cell.payload.string.cdr.offset); }; @@ -124,7 +127,7 @@ struct cons_pointer make_string( char c, struct cons_pointer tail) { struct cons_space_object* cell = &pointer2cell(pointer); inc_ref(tail); - cell->payload.string.character = (uint32_t) c; + cell->payload.string.character = (wint_t) c; cell->payload.string.cdr.page = tail.page; cell->payload.string.cdr.offset = tail.offset; } else { diff --git a/src/consspaceobject.h b/src/consspaceobject.h index e151fc4..bc7d6be 100644 --- a/src/consspaceobject.h +++ b/src/consspaceobject.h @@ -11,6 +11,9 @@ #include #include #include +/* wide characters */ +#include +#include #ifndef __consspaceobject_h #define __consspaceobject_h @@ -155,7 +158,7 @@ struct real_payload { * payload of a string cell. At least at first, only one UTF character will be stored in each cell. */ struct string_payload { - uint32_t character; /* the actual character stored in this cell */ + wint_t character; /* the actual character stored in this cell */ uint32_t padding; /* unused padding to word-align the cdr */ struct cons_pointer cdr; }; diff --git a/src/init.c b/src/init.c index 63b8558..33c9836 100644 --- a/src/init.c +++ b/src/init.c @@ -1,8 +1,8 @@ /** * init.c * - * Start up and initialise the environement - just enough to get working and (ultimately) - * hand off to the executive. + * Start up and initialise the environement - just enough to get working + * and (ultimately) hand off to the executive. * * * (c) 2017 Simon Brooke @@ -16,6 +16,7 @@ #include "consspaceobject.h" #include "print.h" #include "read.h" +#include "lispops.h" int main (int argc, char *argv[]) { fprintf( stderr, "Post scarcity software environment version %s\n", VERSION); diff --git a/src/intern.c b/src/intern.c index 6770bdf..bf5b9f4 100644 --- a/src/intern.c +++ b/src/intern.c @@ -22,6 +22,7 @@ #include "equal.h" #include "conspage.h" #include "consspaceobject.h" +#include "equal.h" /** * The object list. What is added to this during system setup is 'global', that is, @@ -67,21 +68,56 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store) { } -/** - * Return true if this key is present as a key in this enviroment, defaulting to - * the oblist if no environment is passed. +/** + * Internal workings of internedp, q.v. Not intended to be called from anywhere + * else. Note that this is VERY similar to assoc, but returns the car (key) of + * the binding rather than the cdr (value). */ -bool internedp( struct cons_pointer key, struct cons_pointer environment) { - bool result = false; - - if ( nilp( environment)) { - if ( !nilp( oblist)) { - result = internedp( key, oblist); +struct cons_pointer __internedp( struct cons_pointer key, + struct cons_pointer store) { + struct cons_pointer result = NIL; + + if ( consp( store)) { + struct cons_space_object* cell_store = &pointer2cell( store); + + if ( consp( cell_store->payload.cons.car)) { + struct cons_space_object* binding = + &pointer2cell( cell_store->payload.cons.car); + + if ( equal( key, binding->payload.cons.car)) { + result = binding->payload.cons.car; + } + } + /* top-level objects on an assoc list ought to be conses (i.e. each + * successive car should be a cons), but there's no need to throw a + * wobbly if it isn't. */ + + if ( nilp( result)) { + result = assoc( key, cell_store->payload.cons.cdr); } - } else { - result = !nilp( assoc( key, environment)); } - + + return result; +} + + +/** + * Return the canonical version of this key if ut is present as a key in this + * enviroment, defaulting to the oblist if no environment is passed. Key is + * expected to be a string. + */ +struct cons_pointer internedp( struct cons_pointer key, + struct cons_pointer environment) { + struct cons_pointer result = NIL; + + if ( stringp( key)) { + if ( nilp( environment)) { + result = __internedp( key, oblist); + } else { + result = __internedp( key, environment); + } + } + return result; } @@ -105,3 +141,22 @@ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer valu oblist = bind( key, value, oblist); return oblist; } + + +/** + * Ensure that a canonical copy of this key is bound in this environment, and + * return that canonical copy. If there is currently no such binding, create one + * with the value NIL. + */ +struct cons_pointer intern( struct cons_pointer key, + struct cons_pointer environment) { + struct cons_pointer result = environment; + struct cons_pointer canonical = internedp( key, environment); + + if ( nilp( canonical)) { + /* not currently bound */ + result = bind( key, NIL, environment); + } + + return result; +} diff --git a/src/intern.h b/src/intern.h index 656fe62..148f05f 100644 --- a/src/intern.h +++ b/src/intern.h @@ -17,6 +17,10 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ + +#ifndef __intern_h +#define __intern_h + extern struct cons_pointer oblist; /** @@ -31,7 +35,8 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store); * Return true if this key is present as a key in this enviroment, defaulting to * the oblist if no environment is passed. */ -bool internedp( struct cons_pointer key, struct cons_pointer environment); +struct cons_pointer internedp( struct cons_pointer key, + struct cons_pointer environment); /** * Return a new key/value store containing all the key/value pairs in this store @@ -46,3 +51,13 @@ struct cons_pointer bind( struct cons_pointer key, struct cons_pointer value, * there it may not be especially useful). */ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value); + +/** + * Ensure that a canonical copy of this key is bound in this environment, and + * return that canonical copy. If there is currently no such binding, create one + * with the value NIL. + */ +struct cons_pointer intern( struct cons_pointer key, + struct cons_pointer environment); + +#endif diff --git a/src/lispops.c b/src/lispops.c index b59137d..b4efd11 100644 --- a/src/lispops.c +++ b/src/lispops.c @@ -43,7 +43,8 @@ struct cons_pointer i_eval_args( struct cons_pointer args, struct cons_pointer t struct cons_pointer result = NIL; if ( ! nilp( args)) { - result = make_cons( lisp_eval( lisp_car( args)), i_eval_args( lisp_cdr( args), tail)); + result = make_cons( lisp_eval( lisp_car( args, env), env), + i_eval_args( lisp_cdr( args, env), tail, env)); } return result; @@ -59,7 +60,7 @@ struct cons_pointer lisp_apply( struct cons_pointer args, struct cons_pointer en struct cons_pointer result = args; if ( consp( args)) { - lisp_eval( make_cons( lisp_car( args), i_eval_args( lisp_cdr( args), NIL))); + lisp_eval( make_cons( lisp_car( args, env), i_eval_args( lisp_cdr( args, env), NIL, env)), env); } return result; @@ -74,8 +75,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env if ( consp( args)) { /* the hard bit. Sort out what function is required and pass the * args to it. */ - struct cons_pointer fn_pointer lisp_car( args); - args = lisp_cdr( args); + struct cons_pointer fn_pointer = lisp_car( args, env); + args = lisp_cdr( args, env); if ( functionp( fn_pointer)) { struct cons_space_object function = pointer2cell( fn_pointer); @@ -90,8 +91,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env * also if the object is a consp it could be interpretable * source code but in the long run I don't want an interpreter, * and if I can get away without so much the better. */ - result = lisp_throw( args, env) - } + result = lisp_throw( args, env); + } } return result; @@ -110,7 +111,7 @@ struct cons_pointer lisp_cons( struct cons_pointer args, struct cons_pointer env struct cons_pointer d = pointer2cell( cell.payload.cons.cdr).payload.cons.car; result = make_cons( a, d); } else { - lisp_throw( args); + lisp_throw( args, env); } return result; @@ -126,7 +127,7 @@ struct cons_pointer lisp_car( struct cons_pointer args, struct cons_pointer env) struct cons_space_object cell = pointer2cell( args); result = pointer2cell( cell.payload.cons.car).payload.cons.car; } else { - lisp_throw( args); + lisp_throw( args, env); } return result; @@ -143,7 +144,7 @@ struct cons_pointer lisp_cdr( struct cons_pointer args, struct cons_pointer env) struct cons_space_object cell = pointer2cell( args); result = pointer2cell( cell.payload.cons.cdr).payload.cons.car; } else { - lisp_throw( args); + lisp_throw( args, env); } return result; diff --git a/src/print.c b/src/print.c index 479454e..a263d6f 100644 --- a/src/print.c +++ b/src/print.c @@ -11,6 +11,9 @@ #include #include #include +/* wide characters */ +#include +#include #include "conspage.h" #include "consspaceobject.h" @@ -20,10 +23,10 @@ void print_string_contents( FILE* output, struct cons_pointer pointer) { if ( check_tag( pointer, STRINGTAG)) { struct cons_space_object* cell = &pointer2cell(pointer); - char c = cell->payload.string.character; + wint_t c = cell->payload.string.character; if ( c != '\0') { - fputc( c, output); + fputwc( c, output); } print_string_contents( output, cell->payload.string.cdr); } diff --git a/src/read.c b/src/read.c index 2354420..f7a572f 100644 --- a/src/read.c +++ b/src/read.c @@ -8,9 +8,11 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ -#include #include #include +/* wide characters */ +#include +#include #include "consspaceobject.h" #include "integer.h" @@ -22,9 +24,9 @@ lists Can't read atoms because I don't yet know what an atom is or how it's stored. */ -struct cons_pointer read_number( FILE* input, char initial); -struct cons_pointer read_list( FILE* input, char initial); -struct cons_pointer read_string( FILE* input, char initial); +struct cons_pointer read_number( FILE* input, wint_t initial); +struct cons_pointer read_list( FILE* input, wint_t initial); +struct cons_pointer read_string( FILE* input, wint_t initial); /** @@ -32,34 +34,24 @@ struct cons_pointer read_string( FILE* input, char initial); * treating this initial character as the first character of the object * representation. */ -struct cons_pointer read_continuation( FILE* input, char initial) { +struct cons_pointer read_continuation( FILE* input, wint_t initial) { struct cons_pointer result = NIL; - char c; + wint_t c; - for (c = initial; c == '\0' || isblank( c); c = fgetc( input)); + for (c = initial; c == '\0' || iswblank( c); c = fgetwc( input)); switch( c) { case '(' : case ')': - result = read_list(input, fgetc( input)); + result = read_list(input, fgetwc( input)); break; - case '"': result = read_string(input, fgetc( input)); - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - // case '.': - result = read_number( input, c); + case '"': result = read_string(input, fgetwc( input)); break; default: + if ( iswdigit( c)) { + result = read_number( input, c); + } fprintf( stderr, "Unrecognised start of input character %c\n", c); } @@ -70,15 +62,15 @@ struct cons_pointer read_continuation( FILE* input, char initial) { /** * read a number from this input stream, given this initial character. */ -struct cons_pointer read_number( FILE* input, char initial) { +struct cons_pointer read_number( FILE* input, wint_t initial) { int accumulator = 0; int places_of_decimals = 0; bool seen_period = false; - char c; + wint_t c; fprintf( stderr, "read_number starting '%c' (%d)\n", initial, initial); - for (c = initial; isdigit( c); c = fgetc( input)) { + for (c = initial; iswdigit( c); c = fgetwc( input)) { if ( c == '.') { seen_period = true; } else { @@ -91,7 +83,7 @@ struct cons_pointer read_number( FILE* input, char initial) { } /* push back the character read which was not a digit */ - fputc( c, input); + fputwc( c, input); return make_integer( accumulator); } @@ -101,15 +93,15 @@ struct cons_pointer read_number( FILE* input, char initial) { * Read a list from this input stream, which no longer contains the opening * left parenthesis. */ -struct cons_pointer read_list( FILE* input, char initial) { +struct cons_pointer read_list( FILE* input, wint_t initial) { struct cons_pointer cdr = NIL; struct cons_pointer result= NIL; - fprintf( stderr, "read_list starting '%c' (%d)\n", initial, initial); + fwprintf( stderr, L"read_list starting '%C' (%d)\n", initial, initial); if ( initial != ')' ) { struct cons_pointer car = read_continuation( input, initial); - cdr = read_list( input, fgetc( input)); + cdr = read_list( input, fgetwc( input)); result = make_cons( car, cdr); } @@ -123,11 +115,11 @@ struct cons_pointer read_list( FILE* input, char initial) { * representation of a string, which is that there's no obvious representation of * an empty string. */ -struct cons_pointer read_string( FILE* input, char initial) { +struct cons_pointer read_string( FILE* input, wint_t initial) { struct cons_pointer cdr = NIL; struct cons_pointer result; - fprintf( stderr, "read_string starting '%c' (%d)\n", initial, initial); + fwprintf( stderr, L"read_string starting '%C' (%d)\n", initial, initial); switch ( initial) { case '\0': @@ -137,7 +129,7 @@ struct cons_pointer read_string( FILE* input, char initial) { result = make_string( '\0', NIL); break; default: - result = make_string( initial, read_string( input, fgetc( input))); + result = make_string( initial, read_string( input, fgetwc( input))); break; } @@ -151,6 +143,11 @@ struct cons_pointer read_string( FILE* input, char initial) { struct cons_pointer read( FILE* input) { return read_continuation( input, '\0'); } + + +struct cons_pointer lisp_read( struct cons_pointer args, struct cons_pointer env) { + return( read( stdin)); +}