Frustrating. I think all the conversion to 'wide' (UTF) character handling

is done, and all the existing unit tests pass - but UTF characters are
nevertheless not read or printed correctly.
This commit is contained in:
Simon Brooke 2017-01-13 00:45:00 +00:00
parent 472b58b900
commit 432ccb2d44
8 changed files with 139 additions and 61 deletions

View file

@ -11,6 +11,9 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#include "conspage.h" #include "conspage.h"
#include "consspaceobject.h" #include "consspaceobject.h"
@ -87,7 +90,7 @@ void dump_object( FILE* output, struct cons_pointer pointer) {
} else if ( check_tag(pointer, REALTAG)) { } else if ( check_tag(pointer, REALTAG)) {
fprintf( output, "\t\tReal cell: value %Lf\n", cell.payload.real.value); fprintf( output, "\t\tReal cell: value %Lf\n", cell.payload.real.value);
} else if ( check_tag( pointer, STRINGTAG)) { } else if ( check_tag( pointer, STRINGTAG)) {
fprintf( output, "\t\tString cell: character '%c' next at page %d offset %d\n", fwprintf( output, L"\t\tString cell: character '%C' next at page %d offset %d\n",
cell.payload.string.character, cell.payload.string.cdr.page, cell.payload.string.character, cell.payload.string.cdr.page,
cell.payload.string.cdr.offset); cell.payload.string.cdr.offset);
}; };
@ -124,7 +127,7 @@ struct cons_pointer make_string( char c, struct cons_pointer tail) {
struct cons_space_object* cell = &pointer2cell(pointer); struct cons_space_object* cell = &pointer2cell(pointer);
inc_ref(tail); inc_ref(tail);
cell->payload.string.character = (uint32_t) c; cell->payload.string.character = (wint_t) c;
cell->payload.string.cdr.page = tail.page; cell->payload.string.cdr.page = tail.page;
cell->payload.string.cdr.offset = tail.offset; cell->payload.string.cdr.offset = tail.offset;
} else { } else {

View file

@ -11,6 +11,9 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#ifndef __consspaceobject_h #ifndef __consspaceobject_h
#define __consspaceobject_h #define __consspaceobject_h
@ -155,7 +158,7 @@ struct real_payload {
* payload of a string cell. At least at first, only one UTF character will be stored in each cell. * payload of a string cell. At least at first, only one UTF character will be stored in each cell.
*/ */
struct string_payload { struct string_payload {
uint32_t character; /* the actual character stored in this cell */ wint_t character; /* the actual character stored in this cell */
uint32_t padding; /* unused padding to word-align the cdr */ uint32_t padding; /* unused padding to word-align the cdr */
struct cons_pointer cdr; struct cons_pointer cdr;
}; };

View file

@ -1,8 +1,8 @@
/** /**
* init.c * init.c
* *
* Start up and initialise the environement - just enough to get working and (ultimately) * Start up and initialise the environement - just enough to get working
* hand off to the executive. * and (ultimately) hand off to the executive.
* *
* *
* (c) 2017 Simon Brooke <simon@journeyman.cc> * (c) 2017 Simon Brooke <simon@journeyman.cc>
@ -16,6 +16,7 @@
#include "consspaceobject.h" #include "consspaceobject.h"
#include "print.h" #include "print.h"
#include "read.h" #include "read.h"
#include "lispops.h"
int main (int argc, char *argv[]) { int main (int argc, char *argv[]) {
fprintf( stderr, "Post scarcity software environment version %s\n", VERSION); fprintf( stderr, "Post scarcity software environment version %s\n", VERSION);

View file

@ -22,6 +22,7 @@
#include "equal.h" #include "equal.h"
#include "conspage.h" #include "conspage.h"
#include "consspaceobject.h" #include "consspaceobject.h"
#include "equal.h"
/** /**
* The object list. What is added to this during system setup is 'global', that is, * The object list. What is added to this during system setup is 'global', that is,
@ -67,21 +68,56 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store) {
} }
/** /**
* Return true if this key is present as a key in this enviroment, defaulting to * Internal workings of internedp, q.v. Not intended to be called from anywhere
* the oblist if no environment is passed. * else. Note that this is VERY similar to assoc, but returns the car (key) of
* the binding rather than the cdr (value).
*/ */
bool internedp( struct cons_pointer key, struct cons_pointer environment) { struct cons_pointer __internedp( struct cons_pointer key,
bool result = false; struct cons_pointer store) {
struct cons_pointer result = NIL;
if ( nilp( environment)) {
if ( !nilp( oblist)) { if ( consp( store)) {
result = internedp( key, oblist); struct cons_space_object* cell_store = &pointer2cell( store);
if ( consp( cell_store->payload.cons.car)) {
struct cons_space_object* binding =
&pointer2cell( cell_store->payload.cons.car);
if ( equal( key, binding->payload.cons.car)) {
result = binding->payload.cons.car;
}
}
/* top-level objects on an assoc list ought to be conses (i.e. each
* successive car should be a cons), but there's no need to throw a
* wobbly if it isn't. */
if ( nilp( result)) {
result = assoc( key, cell_store->payload.cons.cdr);
} }
} else {
result = !nilp( assoc( key, environment));
} }
return result;
}
/**
* Return the canonical version of this key if ut is present as a key in this
* enviroment, defaulting to the oblist if no environment is passed. Key is
* expected to be a string.
*/
struct cons_pointer internedp( struct cons_pointer key,
struct cons_pointer environment) {
struct cons_pointer result = NIL;
if ( stringp( key)) {
if ( nilp( environment)) {
result = __internedp( key, oblist);
} else {
result = __internedp( key, environment);
}
}
return result; return result;
} }
@ -105,3 +141,22 @@ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer valu
oblist = bind( key, value, oblist); oblist = bind( key, value, oblist);
return oblist; return oblist;
} }
/**
* Ensure that a canonical copy of this key is bound in this environment, and
* return that canonical copy. If there is currently no such binding, create one
* with the value NIL.
*/
struct cons_pointer intern( struct cons_pointer key,
struct cons_pointer environment) {
struct cons_pointer result = environment;
struct cons_pointer canonical = internedp( key, environment);
if ( nilp( canonical)) {
/* not currently bound */
result = bind( key, NIL, environment);
}
return result;
}

View file

@ -17,6 +17,10 @@
* Licensed under GPL version 2.0, or, at your option, any later version. * Licensed under GPL version 2.0, or, at your option, any later version.
*/ */
#ifndef __intern_h
#define __intern_h
extern struct cons_pointer oblist; extern struct cons_pointer oblist;
/** /**
@ -31,7 +35,8 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store);
* Return true if this key is present as a key in this enviroment, defaulting to * Return true if this key is present as a key in this enviroment, defaulting to
* the oblist if no environment is passed. * the oblist if no environment is passed.
*/ */
bool internedp( struct cons_pointer key, struct cons_pointer environment); struct cons_pointer internedp( struct cons_pointer key,
struct cons_pointer environment);
/** /**
* Return a new key/value store containing all the key/value pairs in this store * Return a new key/value store containing all the key/value pairs in this store
@ -46,3 +51,13 @@ struct cons_pointer bind( struct cons_pointer key, struct cons_pointer value,
* there it may not be especially useful). * there it may not be especially useful).
*/ */
struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value); struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value);
/**
* Ensure that a canonical copy of this key is bound in this environment, and
* return that canonical copy. If there is currently no such binding, create one
* with the value NIL.
*/
struct cons_pointer intern( struct cons_pointer key,
struct cons_pointer environment);
#endif

View file

@ -43,7 +43,8 @@ struct cons_pointer i_eval_args( struct cons_pointer args, struct cons_pointer t
struct cons_pointer result = NIL; struct cons_pointer result = NIL;
if ( ! nilp( args)) { if ( ! nilp( args)) {
result = make_cons( lisp_eval( lisp_car( args)), i_eval_args( lisp_cdr( args), tail)); result = make_cons( lisp_eval( lisp_car( args, env), env),
i_eval_args( lisp_cdr( args, env), tail, env));
} }
return result; return result;
@ -59,7 +60,7 @@ struct cons_pointer lisp_apply( struct cons_pointer args, struct cons_pointer en
struct cons_pointer result = args; struct cons_pointer result = args;
if ( consp( args)) { if ( consp( args)) {
lisp_eval( make_cons( lisp_car( args), i_eval_args( lisp_cdr( args), NIL))); lisp_eval( make_cons( lisp_car( args, env), i_eval_args( lisp_cdr( args, env), NIL, env)), env);
} }
return result; return result;
@ -74,8 +75,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env
if ( consp( args)) { if ( consp( args)) {
/* the hard bit. Sort out what function is required and pass the /* the hard bit. Sort out what function is required and pass the
* args to it. */ * args to it. */
struct cons_pointer fn_pointer lisp_car( args); struct cons_pointer fn_pointer = lisp_car( args, env);
args = lisp_cdr( args); args = lisp_cdr( args, env);
if ( functionp( fn_pointer)) { if ( functionp( fn_pointer)) {
struct cons_space_object function = pointer2cell( fn_pointer); struct cons_space_object function = pointer2cell( fn_pointer);
@ -90,8 +91,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env
* also if the object is a consp it could be interpretable * also if the object is a consp it could be interpretable
* source code but in the long run I don't want an interpreter, * source code but in the long run I don't want an interpreter,
* and if I can get away without so much the better. */ * and if I can get away without so much the better. */
result = lisp_throw( args, env) result = lisp_throw( args, env);
} }
} }
return result; return result;
@ -110,7 +111,7 @@ struct cons_pointer lisp_cons( struct cons_pointer args, struct cons_pointer env
struct cons_pointer d = pointer2cell( cell.payload.cons.cdr).payload.cons.car; struct cons_pointer d = pointer2cell( cell.payload.cons.cdr).payload.cons.car;
result = make_cons( a, d); result = make_cons( a, d);
} else { } else {
lisp_throw( args); lisp_throw( args, env);
} }
return result; return result;
@ -126,7 +127,7 @@ struct cons_pointer lisp_car( struct cons_pointer args, struct cons_pointer env)
struct cons_space_object cell = pointer2cell( args); struct cons_space_object cell = pointer2cell( args);
result = pointer2cell( cell.payload.cons.car).payload.cons.car; result = pointer2cell( cell.payload.cons.car).payload.cons.car;
} else { } else {
lisp_throw( args); lisp_throw( args, env);
} }
return result; return result;
@ -143,7 +144,7 @@ struct cons_pointer lisp_cdr( struct cons_pointer args, struct cons_pointer env)
struct cons_space_object cell = pointer2cell( args); struct cons_space_object cell = pointer2cell( args);
result = pointer2cell( cell.payload.cons.cdr).payload.cons.car; result = pointer2cell( cell.payload.cons.cdr).payload.cons.car;
} else { } else {
lisp_throw( args); lisp_throw( args, env);
} }
return result; return result;

View file

@ -11,6 +11,9 @@
#include <ctype.h> #include <ctype.h>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#include "conspage.h" #include "conspage.h"
#include "consspaceobject.h" #include "consspaceobject.h"
@ -20,10 +23,10 @@
void print_string_contents( FILE* output, struct cons_pointer pointer) { void print_string_contents( FILE* output, struct cons_pointer pointer) {
if ( check_tag( pointer, STRINGTAG)) { if ( check_tag( pointer, STRINGTAG)) {
struct cons_space_object* cell = &pointer2cell(pointer); struct cons_space_object* cell = &pointer2cell(pointer);
char c = cell->payload.string.character; wint_t c = cell->payload.string.character;
if ( c != '\0') { if ( c != '\0') {
fputc( c, output); fputwc( c, output);
} }
print_string_contents( output, cell->payload.string.cdr); print_string_contents( output, cell->payload.string.cdr);
} }

View file

@ -8,9 +8,11 @@
* Licensed under GPL version 2.0, or, at your option, any later version. * Licensed under GPL version 2.0, or, at your option, any later version.
*/ */
#include <ctype.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdio.h> #include <stdio.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#include "consspaceobject.h" #include "consspaceobject.h"
#include "integer.h" #include "integer.h"
@ -22,9 +24,9 @@
lists lists
Can't read atoms because I don't yet know what an atom is or how it's stored. */ Can't read atoms because I don't yet know what an atom is or how it's stored. */
struct cons_pointer read_number( FILE* input, char initial); struct cons_pointer read_number( FILE* input, wint_t initial);
struct cons_pointer read_list( FILE* input, char initial); struct cons_pointer read_list( FILE* input, wint_t initial);
struct cons_pointer read_string( FILE* input, char initial); struct cons_pointer read_string( FILE* input, wint_t initial);
/** /**
@ -32,34 +34,24 @@ struct cons_pointer read_string( FILE* input, char initial);
* treating this initial character as the first character of the object * treating this initial character as the first character of the object
* representation. * representation.
*/ */
struct cons_pointer read_continuation( FILE* input, char initial) { struct cons_pointer read_continuation( FILE* input, wint_t initial) {
struct cons_pointer result = NIL; struct cons_pointer result = NIL;
char c; wint_t c;
for (c = initial; c == '\0' || isblank( c); c = fgetc( input)); for (c = initial; c == '\0' || iswblank( c); c = fgetwc( input));
switch( c) { switch( c) {
case '(' : case '(' :
case ')': case ')':
result = read_list(input, fgetc( input)); result = read_list(input, fgetwc( input));
break; break;
case '"': result = read_string(input, fgetc( input)); case '"': result = read_string(input, fgetwc( input));
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// case '.':
result = read_number( input, c);
break; break;
default: default:
if ( iswdigit( c)) {
result = read_number( input, c);
}
fprintf( stderr, "Unrecognised start of input character %c\n", c); fprintf( stderr, "Unrecognised start of input character %c\n", c);
} }
@ -70,15 +62,15 @@ struct cons_pointer read_continuation( FILE* input, char initial) {
/** /**
* read a number from this input stream, given this initial character. * read a number from this input stream, given this initial character.
*/ */
struct cons_pointer read_number( FILE* input, char initial) { struct cons_pointer read_number( FILE* input, wint_t initial) {
int accumulator = 0; int accumulator = 0;
int places_of_decimals = 0; int places_of_decimals = 0;
bool seen_period = false; bool seen_period = false;
char c; wint_t c;
fprintf( stderr, "read_number starting '%c' (%d)\n", initial, initial); fprintf( stderr, "read_number starting '%c' (%d)\n", initial, initial);
for (c = initial; isdigit( c); c = fgetc( input)) { for (c = initial; iswdigit( c); c = fgetwc( input)) {
if ( c == '.') { if ( c == '.') {
seen_period = true; seen_period = true;
} else { } else {
@ -91,7 +83,7 @@ struct cons_pointer read_number( FILE* input, char initial) {
} }
/* push back the character read which was not a digit */ /* push back the character read which was not a digit */
fputc( c, input); fputwc( c, input);
return make_integer( accumulator); return make_integer( accumulator);
} }
@ -101,15 +93,15 @@ struct cons_pointer read_number( FILE* input, char initial) {
* Read a list from this input stream, which no longer contains the opening * Read a list from this input stream, which no longer contains the opening
* left parenthesis. * left parenthesis.
*/ */
struct cons_pointer read_list( FILE* input, char initial) { struct cons_pointer read_list( FILE* input, wint_t initial) {
struct cons_pointer cdr = NIL; struct cons_pointer cdr = NIL;
struct cons_pointer result= NIL; struct cons_pointer result= NIL;
fprintf( stderr, "read_list starting '%c' (%d)\n", initial, initial); fwprintf( stderr, L"read_list starting '%C' (%d)\n", initial, initial);
if ( initial != ')' ) { if ( initial != ')' ) {
struct cons_pointer car = read_continuation( input, initial); struct cons_pointer car = read_continuation( input, initial);
cdr = read_list( input, fgetc( input)); cdr = read_list( input, fgetwc( input));
result = make_cons( car, cdr); result = make_cons( car, cdr);
} }
@ -123,11 +115,11 @@ struct cons_pointer read_list( FILE* input, char initial) {
* representation of a string, which is that there's no obvious representation of * representation of a string, which is that there's no obvious representation of
* an empty string. * an empty string.
*/ */
struct cons_pointer read_string( FILE* input, char initial) { struct cons_pointer read_string( FILE* input, wint_t initial) {
struct cons_pointer cdr = NIL; struct cons_pointer cdr = NIL;
struct cons_pointer result; struct cons_pointer result;
fprintf( stderr, "read_string starting '%c' (%d)\n", initial, initial); fwprintf( stderr, L"read_string starting '%C' (%d)\n", initial, initial);
switch ( initial) { switch ( initial) {
case '\0': case '\0':
@ -137,7 +129,7 @@ struct cons_pointer read_string( FILE* input, char initial) {
result = make_string( '\0', NIL); result = make_string( '\0', NIL);
break; break;
default: default:
result = make_string( initial, read_string( input, fgetc( input))); result = make_string( initial, read_string( input, fgetwc( input)));
break; break;
} }
@ -151,6 +143,11 @@ struct cons_pointer read_string( FILE* input, char initial) {
struct cons_pointer read( FILE* input) { struct cons_pointer read( FILE* input) {
return read_continuation( input, '\0'); return read_continuation( input, '\0');
} }
struct cons_pointer lisp_read( struct cons_pointer args, struct cons_pointer env) {
return( read( stdin));
}