Frustrating. I think all the conversion to 'wide' (UTF) character handling

is done, and all the existing unit tests pass - but UTF characters are
nevertheless not read or printed correctly.
This commit is contained in:
Simon Brooke 2017-01-13 00:45:00 +00:00
parent 472b58b900
commit 432ccb2d44
8 changed files with 139 additions and 61 deletions

View file

@ -11,6 +11,9 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#include "conspage.h"
#include "consspaceobject.h"
@ -87,7 +90,7 @@ void dump_object( FILE* output, struct cons_pointer pointer) {
} else if ( check_tag(pointer, REALTAG)) {
fprintf( output, "\t\tReal cell: value %Lf\n", cell.payload.real.value);
} else if ( check_tag( pointer, STRINGTAG)) {
fprintf( output, "\t\tString cell: character '%c' next at page %d offset %d\n",
fwprintf( output, L"\t\tString cell: character '%C' next at page %d offset %d\n",
cell.payload.string.character, cell.payload.string.cdr.page,
cell.payload.string.cdr.offset);
};
@ -124,7 +127,7 @@ struct cons_pointer make_string( char c, struct cons_pointer tail) {
struct cons_space_object* cell = &pointer2cell(pointer);
inc_ref(tail);
cell->payload.string.character = (uint32_t) c;
cell->payload.string.character = (wint_t) c;
cell->payload.string.cdr.page = tail.page;
cell->payload.string.cdr.offset = tail.offset;
} else {

View file

@ -11,6 +11,9 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#ifndef __consspaceobject_h
#define __consspaceobject_h
@ -155,7 +158,7 @@ struct real_payload {
* payload of a string cell. At least at first, only one UTF character will be stored in each cell.
*/
struct string_payload {
uint32_t character; /* the actual character stored in this cell */
wint_t character; /* the actual character stored in this cell */
uint32_t padding; /* unused padding to word-align the cdr */
struct cons_pointer cdr;
};

View file

@ -1,8 +1,8 @@
/**
* init.c
*
* Start up and initialise the environement - just enough to get working and (ultimately)
* hand off to the executive.
* Start up and initialise the environement - just enough to get working
* and (ultimately) hand off to the executive.
*
*
* (c) 2017 Simon Brooke <simon@journeyman.cc>
@ -16,6 +16,7 @@
#include "consspaceobject.h"
#include "print.h"
#include "read.h"
#include "lispops.h"
int main (int argc, char *argv[]) {
fprintf( stderr, "Post scarcity software environment version %s\n", VERSION);

View file

@ -22,6 +22,7 @@
#include "equal.h"
#include "conspage.h"
#include "consspaceobject.h"
#include "equal.h"
/**
* The object list. What is added to this during system setup is 'global', that is,
@ -68,18 +69,53 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store) {
/**
* Return true if this key is present as a key in this enviroment, defaulting to
* the oblist if no environment is passed.
* Internal workings of internedp, q.v. Not intended to be called from anywhere
* else. Note that this is VERY similar to assoc, but returns the car (key) of
* the binding rather than the cdr (value).
*/
bool internedp( struct cons_pointer key, struct cons_pointer environment) {
bool result = false;
struct cons_pointer __internedp( struct cons_pointer key,
struct cons_pointer store) {
struct cons_pointer result = NIL;
if ( nilp( environment)) {
if ( !nilp( oblist)) {
result = internedp( key, oblist);
if ( consp( store)) {
struct cons_space_object* cell_store = &pointer2cell( store);
if ( consp( cell_store->payload.cons.car)) {
struct cons_space_object* binding =
&pointer2cell( cell_store->payload.cons.car);
if ( equal( key, binding->payload.cons.car)) {
result = binding->payload.cons.car;
}
}
/* top-level objects on an assoc list ought to be conses (i.e. each
* successive car should be a cons), but there's no need to throw a
* wobbly if it isn't. */
if ( nilp( result)) {
result = assoc( key, cell_store->payload.cons.cdr);
}
}
return result;
}
/**
* Return the canonical version of this key if ut is present as a key in this
* enviroment, defaulting to the oblist if no environment is passed. Key is
* expected to be a string.
*/
struct cons_pointer internedp( struct cons_pointer key,
struct cons_pointer environment) {
struct cons_pointer result = NIL;
if ( stringp( key)) {
if ( nilp( environment)) {
result = __internedp( key, oblist);
} else {
result = !nilp( assoc( key, environment));
result = __internedp( key, environment);
}
}
return result;
@ -105,3 +141,22 @@ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer valu
oblist = bind( key, value, oblist);
return oblist;
}
/**
* Ensure that a canonical copy of this key is bound in this environment, and
* return that canonical copy. If there is currently no such binding, create one
* with the value NIL.
*/
struct cons_pointer intern( struct cons_pointer key,
struct cons_pointer environment) {
struct cons_pointer result = environment;
struct cons_pointer canonical = internedp( key, environment);
if ( nilp( canonical)) {
/* not currently bound */
result = bind( key, NIL, environment);
}
return result;
}

View file

@ -17,6 +17,10 @@
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __intern_h
#define __intern_h
extern struct cons_pointer oblist;
/**
@ -31,7 +35,8 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store);
* Return true if this key is present as a key in this enviroment, defaulting to
* the oblist if no environment is passed.
*/
bool internedp( struct cons_pointer key, struct cons_pointer environment);
struct cons_pointer internedp( struct cons_pointer key,
struct cons_pointer environment);
/**
* Return a new key/value store containing all the key/value pairs in this store
@ -46,3 +51,13 @@ struct cons_pointer bind( struct cons_pointer key, struct cons_pointer value,
* there it may not be especially useful).
*/
struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value);
/**
* Ensure that a canonical copy of this key is bound in this environment, and
* return that canonical copy. If there is currently no such binding, create one
* with the value NIL.
*/
struct cons_pointer intern( struct cons_pointer key,
struct cons_pointer environment);
#endif

View file

@ -43,7 +43,8 @@ struct cons_pointer i_eval_args( struct cons_pointer args, struct cons_pointer t
struct cons_pointer result = NIL;
if ( ! nilp( args)) {
result = make_cons( lisp_eval( lisp_car( args)), i_eval_args( lisp_cdr( args), tail));
result = make_cons( lisp_eval( lisp_car( args, env), env),
i_eval_args( lisp_cdr( args, env), tail, env));
}
return result;
@ -59,7 +60,7 @@ struct cons_pointer lisp_apply( struct cons_pointer args, struct cons_pointer en
struct cons_pointer result = args;
if ( consp( args)) {
lisp_eval( make_cons( lisp_car( args), i_eval_args( lisp_cdr( args), NIL)));
lisp_eval( make_cons( lisp_car( args, env), i_eval_args( lisp_cdr( args, env), NIL, env)), env);
}
return result;
@ -74,8 +75,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env
if ( consp( args)) {
/* the hard bit. Sort out what function is required and pass the
* args to it. */
struct cons_pointer fn_pointer lisp_car( args);
args = lisp_cdr( args);
struct cons_pointer fn_pointer = lisp_car( args, env);
args = lisp_cdr( args, env);
if ( functionp( fn_pointer)) {
struct cons_space_object function = pointer2cell( fn_pointer);
@ -90,7 +91,7 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env
* also if the object is a consp it could be interpretable
* source code but in the long run I don't want an interpreter,
* and if I can get away without so much the better. */
result = lisp_throw( args, env)
result = lisp_throw( args, env);
}
}
@ -110,7 +111,7 @@ struct cons_pointer lisp_cons( struct cons_pointer args, struct cons_pointer env
struct cons_pointer d = pointer2cell( cell.payload.cons.cdr).payload.cons.car;
result = make_cons( a, d);
} else {
lisp_throw( args);
lisp_throw( args, env);
}
return result;
@ -126,7 +127,7 @@ struct cons_pointer lisp_car( struct cons_pointer args, struct cons_pointer env)
struct cons_space_object cell = pointer2cell( args);
result = pointer2cell( cell.payload.cons.car).payload.cons.car;
} else {
lisp_throw( args);
lisp_throw( args, env);
}
return result;
@ -143,7 +144,7 @@ struct cons_pointer lisp_cdr( struct cons_pointer args, struct cons_pointer env)
struct cons_space_object cell = pointer2cell( args);
result = pointer2cell( cell.payload.cons.cdr).payload.cons.car;
} else {
lisp_throw( args);
lisp_throw( args, env);
}
return result;

View file

@ -11,6 +11,9 @@
#include <ctype.h>
#include <stdio.h>
#include <string.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#include "conspage.h"
#include "consspaceobject.h"
@ -20,10 +23,10 @@
void print_string_contents( FILE* output, struct cons_pointer pointer) {
if ( check_tag( pointer, STRINGTAG)) {
struct cons_space_object* cell = &pointer2cell(pointer);
char c = cell->payload.string.character;
wint_t c = cell->payload.string.character;
if ( c != '\0') {
fputc( c, output);
fputwc( c, output);
}
print_string_contents( output, cell->payload.string.cdr);
}

View file

@ -8,9 +8,11 @@
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
/* wide characters */
#include <wchar.h>
#include <wctype.h>
#include "consspaceobject.h"
#include "integer.h"
@ -22,9 +24,9 @@
lists
Can't read atoms because I don't yet know what an atom is or how it's stored. */
struct cons_pointer read_number( FILE* input, char initial);
struct cons_pointer read_list( FILE* input, char initial);
struct cons_pointer read_string( FILE* input, char initial);
struct cons_pointer read_number( FILE* input, wint_t initial);
struct cons_pointer read_list( FILE* input, wint_t initial);
struct cons_pointer read_string( FILE* input, wint_t initial);
/**
@ -32,34 +34,24 @@ struct cons_pointer read_string( FILE* input, char initial);
* treating this initial character as the first character of the object
* representation.
*/
struct cons_pointer read_continuation( FILE* input, char initial) {
struct cons_pointer read_continuation( FILE* input, wint_t initial) {
struct cons_pointer result = NIL;
char c;
wint_t c;
for (c = initial; c == '\0' || isblank( c); c = fgetc( input));
for (c = initial; c == '\0' || iswblank( c); c = fgetwc( input));
switch( c) {
case '(' :
case ')':
result = read_list(input, fgetc( input));
result = read_list(input, fgetwc( input));
break;
case '"': result = read_string(input, fgetc( input));
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
// case '.':
result = read_number( input, c);
case '"': result = read_string(input, fgetwc( input));
break;
default:
if ( iswdigit( c)) {
result = read_number( input, c);
}
fprintf( stderr, "Unrecognised start of input character %c\n", c);
}
@ -70,15 +62,15 @@ struct cons_pointer read_continuation( FILE* input, char initial) {
/**
* read a number from this input stream, given this initial character.
*/
struct cons_pointer read_number( FILE* input, char initial) {
struct cons_pointer read_number( FILE* input, wint_t initial) {
int accumulator = 0;
int places_of_decimals = 0;
bool seen_period = false;
char c;
wint_t c;
fprintf( stderr, "read_number starting '%c' (%d)\n", initial, initial);
for (c = initial; isdigit( c); c = fgetc( input)) {
for (c = initial; iswdigit( c); c = fgetwc( input)) {
if ( c == '.') {
seen_period = true;
} else {
@ -91,7 +83,7 @@ struct cons_pointer read_number( FILE* input, char initial) {
}
/* push back the character read which was not a digit */
fputc( c, input);
fputwc( c, input);
return make_integer( accumulator);
}
@ -101,15 +93,15 @@ struct cons_pointer read_number( FILE* input, char initial) {
* Read a list from this input stream, which no longer contains the opening
* left parenthesis.
*/
struct cons_pointer read_list( FILE* input, char initial) {
struct cons_pointer read_list( FILE* input, wint_t initial) {
struct cons_pointer cdr = NIL;
struct cons_pointer result= NIL;
fprintf( stderr, "read_list starting '%c' (%d)\n", initial, initial);
fwprintf( stderr, L"read_list starting '%C' (%d)\n", initial, initial);
if ( initial != ')' ) {
struct cons_pointer car = read_continuation( input, initial);
cdr = read_list( input, fgetc( input));
cdr = read_list( input, fgetwc( input));
result = make_cons( car, cdr);
}
@ -123,11 +115,11 @@ struct cons_pointer read_list( FILE* input, char initial) {
* representation of a string, which is that there's no obvious representation of
* an empty string.
*/
struct cons_pointer read_string( FILE* input, char initial) {
struct cons_pointer read_string( FILE* input, wint_t initial) {
struct cons_pointer cdr = NIL;
struct cons_pointer result;
fprintf( stderr, "read_string starting '%c' (%d)\n", initial, initial);
fwprintf( stderr, L"read_string starting '%C' (%d)\n", initial, initial);
switch ( initial) {
case '\0':
@ -137,7 +129,7 @@ struct cons_pointer read_string( FILE* input, char initial) {
result = make_string( '\0', NIL);
break;
default:
result = make_string( initial, read_string( input, fgetc( input)));
result = make_string( initial, read_string( input, fgetwc( input)));
break;
}
@ -153,5 +145,10 @@ struct cons_pointer read( FILE* input) {
}
struct cons_pointer lisp_read( struct cons_pointer args, struct cons_pointer env) {
return( read( stdin));
}