Frustrating. I think all the conversion to 'wide' (UTF) character handling
is done, and all the existing unit tests pass - but UTF characters are nevertheless not read or printed correctly.
This commit is contained in:
		
							parent
							
								
									472b58b900
								
							
						
					
					
						commit
						432ccb2d44
					
				|  | @ -11,6 +11,9 @@ | |||
| #include <stdint.h> | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| /* wide characters */ | ||||
| #include <wchar.h> | ||||
| #include <wctype.h> | ||||
| 
 | ||||
| #include "conspage.h" | ||||
| #include "consspaceobject.h" | ||||
|  | @ -87,7 +90,7 @@ void dump_object( FILE* output, struct cons_pointer pointer) { | |||
|   } else if ( check_tag(pointer, REALTAG)) { | ||||
|     fprintf( output, "\t\tReal cell: value %Lf\n", cell.payload.real.value); | ||||
|   } else if ( check_tag( pointer, STRINGTAG)) { | ||||
|     fprintf( output, "\t\tString cell: character '%c' next at page %d offset %d\n", | ||||
|     fwprintf( output, L"\t\tString cell: character '%C' next at page %d offset %d\n", | ||||
| 	     cell.payload.string.character, cell.payload.string.cdr.page, | ||||
| 	     cell.payload.string.cdr.offset); | ||||
|   }; | ||||
|  | @ -124,7 +127,7 @@ struct cons_pointer make_string( char c, struct cons_pointer tail) { | |||
|     struct cons_space_object* cell = &pointer2cell(pointer); | ||||
| 
 | ||||
|     inc_ref(tail); | ||||
|     cell->payload.string.character = (uint32_t) c; | ||||
|     cell->payload.string.character = (wint_t) c; | ||||
|     cell->payload.string.cdr.page = tail.page; | ||||
|     cell->payload.string.cdr.offset = tail.offset; | ||||
|   } else { | ||||
|  |  | |||
|  | @ -11,6 +11,9 @@ | |||
| #include <stdbool.h> | ||||
| #include <stdint.h> | ||||
| #include <stdio.h> | ||||
| /* wide characters */ | ||||
| #include <wchar.h> | ||||
| #include <wctype.h> | ||||
| 
 | ||||
| #ifndef __consspaceobject_h | ||||
| #define __consspaceobject_h | ||||
|  | @ -155,7 +158,7 @@ struct real_payload { | |||
|  * payload of a string cell. At least at first, only one UTF character will be stored in each cell. | ||||
|  */ | ||||
| struct string_payload { | ||||
|   uint32_t character;          /* the actual character stored in this cell */ | ||||
|   wint_t character;          /* the actual character stored in this cell */ | ||||
|   uint32_t padding;            /* unused padding to word-align the cdr */ | ||||
|   struct cons_pointer cdr; | ||||
| }; | ||||
|  |  | |||
|  | @ -1,8 +1,8 @@ | |||
| /**
 | ||||
|  * init.c | ||||
|  * | ||||
|  * Start up and initialise the environement - just enough to get working and (ultimately) | ||||
|  * hand off to the executive. | ||||
|  * Start up and initialise the environement - just enough to get working  | ||||
|  * and (ultimately) hand off to the executive. | ||||
|  * | ||||
|  * | ||||
|  * (c) 2017 Simon Brooke <simon@journeyman.cc> | ||||
|  | @ -16,6 +16,7 @@ | |||
| #include "consspaceobject.h" | ||||
| #include "print.h" | ||||
| #include "read.h" | ||||
| #include "lispops.h" | ||||
| 
 | ||||
| int main (int argc, char *argv[]) { | ||||
|   fprintf( stderr, "Post scarcity software environment version %s\n", VERSION); | ||||
|  |  | |||
							
								
								
									
										79
									
								
								src/intern.c
									
									
									
									
									
								
							
							
						
						
									
										79
									
								
								src/intern.c
									
									
									
									
									
								
							|  | @ -22,6 +22,7 @@ | |||
| #include "equal.h" | ||||
| #include "conspage.h" | ||||
| #include "consspaceobject.h" | ||||
| #include "equal.h" | ||||
| 
 | ||||
| /**
 | ||||
|  * The object list. What is added to this during system setup is 'global', that is,  | ||||
|  | @ -67,21 +68,56 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store) { | |||
| } | ||||
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Return true if this key is present as a key in this enviroment, defaulting to | ||||
|  * the oblist if no environment is passed. | ||||
| /** 
 | ||||
|  * Internal workings of internedp, q.v. Not intended to be called from anywhere | ||||
|  * else. Note that this is VERY similar to assoc, but returns the car (key) of | ||||
|  * the binding rather than the cdr (value). | ||||
|  */ | ||||
| bool internedp( struct cons_pointer key, struct cons_pointer environment) { | ||||
|   bool result = false; | ||||
|    | ||||
|   if ( nilp( environment)) { | ||||
|     if ( !nilp( oblist)) { | ||||
|       result = internedp( key, oblist); | ||||
| struct cons_pointer __internedp( struct cons_pointer key, | ||||
| 				 struct cons_pointer store) { | ||||
|   struct cons_pointer result = NIL; | ||||
| 
 | ||||
|   if ( consp( store)) { | ||||
|     struct cons_space_object* cell_store = &pointer2cell( store); | ||||
| 
 | ||||
|     if ( consp( cell_store->payload.cons.car)) { | ||||
|       struct cons_space_object* binding = | ||||
| 	&pointer2cell( cell_store->payload.cons.car); | ||||
| 
 | ||||
|       if ( equal( key, binding->payload.cons.car)) { | ||||
| 	result = binding->payload.cons.car; | ||||
|       } | ||||
|     } | ||||
|     /* top-level objects on an assoc list ought to be conses (i.e. each
 | ||||
|      * successive car should be a cons), but there's no need to throw a | ||||
|      * wobbly if it isn't. */ | ||||
| 
 | ||||
|     if ( nilp( result)) { | ||||
|       result = assoc( key, cell_store->payload.cons.cdr); | ||||
|     } | ||||
|   } else { | ||||
|     result = !nilp( assoc( key, environment)); | ||||
|   } | ||||
|        | ||||
| 
 | ||||
|   return result;   | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Return the canonical version of this key if ut is present as a key in this  | ||||
|  * enviroment, defaulting to the oblist if no environment is passed. Key is | ||||
|  * expected to be a string. | ||||
|  */ | ||||
| struct cons_pointer internedp( struct cons_pointer key, | ||||
| 			       struct cons_pointer environment) { | ||||
|   struct cons_pointer result = NIL; | ||||
| 
 | ||||
|   if ( stringp( key)) { | ||||
|     if ( nilp( environment)) { | ||||
|       result = __internedp( key, oblist); | ||||
|     } else { | ||||
|       result = __internedp( key, environment); | ||||
|     } | ||||
|   } | ||||
|    | ||||
|   return result; | ||||
| } | ||||
| 
 | ||||
|  | @ -105,3 +141,22 @@ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer valu | |||
|   oblist = bind( key, value, oblist); | ||||
|   return oblist; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  * Ensure that a canonical copy of this key is bound in this environment, and | ||||
|  * return that canonical copy. If there is currently no such binding, create one | ||||
|  * with the value NIL. | ||||
|  */ | ||||
| struct cons_pointer intern( struct cons_pointer key, | ||||
| 			    struct cons_pointer environment) { | ||||
|   struct cons_pointer result = environment; | ||||
|   struct cons_pointer canonical = internedp( key, environment); | ||||
| 
 | ||||
|   if ( nilp( canonical)) { | ||||
|     /* not currently bound */ | ||||
|     result = bind( key, NIL, environment); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
| } | ||||
|  |  | |||
							
								
								
									
										17
									
								
								src/intern.h
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								src/intern.h
									
									
									
									
									
								
							|  | @ -17,6 +17,10 @@ | |||
|  * Licensed under GPL version 2.0, or, at your option, any later version. | ||||
|  */ | ||||
| 
 | ||||
| 
 | ||||
| #ifndef __intern_h | ||||
| #define __intern_h | ||||
| 
 | ||||
| extern struct cons_pointer oblist; | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -31,7 +35,8 @@ struct cons_pointer assoc( struct cons_pointer key, struct cons_pointer store); | |||
|  * Return true if this key is present as a key in this enviroment, defaulting to | ||||
|  * the oblist if no environment is passed. | ||||
|  */ | ||||
| bool internedp( struct cons_pointer key, struct cons_pointer environment); | ||||
| struct cons_pointer internedp( struct cons_pointer key, | ||||
| 			       struct cons_pointer environment); | ||||
| 
 | ||||
| /**
 | ||||
|  * Return a new key/value store containing all the key/value pairs in this store | ||||
|  | @ -46,3 +51,13 @@ struct cons_pointer bind( struct cons_pointer key, struct cons_pointer value, | |||
|  * there it may not be especially useful). | ||||
|  */ | ||||
| struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value); | ||||
| 
 | ||||
| /**
 | ||||
|  * Ensure that a canonical copy of this key is bound in this environment, and | ||||
|  * return that canonical copy. If there is currently no such binding, create one | ||||
|  * with the value NIL. | ||||
|  */ | ||||
| struct cons_pointer intern( struct cons_pointer key, | ||||
| 			    struct cons_pointer environment); | ||||
| 
 | ||||
| #endif | ||||
|  |  | |||
|  | @ -43,7 +43,8 @@ struct cons_pointer i_eval_args( struct cons_pointer args, struct cons_pointer t | |||
|   struct cons_pointer result = NIL; | ||||
|    | ||||
|   if ( ! nilp( args)) { | ||||
|     result = make_cons( lisp_eval( lisp_car( args)), i_eval_args( lisp_cdr( args), tail)); | ||||
|     result = make_cons( lisp_eval( lisp_car( args, env), env), | ||||
| 			i_eval_args( lisp_cdr( args, env), tail, env)); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
|  | @ -59,7 +60,7 @@ struct cons_pointer lisp_apply( struct cons_pointer args, struct cons_pointer en | |||
|   struct cons_pointer result = args; | ||||
|    | ||||
|   if ( consp( args)) { | ||||
|     lisp_eval( make_cons( lisp_car( args), i_eval_args( lisp_cdr( args), NIL))); | ||||
|     lisp_eval( make_cons( lisp_car( args, env), i_eval_args( lisp_cdr( args, env), NIL, env)), env); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
|  | @ -74,8 +75,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env | |||
|   if ( consp( args)) { | ||||
|     /* the hard bit. Sort out what function is required and pass the
 | ||||
|      * args to it. */ | ||||
|     struct cons_pointer fn_pointer lisp_car( args); | ||||
|     args = lisp_cdr( args); | ||||
|     struct cons_pointer fn_pointer = lisp_car( args, env); | ||||
|     args = lisp_cdr( args, env); | ||||
| 
 | ||||
|     if ( functionp( fn_pointer)) { | ||||
|       struct cons_space_object function = pointer2cell( fn_pointer); | ||||
|  | @ -90,8 +91,8 @@ struct cons_pointer lisp_eval( struct cons_pointer args, struct cons_pointer env | |||
|        * also if the object is a consp it could be interpretable | ||||
|        * source code but in the long run I don't want an interpreter, | ||||
|        * and if I can get away without so much the better. */ | ||||
|       result = lisp_throw( args, env) | ||||
|         } | ||||
|       result = lisp_throw( args, env); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
|  | @ -110,7 +111,7 @@ struct cons_pointer lisp_cons( struct cons_pointer args, struct cons_pointer env | |||
|     struct cons_pointer d = pointer2cell( cell.payload.cons.cdr).payload.cons.car; | ||||
|     result = make_cons( a, d); | ||||
|   } else { | ||||
|     lisp_throw( args); | ||||
|     lisp_throw( args, env); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
|  | @ -126,7 +127,7 @@ struct cons_pointer lisp_car( struct cons_pointer args, struct cons_pointer env) | |||
|     struct cons_space_object cell = pointer2cell( args); | ||||
|     result =  pointer2cell( cell.payload.cons.car).payload.cons.car; | ||||
|   } else { | ||||
|     lisp_throw( args); | ||||
|     lisp_throw( args, env); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
|  | @ -143,7 +144,7 @@ struct cons_pointer lisp_cdr( struct cons_pointer args, struct cons_pointer env) | |||
|     struct cons_space_object cell = pointer2cell( args); | ||||
|     result =  pointer2cell( cell.payload.cons.cdr).payload.cons.car; | ||||
|   } else { | ||||
|     lisp_throw( args); | ||||
|     lisp_throw( args, env); | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
|  |  | |||
|  | @ -11,6 +11,9 @@ | |||
| #include <ctype.h> | ||||
| #include <stdio.h> | ||||
| #include <string.h> | ||||
| /* wide characters */ | ||||
| #include <wchar.h> | ||||
| #include <wctype.h> | ||||
| 
 | ||||
| #include "conspage.h" | ||||
| #include "consspaceobject.h" | ||||
|  | @ -20,10 +23,10 @@ | |||
| void print_string_contents( FILE* output, struct cons_pointer pointer) { | ||||
|   if ( check_tag( pointer, STRINGTAG)) { | ||||
|     struct cons_space_object* cell = &pointer2cell(pointer); | ||||
|     char c = cell->payload.string.character; | ||||
|     wint_t c = cell->payload.string.character; | ||||
| 
 | ||||
|     if ( c != '\0') { | ||||
|       fputc( c, output); | ||||
|       fputwc( c, output); | ||||
|     } | ||||
|     print_string_contents( output, cell->payload.string.cdr); | ||||
|   } | ||||
|  |  | |||
							
								
								
									
										61
									
								
								src/read.c
									
									
									
									
									
								
							
							
						
						
									
										61
									
								
								src/read.c
									
									
									
									
									
								
							|  | @ -8,9 +8,11 @@ | |||
|  * Licensed under GPL version 2.0, or, at your option, any later version. | ||||
|  */ | ||||
| 
 | ||||
| #include <ctype.h> | ||||
| #include <stdbool.h> | ||||
| #include <stdio.h> | ||||
| /* wide characters */ | ||||
| #include <wchar.h> | ||||
| #include <wctype.h> | ||||
| 
 | ||||
| #include "consspaceobject.h" | ||||
| #include "integer.h" | ||||
|  | @ -22,9 +24,9 @@ | |||
|    lists | ||||
|    Can't read atoms because I don't yet know what an atom is or how it's stored. */ | ||||
| 
 | ||||
| struct cons_pointer read_number( FILE* input, char initial); | ||||
| struct cons_pointer read_list( FILE* input, char initial); | ||||
| struct cons_pointer read_string( FILE* input, char initial); | ||||
| struct cons_pointer read_number( FILE* input, wint_t initial); | ||||
| struct cons_pointer read_list( FILE* input, wint_t initial); | ||||
| struct cons_pointer read_string( FILE* input, wint_t initial); | ||||
| 
 | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -32,34 +34,24 @@ struct cons_pointer read_string( FILE* input, char initial); | |||
|  * treating this initial character as the first character of the object | ||||
|  * representation. | ||||
|  */ | ||||
| struct cons_pointer read_continuation( FILE* input, char initial) { | ||||
| struct cons_pointer read_continuation( FILE* input, wint_t initial) { | ||||
|   struct cons_pointer result = NIL; | ||||
| 
 | ||||
|   char c; | ||||
|   wint_t c; | ||||
| 
 | ||||
|   for (c = initial; c == '\0' || isblank( c); c = fgetc( input)); | ||||
|   for (c = initial; c == '\0' || iswblank( c); c = fgetwc( input)); | ||||
|    | ||||
|   switch( c) { | ||||
|   case '(' : | ||||
|   case ')': | ||||
|     result = read_list(input, fgetc( input)); | ||||
|     result = read_list(input, fgetwc( input)); | ||||
|     break; | ||||
|   case '"': result = read_string(input, fgetc( input)); | ||||
|     break; | ||||
|   case '0': | ||||
|   case '1': | ||||
|   case '2': | ||||
|   case '3': | ||||
|   case '4': | ||||
|   case '5': | ||||
|   case '6': | ||||
|   case '7': | ||||
|   case '8': | ||||
|   case '9': | ||||
|     // case '.':
 | ||||
|     result = read_number( input, c); | ||||
|   case '"': result = read_string(input, fgetwc( input)); | ||||
|     break; | ||||
|   default: | ||||
|     if ( iswdigit( c)) { | ||||
|       result = read_number( input, c); | ||||
|     }  | ||||
|     fprintf( stderr, "Unrecognised start of input character %c\n", c); | ||||
|   } | ||||
| 
 | ||||
|  | @ -70,15 +62,15 @@ struct cons_pointer read_continuation( FILE* input, char initial) { | |||
| /**
 | ||||
|  * read a number from this input stream, given this initial character. | ||||
|  */ | ||||
| struct cons_pointer read_number( FILE* input, char initial) { | ||||
| struct cons_pointer read_number( FILE* input, wint_t initial) { | ||||
|   int accumulator = 0; | ||||
|   int places_of_decimals = 0; | ||||
|   bool seen_period = false; | ||||
|   char c; | ||||
|   wint_t c; | ||||
| 
 | ||||
|   fprintf( stderr, "read_number starting '%c' (%d)\n", initial, initial); | ||||
|    | ||||
|   for (c = initial; isdigit( c); c = fgetc( input)) { | ||||
|   for (c = initial; iswdigit( c); c = fgetwc( input)) { | ||||
|     if ( c == '.') { | ||||
|       seen_period = true; | ||||
|     } else { | ||||
|  | @ -91,7 +83,7 @@ struct cons_pointer read_number( FILE* input, char initial) { | |||
|   } | ||||
| 
 | ||||
|   /* push back the character read which was not a digit */ | ||||
|   fputc( c, input); | ||||
|   fputwc( c, input); | ||||
| 
 | ||||
|   return make_integer( accumulator); | ||||
| } | ||||
|  | @ -101,15 +93,15 @@ struct cons_pointer read_number( FILE* input, char initial) { | |||
|  * Read a list from this input stream, which no longer contains the opening | ||||
|  * left parenthesis. | ||||
|  */ | ||||
| struct cons_pointer read_list( FILE* input, char initial) { | ||||
| struct cons_pointer read_list( FILE* input, wint_t initial) { | ||||
|   struct cons_pointer cdr = NIL; | ||||
|   struct cons_pointer result= NIL; | ||||
| 
 | ||||
|   fprintf( stderr, "read_list starting '%c' (%d)\n", initial, initial); | ||||
|   fwprintf( stderr, L"read_list starting '%C' (%d)\n", initial, initial); | ||||
|    | ||||
|   if ( initial != ')' ) { | ||||
|     struct cons_pointer car = read_continuation( input, initial); | ||||
|     cdr = read_list( input, fgetc( input)); | ||||
|     cdr = read_list( input, fgetwc( input)); | ||||
|     result = make_cons( car, cdr); | ||||
|   } | ||||
| 
 | ||||
|  | @ -123,11 +115,11 @@ struct cons_pointer read_list( FILE* input, char initial) { | |||
|  * representation of a string, which is that there's no obvious representation of | ||||
|  * an empty string. | ||||
|  */ | ||||
| struct cons_pointer read_string( FILE* input, char initial) { | ||||
| struct cons_pointer read_string( FILE* input, wint_t initial) { | ||||
|   struct cons_pointer cdr = NIL; | ||||
|   struct cons_pointer result; | ||||
| 
 | ||||
|   fprintf( stderr, "read_string starting '%c' (%d)\n", initial, initial); | ||||
|   fwprintf( stderr, L"read_string starting '%C' (%d)\n", initial, initial); | ||||
| 
 | ||||
|   switch ( initial) { | ||||
|   case '\0': | ||||
|  | @ -137,7 +129,7 @@ struct cons_pointer read_string( FILE* input, char initial) { | |||
|     result = make_string( '\0', NIL); | ||||
|     break; | ||||
|   default: | ||||
|     result = make_string( initial, read_string( input, fgetc( input))); | ||||
|     result = make_string( initial, read_string( input, fgetwc( input))); | ||||
|     break; | ||||
|   } | ||||
| 
 | ||||
|  | @ -151,6 +143,11 @@ struct cons_pointer read_string( FILE* input, char initial) { | |||
| struct cons_pointer read( FILE* input) { | ||||
|   return read_continuation( input, '\0'); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| struct cons_pointer lisp_read( struct cons_pointer args, struct cons_pointer env) { | ||||
|   return( read( stdin)); | ||||
| } | ||||
|      | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue