post-scarcity/src/c/io/read.c
2026-05-07 06:47:58 +01:00

382 lines
14 KiB
C

/**
* read.c
*
* Read basic Lisp objects..This is :bootstrap layer print; it needs to be
* able to read characters, symbols, integers, lists and dotted pairs. I
* don't think it needs to be able to read anything else. It must, however,
* take a readtable as argument and expand reader macros.
*
*
* (c) 2017 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
#include "debug.h"
#include "io/io.h"
#include "io/read.h"
#include "memory/node.h"
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/pso2.h"
#include "memory/tags.h"
#include "payloads/character.h"
#include "payloads/cons.h"
#include "payloads/exception.h"
#include "payloads/function.h"
#include "payloads/integer.h"
#include "payloads/read_stream.h"
#include "payloads/symbol.h"
#include "ops/assoc.h"
#include "ops/reverse.h"
#include "payloads/stack.h"
#include "ops/string_ops.h"
#include "ops/truth.h"
// TODO: what I've copied from 0.0.6 is *weirdly* over-complex for just now.
// I think I'm going to essentially delete all this and start again. We need
// to be able to despatch on readttables, and the initial readtable functions
// don't need to be written in Lisp.
//
// In the long run a readtable ought to be a hashtable, but for now an assoc
// list will do.
//
// A readtable function is a Lisp function so needs the stackframe and the
// environment. Other arguments (including the output stream) should be passed
// in the argument, so I think the first arg in the frame is the character read;
// the next is the input stream; the next is the readtable, if any.
/*
* for the time being things which may be read are:
* * integers
* * lists
* * atoms
* * dotted pairs
*/
/**
* An example wrapper function while I work out how I'm going to do this.
*
* For this and all other `read` functions unless documented otherwise, the
* arguments in the frame are expected to be:
*
* 0. The input stream to read from;
* 1. The read table currently in use;
* 2. The character most recently read from that stream.
*/
struct pso_pointer read_example( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
return result;
}
struct pso_pointer make_eof_exception( struct pso_pointer frame_pointer ) {
return make_exception( make_frame( 1, frame_pointer,
c_string_to_lisp_string
( frame_pointer,
L"Read: end of input while reading" ) ) );
}
/**
* Function: return the next character from the stream indicated by arg 0;
* further arguments are ignored.
*
* * (read-char stream)
*
* @param frame my stack frame.
* @param frame_pointer a pointer to my stack frame.
* @param env my environment.
* @return a string of one character, namely the next available character
* on my stream, if any, else nil.
*/
struct pso_pointer read_character( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer result = nil;
struct pso_pointer stream_pointer = fetch_arg( frame, 0 );
if ( readp( stream_pointer ) ) {
wint_t chr = url_fgetwc( stream_get_url_file( stream_pointer ) );
result = make_character( frame_pointer, chr );
#ifdef DEBUG
debug_printf( DEBUG_IO, 0, L"\nRead character %lc\n", chr );
#endif
}
return result;
}
/**
* @brief advance the `stream` indicated in arg[0] of this stack frame over any
* whitespace characters. The character indicated by arg[2] will be treated as
* potentially the first such character. Returns the first non-space character
* encountered, or an exception.
*/
struct pso_pointer skip_whitespace( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
if ( characterp( character ) ) {
wchar_t wc =
pointer_to_object( character )->payload.character.character;
if ( !iswspace( wc ) && wc != L',' ) {
result = character;
}
}
if ( c_nilp( result ) && readp( stream ) ) {
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
wint_t wc = url_fgetwc( input );
while ( iswspace( wc ) || wc == L',' ) {
wc = url_fgetwc( input );
}
result =
( wc ==
WEOF ) ? make_eof_exception( frame_pointer ) :
make_character( frame_pointer, wc );
}
return result;
}
struct pso_pointer read_list( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
if ( !c_nilp( character ) && characterp( character ) &&
pointer_to_object( character )->payload.character.character ==
SYNTAX_LPAR ) {
// it's OK if an LPAR is passed in, but we don't want it now.
character = nil;
}
if ( !c_nilp( character ) ) {
// if anything other than LPAR is passed in as character, TODO: throw exception.
}
while ( c_nilp( character ) || ( characterp( character ) &&
pointer_to_object( character )->
payload.character.character !=
SYNTAX_RPAR ) ) {
character =
skip_whitespace( make_frame
( 3, frame_pointer, stream, readtable,
character ) );
struct pso_pointer r =
read( make_frame
( 3, frame_pointer, stream, readtable, character ) );
if ( exceptionp( r ) ) {
result = r;
break;
} else {
result = make_cons( frame_pointer, r, result );
character =
skip_whitespace( make_frame
( 3, frame_pointer, stream, readtable,
character ) );
}
}
return consp( result ) ? c_reverse( frame_pointer, result ) : result;
}
/**
* @brief Read one integer from the stream and return it.
*
* For this and all other `read` functions unless documented otherwise, the
* arguments in the frame are expected to be:
*
* 0. The input stream to read from;
* 1. The read table currently in use;
* 2. The character most recently read from that stream.
*/
struct pso_pointer read_number( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
int base = 10;
// TODO: should check for *read-base* in the environment
int64_t value = 0;
if ( readp( stream ) ) {
if ( c_nilp( character ) ) {
character =
read_character( make_frame( 1, frame_pointer, stream ) );
}
wchar_t c = c_nilp( character )
? 0 : pointer_to_object( character )->payload.character.character;
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
for ( ; iswdigit( c ) || c == L','; c = url_fgetwc( input ) ) {
if ( iswdigit( c ) ) {
value = ( value * base ) + ( ( int ) c - ( int ) L'0' );
}
}
url_ungetwc( c, input );
result = make_integer( frame_pointer, value );
} // else exception?
#ifdef DEBUG
debug_printf( DEBUG_IO, 0, L"\nRead number %ld\n", value );
debug_dump_object( result, DEBUG_IO, 1 );
#endif
return result;
}
struct pso_pointer read_symbol( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
if ( readp( stream ) ) {
if ( c_nilp( character ) ) {
character =
read_character( make_frame( 1, frame_pointer, stream ) );
}
wchar_t c = c_nilp( character )
? 0 : pointer_to_object( character )->payload.character.character;
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
for ( ; symbol_char_p( c ); c = url_fgetwc( input ) ) {
result =
make_string_like_thing( frame_pointer, c, result, SYMBOLTAG );
}
url_ungetwc( c, input );
result = c_reverse( frame_pointer, result );
}
#ifdef DEBUG
debug_print( L"\nRead symbol `", DEBUG_IO, 0 );
debug_print_object( result, DEBUG_IO, 0 );
debug_print( L"`\n\t", DEBUG_IO, 0 );
debug_dump_object( result, DEBUG_IO, 1 );
#endif
return result;
}
/**
* @brief Read the next object on the input stream indicated by this stack
* frame, and return a pso_pointer to the object read.
*
* For this and all other `read` functions unless documented otherwise, the
* arguments in the frame are expected to be:
*
* 0. The input stream to read from;
* 1. The read table currently in use;
* 2. The character most recently read from that stream.
*/
struct pso_pointer read( struct pso_pointer frame_pointer ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
if ( c_nilp( stream ) ) {
stream =
make_read_stream( frame_pointer, file_to_url_file( stdin ), nil );
}
if ( c_nilp( readtable ) ) {
readtable = c_assoc( lisp_io_read_table, fetch_env( frame_pointer ) );
}
if ( c_nilp( character ) ) {
character = skip_whitespace( make_frame( 1, frame_pointer, stream ) );
}
struct pso_pointer readmacro = c_assoc( character, readtable );
if ( !c_nilp( readmacro ) ) {
// invoke the read macro on the stream
} else if ( readp( stream ) && characterp( character ) ) {
wchar_t c =
pointer_to_object( character )->payload.character.character;
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
switch ( c ) {
case SYNTAX_SEMICOLON:
for ( c = url_fgetwc( input ); c != '\n';
c = url_fgetwc( input ) );
/* skip all characters from semi-colon to the end of the line */
break;
case SYNTAX_LPAR:
result =
read_list( make_frame( 3, stream, readtable, character ) );
break;
case EOF:
result = make_eof_exception( frame_pointer );
break;
default:
struct pso_pointer next = make_frame( 3, frame_pointer, stream,
readtable,
make_character
( frame_pointer, c ) );
inc_ref( next );
if ( iswdigit( c ) ) {
result = push_local( frame_pointer, read_number( next ) );
} else if ( symbol_char_p( c ) ) {
result = push_local( frame_pointer, read_symbol( next ) );
} else {
// result =
// throw_exception(
// c_string_to_lisp_symbol( L"read" ),
// make_cons(
// c_string_to_lisp_string
// (
// L"Unrecognised
// start of
// input
// character"
// ),
// make_string(
// c, NIL )
// ),
// frame_pointer );
}
// dec_ref( next );
break;
}
}
#ifdef DEBUG
debug_print( L"Read expression: `", DEBUG_IO, 0 );
debug_print_object( result, DEBUG_IO, 0 );
debug_print( L"`\n", DEBUG_IO, 0 );
debug_dump_object( result, DEBUG_IO, 1 );
#endif
return result;
}