216 lines
5.5 KiB
C
216 lines
5.5 KiB
C
/**
|
|
* read.c
|
|
*
|
|
* First pass at a reader, for bootstrapping.
|
|
*
|
|
*
|
|
* (c) 2017 Simon Brooke <simon@journeyman.cc>
|
|
* Licensed under GPL version 2.0, or, at your option, any later version.
|
|
*/
|
|
|
|
#include <math.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
/*
|
|
* wide characters
|
|
*/
|
|
#include <wchar.h>
|
|
#include <wctype.h>
|
|
|
|
#include "consspaceobject.h"
|
|
#include "integer.h"
|
|
#include "intern.h"
|
|
#include "read.h"
|
|
#include "real.h"
|
|
|
|
/*
|
|
* for the time being things which may be read are: strings numbers - either
|
|
* integer or real, but not yet including ratios or bignums lists Can't read
|
|
* atoms because I don't yet know what an atom is or how it's stored.
|
|
*/
|
|
|
|
struct cons_pointer read_number(FILE * input, wint_t initial);
|
|
struct cons_pointer read_list(FILE * input, wint_t initial);
|
|
struct cons_pointer read_string(FILE * input, wint_t initial);
|
|
struct cons_pointer read_symbol(FILE * input, wint_t initial);
|
|
|
|
/**
|
|
* quote reader macro in C (!)
|
|
*/
|
|
struct cons_pointer c_quote(struct cons_pointer arg)
|
|
{
|
|
return make_cons(c_string_to_lisp_symbol("quote"), make_cons(arg, NIL));
|
|
}
|
|
|
|
/**
|
|
* Read the next object on this input stream and return a cons_pointer to it,
|
|
* treating this initial character as the first character of the object
|
|
* representation.
|
|
*/
|
|
struct cons_pointer read_continuation(FILE * input, wint_t initial)
|
|
{
|
|
struct cons_pointer result = NIL;
|
|
|
|
wint_t c;
|
|
|
|
for (c = initial;
|
|
c == '\0' || iswblank(c) || iswcntrl(c); c = fgetwc(input));
|
|
|
|
switch (c) {
|
|
case '\'':
|
|
result = c_quote(read_continuation(input, fgetwc(input)));
|
|
break;
|
|
case '(':
|
|
result = read_list(input, fgetwc(input));
|
|
break;
|
|
case '"':
|
|
result = read_string(input, fgetwc(input));
|
|
break;
|
|
default:
|
|
if (iswdigit(c)) {
|
|
result = read_number(input, c);
|
|
} else if (iswprint(c)) {
|
|
result = read_symbol(input, c);
|
|
} else {
|
|
fprintf(stderr, "Unrecognised start of input character %c\n", c);
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* read a number from this input stream, given this initial character.
|
|
*/
|
|
struct cons_pointer read_number(FILE * input, wint_t initial)
|
|
{
|
|
int accumulator = 0;
|
|
int places_of_decimals = 0;
|
|
bool seen_period = false;
|
|
wint_t c;
|
|
|
|
fprintf(stderr, "read_number starting '%c' (%d)\n", initial, initial);
|
|
|
|
for (c = initial; iswdigit(c) || c == btowc('.'); c = fgetwc(input)) {
|
|
if (c == btowc('.')) {
|
|
seen_period = true;
|
|
} else {
|
|
accumulator = accumulator * 10 + ((int) c - (int) '0');
|
|
|
|
if (seen_period) {
|
|
places_of_decimals++;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* push back the character read which was not a digit
|
|
*/
|
|
ungetwc(c, input);
|
|
|
|
if (seen_period) {
|
|
return make_real(accumulator / pow(10, places_of_decimals));
|
|
} else {
|
|
return make_integer(accumulator);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Read a list from this input stream, which no longer contains the opening
|
|
* left parenthesis.
|
|
*/
|
|
struct cons_pointer read_list(FILE * input, wint_t initial)
|
|
{
|
|
struct cons_pointer result = NIL;
|
|
|
|
if (initial != ')') {
|
|
fwprintf(stderr, L"read_list starting '%C' (%d)\n", initial, initial);
|
|
struct cons_pointer car = read_continuation(input, initial);
|
|
result = make_cons(car, read_list(input, fgetwc(input)));
|
|
} else {
|
|
fprintf(stderr, "End of list detected\n");
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Read a string. This means either a string delimited by double quotes
|
|
* (is_quoted == true), in which case it may contain whitespace but may
|
|
* not contain a double quote character (unless escaped), or one not
|
|
* so delimited in which case it may not contain whitespace (unless escaped)
|
|
* but may contain a double quote character (probably not a good idea!)
|
|
*/
|
|
struct cons_pointer read_string(FILE * input, wint_t initial)
|
|
{
|
|
struct cons_pointer cdr = NIL;
|
|
struct cons_pointer result;
|
|
|
|
fwprintf(stderr, L"read_string starting '%C' (%d)\n", initial, initial);
|
|
|
|
switch (initial) {
|
|
case '\0':
|
|
result = make_string(initial, NIL);
|
|
break;
|
|
case '"':
|
|
result = make_string('\0', NIL);
|
|
break;
|
|
default:
|
|
result = make_string(initial, read_string(input, fgetwc(input)));
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
struct cons_pointer read_symbol(FILE * input, wint_t initial)
|
|
{
|
|
struct cons_pointer cdr = NIL;
|
|
struct cons_pointer result;
|
|
|
|
fwprintf(stderr, L"read_symbol starting '%C' (%d)\n", initial, initial);
|
|
|
|
switch (initial) {
|
|
case '\0':
|
|
result = make_symbol(initial, NIL);
|
|
break;
|
|
case '"':
|
|
/*
|
|
* THIS IS NOT A GOOD IDEA, but is legal
|
|
*/
|
|
result = make_symbol(initial, read_symbol(input, fgetwc(input)));
|
|
break;
|
|
case ')':
|
|
/*
|
|
* unquoted strings may not include right-parenthesis
|
|
*/
|
|
result = make_symbol('\0', NIL);
|
|
/*
|
|
* push back the character read
|
|
*/
|
|
ungetwc(initial, input);
|
|
break;
|
|
default:
|
|
if (iswblank(initial) || !iswprint(initial)) {
|
|
result = make_symbol('\0', NIL);
|
|
/*
|
|
* push back the character read
|
|
*/
|
|
ungetwc(initial, input);
|
|
} else {
|
|
result = make_symbol(initial, read_symbol(input, fgetwc(input)));
|
|
}
|
|
break;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Read the next object on this input stream and return a cons_pointer to it.
|
|
*/
|
|
struct cons_pointer read(FILE * input)
|
|
{
|
|
return read_continuation(input, fgetwc(input));
|
|
}
|