Upversioned the C source tree to '0.0.7-SNAPSHOT', but proposing to start experimental
work towards 0.1.0 in separate source trees.
This commit is contained in:
parent
788cb48b37
commit
99d4794f3b
57 changed files with 2 additions and 2 deletions
526
src/c/io/fopen.c
Normal file
526
src/c/io/fopen.c
Normal file
|
|
@ -0,0 +1,526 @@
|
|||
/*
|
||||
* fopen.c
|
||||
*
|
||||
* adapted from https://curl.haxx.se/libcurl/c/fopen.html.
|
||||
*
|
||||
* Modifications to read/write wide character streams by
|
||||
* Simon Brooke.
|
||||
*
|
||||
* NOTE THAT: for my purposes, I'm only interested in wide characters,
|
||||
* and I always read them one character at a time.
|
||||
*
|
||||
* Copyright (c) 2003, 2017 Simtec Electronics
|
||||
* Some portions (c) 2019 Simon Brooke <simon@journeyman.cc>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* This example requires libcurl 7.9.7 or later.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifndef WIN32
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include "io/fopen.h"
|
||||
#ifdef FOPEN_STANDALONE
|
||||
CURLSH *io_share;
|
||||
#else
|
||||
#include "memory/consspaceobject.h"
|
||||
#include "io/io.h"
|
||||
#include "utils.h"
|
||||
#endif
|
||||
|
||||
|
||||
/* exported functions */
|
||||
URL_FILE *url_fopen( const char *url, const char *operation );
|
||||
int url_fclose( URL_FILE * file );
|
||||
int url_feof( URL_FILE * file );
|
||||
size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file );
|
||||
char *url_fgets( char *ptr, size_t size, URL_FILE * file );
|
||||
void url_rewind( URL_FILE * file );
|
||||
|
||||
/* we use a global one for convenience */
|
||||
static CURLM *multi_handle;
|
||||
|
||||
/* curl calls this routine to get more data */
|
||||
static size_t write_callback( char *buffer,
|
||||
size_t size, size_t nitems, void *userp ) {
|
||||
char *newbuff;
|
||||
size_t rembuff;
|
||||
|
||||
URL_FILE *url = ( URL_FILE * ) userp;
|
||||
size *= nitems;
|
||||
|
||||
rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */
|
||||
|
||||
if ( size > rembuff ) {
|
||||
/* not enough space in buffer */
|
||||
newbuff = realloc( url->buffer, url->buffer_len + ( size - rembuff ) );
|
||||
if ( newbuff == NULL ) {
|
||||
fprintf( stderr, "callback buffer grow failed\n" );
|
||||
size = rembuff;
|
||||
} else {
|
||||
/* realloc succeeded increase buffer size */
|
||||
url->buffer_len += size - rembuff;
|
||||
url->buffer = newbuff;
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( &url->buffer[url->buffer_pos], buffer, size );
|
||||
url->buffer_pos += size;
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/* use to attempt to fill the read buffer up to requested number of bytes */
|
||||
static int fill_buffer( URL_FILE *file, size_t want ) {
|
||||
fd_set fdread;
|
||||
fd_set fdwrite;
|
||||
fd_set fdexcep;
|
||||
struct timeval timeout;
|
||||
int rc;
|
||||
CURLMcode mc; /* curl_multi_fdset() return code */
|
||||
|
||||
/* only attempt to fill buffer if transactions still running and buffer
|
||||
* doesn't exceed required size already
|
||||
*/
|
||||
if ( ( !file->still_running ) || ( file->buffer_pos > want ) )
|
||||
return 0;
|
||||
|
||||
/* attempt to fill buffer */
|
||||
do {
|
||||
int maxfd = -1;
|
||||
long curl_timeo = -1;
|
||||
|
||||
FD_ZERO( &fdread );
|
||||
FD_ZERO( &fdwrite );
|
||||
FD_ZERO( &fdexcep );
|
||||
|
||||
/* set a suitable timeout to fail on */
|
||||
timeout.tv_sec = 60; /* 1 minute */
|
||||
timeout.tv_usec = 0;
|
||||
|
||||
curl_multi_timeout( multi_handle, &curl_timeo );
|
||||
if ( curl_timeo >= 0 ) {
|
||||
timeout.tv_sec = curl_timeo / 1000;
|
||||
if ( timeout.tv_sec > 1 )
|
||||
timeout.tv_sec = 1;
|
||||
else
|
||||
timeout.tv_usec = ( curl_timeo % 1000 ) * 1000;
|
||||
}
|
||||
|
||||
/* get file descriptors from the transfers */
|
||||
mc = curl_multi_fdset( multi_handle, &fdread, &fdwrite, &fdexcep,
|
||||
&maxfd );
|
||||
|
||||
if ( mc != CURLM_OK ) {
|
||||
fprintf( stderr, "curl_multi_fdset() failed, code %d.\n", mc );
|
||||
break;
|
||||
}
|
||||
|
||||
/* On success the value of maxfd is guaranteed to be >= -1. We call
|
||||
select(maxfd + 1, ...); specially in case of (maxfd == -1) there are
|
||||
no fds ready yet so we call select(0, ...) --or Sleep() on Windows--
|
||||
to sleep 100ms, which is the minimum suggested value in the
|
||||
curl_multi_fdset() doc. */
|
||||
|
||||
if ( maxfd == -1 ) {
|
||||
#ifdef _WIN32
|
||||
Sleep( 100 );
|
||||
rc = 0;
|
||||
#else
|
||||
/* Portable sleep for platforms other than Windows. */
|
||||
struct timeval wait = { 0, 100 * 1000 }; /* 100ms */
|
||||
rc = select( 0, NULL, NULL, NULL, &wait );
|
||||
#endif
|
||||
} else {
|
||||
/* Note that on some platforms 'timeout' may be modified by select().
|
||||
If you need access to the original value save a copy beforehand. */
|
||||
rc = select( maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout );
|
||||
}
|
||||
|
||||
switch ( rc ) {
|
||||
case -1:
|
||||
/* select error */
|
||||
break;
|
||||
|
||||
case 0:
|
||||
default:
|
||||
/* timeout or readable/writable sockets */
|
||||
curl_multi_perform( multi_handle, &file->still_running );
|
||||
break;
|
||||
}
|
||||
} while ( file->still_running && ( file->buffer_pos < want ) );
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* use to remove want bytes from the front of a files buffer */
|
||||
static int use_buffer( URL_FILE *file, size_t want ) {
|
||||
/* sort out buffer */
|
||||
if ( ( file->buffer_pos - want ) <= 0 ) {
|
||||
/* ditch buffer - write will recreate */
|
||||
free( file->buffer );
|
||||
file->buffer = NULL;
|
||||
file->buffer_pos = 0;
|
||||
file->buffer_len = 0;
|
||||
} else {
|
||||
/* move rest down make it available for later */
|
||||
memmove( file->buffer,
|
||||
&file->buffer[want], ( file->buffer_pos - want ) );
|
||||
|
||||
file->buffer_pos -= want;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
URL_FILE *url_fopen( const char *url, const char *operation ) {
|
||||
/* this code could check for URLs or types in the 'url' and
|
||||
basically use the real fopen() for standard files */
|
||||
|
||||
URL_FILE *file;
|
||||
( void ) operation;
|
||||
|
||||
file = calloc( 1, sizeof( URL_FILE ) );
|
||||
if ( !file )
|
||||
return NULL;
|
||||
|
||||
file->handle.file = fopen( url, operation );
|
||||
if ( file->handle.file ) {
|
||||
file->type = CFTYPE_FILE; /* marked as file */
|
||||
} else if ( index_of( ':', url ) > -1 ) {
|
||||
file->type = CFTYPE_CURL; /* marked as URL */
|
||||
file->handle.curl = curl_easy_init( );
|
||||
|
||||
curl_easy_setopt( file->handle.curl, CURLOPT_URL, url );
|
||||
curl_easy_setopt( file->handle.curl, CURLOPT_WRITEDATA, file );
|
||||
curl_easy_setopt( file->handle.curl, CURLOPT_VERBOSE, 0L );
|
||||
curl_easy_setopt( file->handle.curl, CURLOPT_WRITEFUNCTION,
|
||||
write_callback );
|
||||
/* use the share object */
|
||||
curl_easy_setopt( file->handle.curl, CURLOPT_SHARE, io_share );
|
||||
|
||||
|
||||
if ( !multi_handle )
|
||||
multi_handle = curl_multi_init( );
|
||||
|
||||
curl_multi_add_handle( multi_handle, file->handle.curl );
|
||||
|
||||
/* lets start the fetch */
|
||||
curl_multi_perform( multi_handle, &file->still_running );
|
||||
|
||||
if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) {
|
||||
/* if still_running is 0 now, we should return NULL */
|
||||
|
||||
/* make sure the easy handle is not in the multi handle anymore */
|
||||
curl_multi_remove_handle( multi_handle, file->handle.curl );
|
||||
|
||||
/* cleanup */
|
||||
curl_easy_cleanup( file->handle.curl );
|
||||
|
||||
free( file );
|
||||
|
||||
file = NULL;
|
||||
}
|
||||
} else {
|
||||
file->type = CFTYPE_NONE;
|
||||
/* not a file, and doesn't look like a URL. */
|
||||
}
|
||||
|
||||
return file;
|
||||
}
|
||||
|
||||
/*
 * Close `file` and release the handle together with any cached data.
 *
 * Returns the fclose() result for local files and 0 for curl streams. For
 * any other handle type it returns EOF and sets errno to EBADF; the handle
 * is freed in that case as well.
 */
int url_fclose( URL_FILE *file ) {
    int status;

    if ( file->type == CFTYPE_FILE ) {
        status = fclose( file->handle.file );   /* passthrough */
    } else if ( file->type == CFTYPE_CURL ) {
        /* detach the easy handle from the multi handle, then destroy it */
        curl_multi_remove_handle( multi_handle, file->handle.curl );
        curl_easy_cleanup( file->handle.curl );
        status = 0;
    } else {
        /* unknown or unsupported type - oh dear */
        status = EOF;
        errno = EBADF;
    }

    free( file->buffer );       /* free any allocated buffer space */
    free( file );

    return status;
}
|
||||
|
||||
/*
 * Report whether `file` is exhausted.
 *
 * Local files pass through to feof(). A curl stream is at end-of-file once
 * its cache is empty and the transfer is no longer running (1, else 0).
 * Any other handle type yields -1 with errno set to EBADF.
 */
int url_feof( URL_FILE *file ) {
    if ( file->type == CFTYPE_FILE ) {
        return feof( file->handle.file );
    }

    if ( file->type == CFTYPE_CURL ) {
        if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) {
            return 1;
        }
        return 0;
    }

    /* unknown or unsupported type - oh dear */
    errno = EBADF;
    return -1;
}
|
||||
|
||||
/*
 * Read up to `nmemb` items of `size` bytes each from `file` into `ptr`.
 *
 * Local files pass through to fread(). For curl streams the cache is filled
 * as far as possible, the available bytes (at most nmemb * size) are copied
 * out and removed from the cache, and the number of whole items copied is
 * returned. Returns 0 at end of stream, when `size` or `nmemb` is 0, and -
 * with errno set to EBADF - for any other handle type.
 */
size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE *file ) {
    size_t want;

    switch ( file->type ) {
        case CFTYPE_FILE:
            want = fread( ptr, size, nmemb, file->handle.file );
            break;

        case CFTYPE_CURL:
            /* nothing to read, and it keeps the division below safe */
            if ( size == 0 || nmemb == 0 )
                return 0;

            want = nmemb * size;

            fill_buffer( file, want );

            /* check if there's data in the buffer - if not fill_buffer()
             * either errored or EOF */
            if ( !file->buffer_pos )
                return 0;

            /* ensure only available data is considered */
            if ( file->buffer_pos < want )
                want = file->buffer_pos;

            /* xfer data to caller */
            memcpy( ptr, file->buffer, want );

            use_buffer( file, want );

            want = want / size; /* number of items */
            break;

        default:               /* unknown or unsupported type - oh dear */
            want = 0;
            errno = EBADF;
            break;
    }

    return want;
}
|
||||
|
||||
/*
 * Read at most `size - 1` bytes from `file` into `ptr`, stopping after the
 * first newline, and NUL-terminate the result.
 *
 * Local files pass through to fgets(). For curl streams the bytes come from
 * the cache, which is filled first. Returns `ptr` on success; NULL when no
 * data is available, when `size` is 0 on a curl stream, or - with errno set
 * to EBADF - for any other handle type.
 */
char *url_fgets( char *ptr, size_t size, URL_FILE *file ) {
    size_t want;
    size_t loop;

    switch ( file->type ) {
        case CFTYPE_FILE:
            ptr = fgets( ptr, ( int ) size, file->handle.file );
            break;

        case CFTYPE_CURL:
            /* a zero-sized buffer has no room even for the terminator, and
             * size - 1 would wrap around */
            if ( size == 0 )
                return NULL;

            want = size - 1;    /* always need to leave room for zero termination */

            fill_buffer( file, want );

            /* check if there's data in the buffer - if not fill either
             * errored or EOF */
            if ( !file->buffer_pos )
                return NULL;

            /* ensure only available data is considered */
            if ( file->buffer_pos < want )
                want = file->buffer_pos;

            /* buffer contains data: look for newline or eof */
            for ( loop = 0; loop < want; loop++ ) {
                if ( file->buffer[loop] == '\n' ) {
                    want = loop + 1;    /* include newline */
                    break;
                }
            }

            /* xfer data to caller */
            memcpy( ptr, file->buffer, want );
            ptr[want] = 0;      /* always null terminate */

            use_buffer( file, want );

            break;

        default:               /* unknown or unsupported type - oh dear */
            ptr = NULL;
            errno = EBADF;
            break;
    }

    return ptr;
}
|
||||
|
||||
/*
 * Reset `file` to the start of its data.
 *
 * Local files pass through to rewind(). For a curl stream the easy handle
 * is removed from and re-added to the multi handle and the cache is thrown
 * away. Other handle types are left untouched.
 */
void url_rewind( URL_FILE *file ) {
    if ( file->type == CFTYPE_FILE ) {
        rewind( file->handle.file );    /* passthrough */
        return;
    }

    if ( file->type != CFTYPE_CURL ) {
        /* unknown or unsupported type - oh dear */
        return;
    }

    /* halt transaction */
    curl_multi_remove_handle( multi_handle, file->handle.curl );

    /* restart */
    curl_multi_add_handle( multi_handle, file->handle.curl );

    /* ditch buffer - write will recreate - resets stream pos */
    free( file->buffer );
    file->buffer = NULL;
    file->buffer_pos = 0;
    file->buffer_len = 0;
}
|
||||
|
||||
#ifdef FOPEN_STANDALONE
|
||||
#define FGETSFILE "fgets.test"
|
||||
#define FREADFILE "fread.test"
|
||||
#define REWINDFILE "rewind.test"
|
||||
|
||||
/* Small main program to retrieve from a url using fgets and fread saving the
|
||||
* output to two test files (note the fgets method will corrupt binary files if
|
||||
* they contain 0 chars */
|
||||
int main( int argc, char *argv[] ) {
|
||||
URL_FILE *handle;
|
||||
FILE *outf;
|
||||
|
||||
size_t nread;
|
||||
char buffer[256];
|
||||
const char *url;
|
||||
|
||||
CURL *curl;
|
||||
CURLcode res;
|
||||
|
||||
curl_global_init( CURL_GLOBAL_DEFAULT );
|
||||
|
||||
curl = curl_easy_init( );
|
||||
|
||||
|
||||
if ( argc < 2 )
|
||||
url = "http://192.168.7.3/testfile"; /* default to testurl */
|
||||
else
|
||||
url = argv[1]; /* use passed url */
|
||||
|
||||
/* copy from url line by line with fgets */
|
||||
outf = fopen( FGETSFILE, "wb+" );
|
||||
if ( !outf ) {
|
||||
perror( "couldn't open fgets output file\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
handle = url_fopen( url, "r" );
|
||||
if ( !handle ) {
|
||||
printf( "couldn't url_fopen() %s\n", url );
|
||||
fclose( outf );
|
||||
return 2;
|
||||
}
|
||||
|
||||
while ( !url_feof( handle ) ) {
|
||||
url_fgets( buffer, sizeof( buffer ), handle );
|
||||
fwrite( buffer, 1, strlen( buffer ), outf );
|
||||
}
|
||||
|
||||
url_fclose( handle );
|
||||
|
||||
fclose( outf );
|
||||
|
||||
|
||||
/* Copy from url with fread */
|
||||
outf = fopen( FREADFILE, "wb+" );
|
||||
if ( !outf ) {
|
||||
perror( "couldn't open fread output file\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
handle = url_fopen( "testfile", "r" );
|
||||
if ( !handle ) {
|
||||
printf( "couldn't url_fopen() testfile\n" );
|
||||
fclose( outf );
|
||||
return 2;
|
||||
}
|
||||
|
||||
do {
|
||||
nread = url_fread( buffer, 1, sizeof( buffer ), handle );
|
||||
fwrite( buffer, 1, nread, outf );
|
||||
} while ( nread );
|
||||
|
||||
url_fclose( handle );
|
||||
|
||||
fclose( outf );
|
||||
|
||||
|
||||
/* Test rewind */
|
||||
outf = fopen( REWINDFILE, "wb+" );
|
||||
if ( !outf ) {
|
||||
perror( "couldn't open fread output file\n" );
|
||||
return 1;
|
||||
}
|
||||
|
||||
handle = url_fopen( "testfile", "r" );
|
||||
if ( !handle ) {
|
||||
printf( "couldn't url_fopen() testfile\n" );
|
||||
fclose( outf );
|
||||
return 2;
|
||||
}
|
||||
|
||||
nread = url_fread( buffer, 1, sizeof( buffer ), handle );
|
||||
fwrite( buffer, 1, nread, outf );
|
||||
url_rewind( handle );
|
||||
|
||||
buffer[0] = '\n';
|
||||
fwrite( buffer, 1, 1, outf );
|
||||
|
||||
nread = url_fread( buffer, 1, sizeof( buffer ), handle );
|
||||
fwrite( buffer, 1, nread, outf );
|
||||
|
||||
url_fclose( handle );
|
||||
|
||||
fclose( outf );
|
||||
|
||||
return 0; /* all done */
|
||||
}
|
||||
#endif
|
||||
83
src/c/io/fopen.h
Normal file
83
src/c/io/fopen.h
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* fopen.h
|
||||
*
|
||||
* adapted from https://curl.haxx.se/libcurl/c/fopen.html.
|
||||
*
|
||||
*
|
||||
* Modifications to read/write wide character streams by
|
||||
* Simon Brooke.
|
||||
*
|
||||
* NOTE THAT: for my purposes, I'm only interested in wide characters,
|
||||
* and I always read them one character at a time.
|
||||
*
|
||||
* Copyright (c) 2003, 2017 Simtec Electronics
|
||||
* Some portions (c) 2019 Simon Brooke <simon@journeyman.cc>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* This example requires libcurl 7.9.7 or later.
|
||||
*/
|
||||
|
||||
#ifndef __fopen_h
|
||||
#define __fopen_h
|
||||
#include <curl/curl.h>
|
||||
/*
|
||||
* wide characters
|
||||
*/
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
/*
 * Wide-character output helpers for URL_FILE streams. Output is only
 * possible on streams wrapping a local FILE; for every other stream type
 * url_fwprintf yields -1 and url_fputws / url_fputwc yield 0 without
 * writing anything. The stream argument `f` is evaluated twice, so it must
 * not have side effects.
 */
#define url_fwprintf(f, ...) (((f)->type == CFTYPE_FILE) ? fwprintf( (f)->handle.file, __VA_ARGS__) : -1)
#define url_fputws(ws, f) (((f)->type == CFTYPE_FILE) ? fputws((ws), (f)->handle.file) : 0)
#define url_fputwc(wc, f) (((f)->type == CFTYPE_FILE) ? fputwc((wc), (f)->handle.file) : 0)
|
||||
|
||||
/* The kinds of underlying stream a URL_FILE can wrap. */
enum fcurl_type_e {
    CFTYPE_NONE = 0,            /* neither a local file nor a curl transfer */
    CFTYPE_FILE = 1,            /* a stdio FILE */
    CFTYPE_CURL = 2             /* a curl easy handle */
};
|
||||
|
||||
struct fcurl_data {
|
||||
enum fcurl_type_e type; /* type of handle */
|
||||
union {
|
||||
CURL *curl;
|
||||
FILE *file;
|
||||
} handle; /* handle */
|
||||
|
||||
char *buffer; /* buffer to store cached data */
|
||||
size_t buffer_len; /* currently allocated buffer's length */
|
||||
size_t buffer_pos; /* cursor into in buffer */
|
||||
int still_running; /* Is background url fetch still in progress */
|
||||
};
|
||||
|
||||
typedef struct fcurl_data URL_FILE;
|
||||
|
||||
/* exported functions */
|
||||
URL_FILE *url_fopen( const char *url, const char *operation );
|
||||
int url_fclose( URL_FILE * file );
|
||||
int url_feof( URL_FILE * file );
|
||||
size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file );
|
||||
char *url_fgets( char *ptr, size_t size, URL_FILE * file );
|
||||
void url_rewind( URL_FILE * file );
|
||||
|
||||
#endif
|
||||
14
src/c/io/history.c
Normal file
14
src/c/io/history.c
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* history.c
|
||||
*
|
||||
* Maintain, and recall, a history of things which have been read from standard
|
||||
* input. Necessarily the history must be stored on the user session, and not be
|
||||
* global.
|
||||
*
|
||||
* I *think* history will be maintained as a list of forms, not of strings, so
|
||||
* only forms which have successfully been read can be recalled, and forms which
|
||||
* have not been completed when the history function is invoked will be lost.
|
||||
*
|
||||
* (c) 2025 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
14
src/c/io/history.h
Normal file
14
src/c/io/history.h
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* history.h
|
||||
*
|
||||
* Maintain, and recall, a history of things which have been read from standard
|
||||
* input. Necessarily the history must be stored on the user session, and not be
|
||||
* global.
|
||||
*
|
||||
* I *think* history will be maintained as a list of forms, not of strings, so
|
||||
* only forms which have successfully been read can be recalled, and forms which
|
||||
* have not been completed when the history function is invoked will be lost.
|
||||
*
|
||||
* (c) 2025 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
557
src/c/io/io.c
Normal file
557
src/c/io/io.c
Normal file
|
|
@ -0,0 +1,557 @@
|
|||
/*
|
||||
* io.c
|
||||
*
|
||||
* Communication between PSSE and the outside world, via libcurl. NOTE
|
||||
* that this file destructively changes metadata on URL connections,
|
||||
* because the metadata is not available until the stream has been read
|
||||
* from. It would be better to find a workaround!
|
||||
*
|
||||
* (c) 2019 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
|
||||
#include <grp.h>
|
||||
#include <langinfo.h>
|
||||
#include <pwd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <uuid/uuid.h>
|
||||
/*
|
||||
* wide characters
|
||||
*/
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
#include "arith/integer.h"
|
||||
#include "debug.h"
|
||||
#include "io/fopen.h"
|
||||
#include "io/io.h"
|
||||
#include "memory/conspage.h"
|
||||
#include "memory/consspaceobject.h"
|
||||
#include "ops/intern.h"
|
||||
#include "ops/lispops.h"
|
||||
#include "utils.h"
|
||||
|
||||
/**
|
||||
* The sharing hub for all connections. TODO: Ultimately this probably doesn't
|
||||
* work for a multi-user environment and we will need one sharing hub for each
|
||||
* user, or else we will need to not share at least cookies and ssl sessions.
|
||||
*/
|
||||
CURLSH *io_share;
|
||||
|
||||
/**
|
||||
* @brief bound to the Lisp string representing C_IO_IN in initialisation.
|
||||
*/
|
||||
struct cons_pointer lisp_io_in = NIL;
|
||||
/**
|
||||
* @brief bound to the Lisp string representing C_IO_OUT in initialisation.
|
||||
*/
|
||||
struct cons_pointer lisp_io_out = NIL;
|
||||
|
||||
|
||||
/**
|
||||
* Allow a one-character unget facility. This may not be enough - we may need
|
||||
* to allocate a buffer.
|
||||
*/
|
||||
wint_t ungotten = 0;
|
||||
|
||||
/**
|
||||
* Initialise the I/O subsystem.
|
||||
*
|
||||
* @return 0 on success; any other value means failure.
|
||||
*/
|
||||
int io_init( ) {
|
||||
int result = curl_global_init( CURL_GLOBAL_SSL );
|
||||
|
||||
io_share = curl_share_init( );
|
||||
|
||||
if ( result == 0 ) {
|
||||
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT );
|
||||
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE );
|
||||
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS );
|
||||
curl_share_setopt( io_share, CURLSHOPT_SHARE,
|
||||
CURL_LOCK_DATA_SSL_SESSION );
|
||||
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert this lisp string-like-thing (also works for symbols, and, later
|
||||
* keywords) into a UTF-8 string. NOTE that the returned value has been
|
||||
* malloced and must be freed. TODO: candidate to moving into a utilities
|
||||
* file.
|
||||
*
|
||||
* @param s the lisp string or symbol;
|
||||
* @return the c string.
|
||||
*/
|
||||
char *lisp_string_to_c_string( struct cons_pointer s ) {
|
||||
char *result = NULL;
|
||||
|
||||
if ( stringp( s ) || symbolp( s ) ) {
|
||||
int len = 0;
|
||||
|
||||
for ( struct cons_pointer c = s; !nilp( c );
|
||||
c = pointer2cell( c ).payload.string.cdr ) {
|
||||
len++;
|
||||
}
|
||||
|
||||
wchar_t *buffer = calloc( len + 1, sizeof( wchar_t ) );
|
||||
/* worst case, one wide char = four utf bytes */
|
||||
result = calloc( ( len * 4 ) + 1, sizeof( char ) );
|
||||
|
||||
int i = 0;
|
||||
for ( struct cons_pointer c = s; !nilp( c );
|
||||
c = pointer2cell( c ).payload.string.cdr ) {
|
||||
buffer[i++] = pointer2cell( c ).payload.string.character;
|
||||
}
|
||||
|
||||
wcstombs( result, buffer, len );
|
||||
free( buffer );
|
||||
}
|
||||
|
||||
debug_print( L"lisp_string_to_c_string( ", DEBUG_IO );
|
||||
debug_print_object( s, DEBUG_IO );
|
||||
debug_printf( DEBUG_IO, L") => '%s'\n", result );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* given this file handle f, return a new url_file handle wrapping it.
|
||||
*
|
||||
* @param f the file to be wrapped;
|
||||
* @return the new handle, or null if no such handle could be allocated.
|
||||
*/
|
||||
URL_FILE *file_to_url_file( FILE *f ) {
|
||||
URL_FILE *result = ( URL_FILE * ) malloc( sizeof( URL_FILE ) );
|
||||
|
||||
if ( result != NULL ) {
|
||||
result->type = CFTYPE_FILE, result->handle.file = f;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get one wide character from the buffer.
|
||||
*
|
||||
* @param file the stream to read from;
|
||||
* @return the next wide character on the stream, or zero if no more.
|
||||
*/
|
||||
wint_t url_fgetwc( URL_FILE *input ) {
|
||||
wint_t result = -1;
|
||||
|
||||
if ( ungotten != 0 ) {
|
||||
/* TODO: not thread safe */
|
||||
result = ungotten;
|
||||
ungotten = 0;
|
||||
} else {
|
||||
switch ( input->type ) {
|
||||
case CFTYPE_FILE:
|
||||
fwide( input->handle.file, 1 ); /* wide characters */
|
||||
result = fgetwc( input->handle.file ); /* passthrough */
|
||||
break;
|
||||
|
||||
case CFTYPE_CURL:{
|
||||
char *cbuff =
|
||||
calloc( sizeof( wchar_t ) + 2, sizeof( char ) );
|
||||
wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) );
|
||||
|
||||
size_t count = 0;
|
||||
|
||||
debug_print( L"url_fgetwc: about to call url_fgets\n",
|
||||
DEBUG_IO );
|
||||
url_fgets( cbuff, 2, input );
|
||||
debug_print( L"url_fgetwc: back from url_fgets\n",
|
||||
DEBUG_IO );
|
||||
int c = ( int ) cbuff[0];
|
||||
// TODO: risk of reading off cbuff?
|
||||
debug_printf( DEBUG_IO,
|
||||
L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n",
|
||||
cbuff, c, c & 0xf7 );
|
||||
/* The value of each individual byte indicates its UTF-8 function, as follows:
|
||||
*
|
||||
* 00 to 7F hex (0 to 127): first and only byte of a sequence.
|
||||
* 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence.
|
||||
* C2 to DF hex (194 to 223): first byte of a two-byte sequence.
|
||||
* E0 to EF hex (224 to 239): first byte of a three-byte sequence.
|
||||
* F0 to FF hex (240 to 255): first byte of a four-byte sequence.
|
||||
*/
|
||||
if ( c <= 0xf7 ) {
|
||||
count = 1;
|
||||
} else if ( c >= 0xc2 && c <= 0xdf ) {
|
||||
count = 2;
|
||||
} else if ( c >= 0xe0 && c <= 0xef ) {
|
||||
count = 3;
|
||||
} else if ( c >= 0xf0 && c <= 0xff ) {
|
||||
count = 4;
|
||||
}
|
||||
|
||||
if ( count > 1 ) {
|
||||
url_fgets( ( char * ) &cbuff[1], count, input );
|
||||
}
|
||||
mbstowcs( wbuff, cbuff, 2 ); //(char *)(&input->buffer[input->buffer_pos]), 1 );
|
||||
result = wbuff[0];
|
||||
|
||||
free( wbuff );
|
||||
free( cbuff );
|
||||
}
|
||||
break;
|
||||
case CFTYPE_NONE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
debug_printf( DEBUG_IO, L"url_fgetwc returning %d (%C)\n", result,
|
||||
result );
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Push the wide character `wc` back so that the next url_fgetwc() returns it.
 *
 * Local files pass through to ungetwc(). For curl streams the character is
 * parked in the single global `ungotten` slot, so only one character can be
 * pending at a time across all such streams.
 *
 * @param wc the character to push back;
 * @param input the stream it was read from;
 * @return `wc` on success; -1 for a stream of type CFTYPE_NONE, or whatever
 * ungetwc() reports for a local file.
 */
wint_t url_ungetwc( wint_t wc, URL_FILE *input ) {
    wint_t result = -1;

    switch ( input->type ) {
        case CFTYPE_FILE:
            fwide( input->handle.file, 1 );     /* wide characters */
            result = ungetwc( wc, input->handle.file ); /* passthrough */
            break;

        case CFTYPE_CURL:
            ungotten = wc;
            result = wc;        /* report success the way ungetwc() does */
            break;

        case CFTYPE_NONE:
            break;
    }

    return result;
}
|
||||
|
||||
|
||||
/**
|
||||
* Function, sort-of: close the file indicated by my first arg, and return
|
||||
* nil. If the first arg is not a stream, does nothing. All other args are
|
||||
* ignored.
|
||||
*
|
||||
* * (close stream)
|
||||
*
|
||||
* @param frame my stack_frame.
|
||||
* @param frame_pointer a pointer to my stack_frame.
|
||||
* @param env my environment.
|
||||
* @return T if the stream was successfully closed, else NIL.
|
||||
*/
|
||||
struct cons_pointer
|
||||
lisp_close( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env ) {
|
||||
struct cons_pointer result = NIL;
|
||||
|
||||
if ( readp( frame->arg[0] ) || writep( frame->arg[0] ) ) {
|
||||
if ( url_fclose( pointer2cell( frame->arg[0] ).payload.stream.stream )
|
||||
== 0 ) {
|
||||
result = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct cons_pointer add_meta_integer( struct cons_pointer meta, wchar_t *key,
|
||||
long int value ) {
|
||||
return
|
||||
make_cons( make_cons
|
||||
( c_string_to_lisp_keyword( key ),
|
||||
make_integer( value, NIL ) ), meta );
|
||||
}
|
||||
|
||||
struct cons_pointer add_meta_string( struct cons_pointer meta, wchar_t *key,
|
||||
char *value ) {
|
||||
value = trim( value );
|
||||
wchar_t buffer[strlen( value ) + 1];
|
||||
mbstowcs( buffer, value, strlen( value ) + 1 );
|
||||
|
||||
return make_cons( make_cons( c_string_to_lisp_keyword( key ),
|
||||
c_string_to_lisp_string( buffer ) ), meta );
|
||||
}
|
||||
|
||||
struct cons_pointer add_meta_time( struct cons_pointer meta, wchar_t *key,
|
||||
time_t *value ) {
|
||||
/* I don't yet have a concept of a date-time object, which is a
|
||||
* bit of an oversight! */
|
||||
char datestring[256];
|
||||
|
||||
strftime( datestring,
|
||||
sizeof( datestring ),
|
||||
nl_langinfo( D_T_FMT ), localtime( value ) );
|
||||
|
||||
return add_meta_string( meta, key, datestring );
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback to assemble metadata for a URL stream. This is naughty because
|
||||
* it modifies data, but it's really the only way to create metadata.
|
||||
*/
|
||||
static size_t write_meta_callback( char *string, size_t size, size_t nmemb,
|
||||
struct cons_pointer stream ) {
|
||||
struct cons_space_object *cell = &pointer2cell( stream );
|
||||
|
||||
/* make a copy of the string that we can destructively change */
|
||||
char *s = calloc( strlen( string ), sizeof( char ) );
|
||||
|
||||
strcpy( s, string );
|
||||
|
||||
if ( strncmp( &cell->tag.bytes[0], READTAG, 4 ) ||
|
||||
strncmp( &cell->tag.bytes[0], WRITETAG, 4 ) ) {
|
||||
int offset = index_of( ':', s );
|
||||
|
||||
if ( offset != -1 ) {
|
||||
s[offset] = ( char ) 0;
|
||||
char *name = trim( s );
|
||||
char *value = trim( &s[++offset] );
|
||||
wchar_t wname[strlen( name )];
|
||||
|
||||
mbstowcs( wname, name, strlen( name ) + 1 );
|
||||
|
||||
cell->payload.stream.meta =
|
||||
add_meta_string( cell->payload.stream.meta, wname, value );
|
||||
|
||||
debug_printf( DEBUG_IO,
|
||||
L"write_meta_callback: added header '%s': value '%s'\n",
|
||||
name, value );
|
||||
} else if ( strncmp( "HTTP", s, 4 ) == 0 ) {
|
||||
int offset = index_of( ' ', s );
|
||||
char *value = trim( &s[offset] );
|
||||
|
||||
cell->payload.stream.meta =
|
||||
add_meta_integer( add_meta_string
|
||||
( cell->payload.stream.meta, L"status",
|
||||
value ), L"status-code", strtol( value,
|
||||
NULL,
|
||||
10 ) );
|
||||
|
||||
debug_printf( DEBUG_IO,
|
||||
L"write_meta_callback: added header 'status': value '%s'\n",
|
||||
value );
|
||||
} else {
|
||||
debug_printf( DEBUG_IO,
|
||||
L"write_meta_callback: header passed with no colon: '%s'\n",
|
||||
s );
|
||||
}
|
||||
} else {
|
||||
debug_print
|
||||
( L"Pointer passed to write_meta_callback did not point to a stream: ",
|
||||
DEBUG_IO );
|
||||
debug_dump_object( stream, DEBUG_IO );
|
||||
}
|
||||
|
||||
free( s );
|
||||
return strlen( string );
|
||||
}
|
||||
|
||||
void collect_meta( struct cons_pointer stream, char *url ) {
|
||||
struct cons_space_object *cell = &pointer2cell( stream );
|
||||
URL_FILE *s = pointer2cell( stream ).payload.stream.stream;
|
||||
struct cons_pointer meta =
|
||||
add_meta_string( cell->payload.stream.meta, L"url", url );
|
||||
struct stat statbuf;
|
||||
int result = stat( url, &statbuf );
|
||||
struct passwd *pwd;
|
||||
struct group *grp;
|
||||
|
||||
switch ( s->type ) {
|
||||
case CFTYPE_NONE:
|
||||
break;
|
||||
case CFTYPE_FILE:
|
||||
if ( result == 0 ) {
|
||||
if ( ( pwd = getpwuid( statbuf.st_uid ) ) != NULL ) {
|
||||
meta = add_meta_string( meta, L"owner", pwd->pw_name );
|
||||
} else {
|
||||
meta = add_meta_integer( meta, L"owner", statbuf.st_uid );
|
||||
}
|
||||
|
||||
if ( ( grp = getgrgid( statbuf.st_gid ) ) != NULL ) {
|
||||
meta = add_meta_string( meta, L"group", grp->gr_name );
|
||||
} else {
|
||||
meta = add_meta_integer( meta, L"group", statbuf.st_gid );
|
||||
}
|
||||
|
||||
meta =
|
||||
add_meta_integer( meta, L"size",
|
||||
( intmax_t ) statbuf.st_size );
|
||||
|
||||
meta = add_meta_time( meta, L"modified", &statbuf.st_mtime );
|
||||
}
|
||||
break;
|
||||
case CFTYPE_CURL:
|
||||
curl_easy_setopt( s->handle.curl, CURLOPT_VERBOSE, 1L );
|
||||
curl_easy_setopt( s->handle.curl, CURLOPT_HEADERFUNCTION,
|
||||
write_meta_callback );
|
||||
curl_easy_setopt( s->handle.curl, CURLOPT_HEADERDATA, stream );
|
||||
break;
|
||||
}
|
||||
|
||||
/* this is destructive change before the cell is released into the
|
||||
* wild, and consequently permissible, just. */
|
||||
cell->payload.stream.meta = meta;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resutn the current default input, or of `inputp` is false, output stream from
|
||||
* this `env`ironment.
|
||||
*/
|
||||
struct cons_pointer get_default_stream( bool inputp, struct cons_pointer env ) {
|
||||
struct cons_pointer result = NIL;
|
||||
struct cons_pointer stream_name = inputp ? lisp_io_in : lisp_io_out;
|
||||
|
||||
result = c_assoc( stream_name, env );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Function: return a stream open on the URL indicated by the first argument;
|
||||
* if a second argument is present and is non-nil, open it for writing. At
|
||||
* present, further arguments are ignored and there is no mechanism to open
|
||||
* to append, or error if the URL is faulty or indicates an unavailable
|
||||
* resource.
|
||||
*
|
||||
* * (open url)
|
||||
*
|
||||
* @param frame my stack_frame.
|
||||
* @param frame_pointer a pointer to my stack_frame.
|
||||
* @param env my environment.
|
||||
* @return a string of one character, namely the next available character
|
||||
* on my stream, if any, else NIL.
|
||||
*/
|
||||
struct cons_pointer
|
||||
lisp_open( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env ) {
|
||||
struct cons_pointer result = NIL;
|
||||
|
||||
if ( stringp( frame->arg[0] ) ) {
|
||||
char *url = lisp_string_to_c_string( frame->arg[0] );
|
||||
|
||||
if ( nilp( frame->arg[1] ) ) {
|
||||
URL_FILE *stream = url_fopen( url, "r" );
|
||||
|
||||
debug_printf( DEBUG_IO,
|
||||
L"lisp_open: stream @ %ld, stream type = %d, stream handle = %ld\n",
|
||||
( long int ) &stream, ( int ) stream->type,
|
||||
( long int ) stream->handle.file );
|
||||
|
||||
switch ( stream->type ) {
|
||||
case CFTYPE_NONE:
|
||||
return
|
||||
make_exception( c_string_to_lisp_string
|
||||
( L"Could not open stream" ),
|
||||
frame_pointer );
|
||||
break;
|
||||
case CFTYPE_FILE:
|
||||
if ( stream->handle.file == NULL ) {
|
||||
return
|
||||
make_exception( c_string_to_lisp_string
|
||||
( L"Could not open file" ),
|
||||
frame_pointer );
|
||||
}
|
||||
break;
|
||||
case CFTYPE_CURL:
|
||||
/* can't tell whether a URL is bad without reading it */
|
||||
break;
|
||||
}
|
||||
|
||||
result = make_read_stream( stream, NIL );
|
||||
} else {
|
||||
// TODO: anything more complex is a problem for another day.
|
||||
URL_FILE *stream = url_fopen( url, "w" );
|
||||
result = make_write_stream( stream, NIL );
|
||||
}
|
||||
|
||||
if ( pointer2cell( result ).payload.stream.stream == NULL ) {
|
||||
result = NIL;
|
||||
} else {
|
||||
collect_meta( result, url );
|
||||
}
|
||||
|
||||
free( url );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function: return the next character from the stream indicated by arg 0;
|
||||
* further arguments are ignored.
|
||||
*
|
||||
* * (read-char stream)
|
||||
*
|
||||
* @param frame my stack_frame.
|
||||
* @param frame_pointer a pointer to my stack_frame.
|
||||
* @param env my environment.
|
||||
* @return a string of one character, namely the next available character
|
||||
* on my stream, if any, else NIL.
|
||||
*/
|
||||
struct cons_pointer
|
||||
lisp_read_char( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env ) {
|
||||
struct cons_pointer result = NIL;
|
||||
|
||||
if ( readp( frame->arg[0] ) ) {
|
||||
result =
|
||||
make_string( url_fgetwc
|
||||
( pointer2cell( frame->arg[0] ).payload.
|
||||
stream.stream ), NIL );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function: return a string representing all characters from the stream
|
||||
* indicated by arg 0; further arguments are ignored.
|
||||
*
|
||||
* TODO: it should be possible to optionally pass a string URL to this function,
|
||||
*
|
||||
* * (slurp stream)
|
||||
*
|
||||
* @param frame my stack_frame.
|
||||
* @param frame_pointer a pointer to my stack_frame.
|
||||
* @param env my environment.
|
||||
* @return a string of one character, namely the next available character
|
||||
* on my stream, if any, else NIL.
|
||||
*/
|
||||
struct cons_pointer
|
||||
lisp_slurp( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env ) {
|
||||
struct cons_pointer result = NIL;
|
||||
|
||||
if ( readp( frame->arg[0] ) ) {
|
||||
URL_FILE *stream = pointer2cell( frame->arg[0] ).payload.stream.stream;
|
||||
struct cons_pointer cursor = make_string( url_fgetwc( stream ), NIL );
|
||||
result = cursor;
|
||||
|
||||
for ( wint_t c = url_fgetwc( stream ); !url_feof( stream ) && c != 0;
|
||||
c = url_fgetwc( stream ) ) {
|
||||
debug_print( L"slurp: cursor is: ", DEBUG_IO );
|
||||
debug_dump_object( cursor, DEBUG_IO );
|
||||
debug_print( L"; result is: ", DEBUG_IO );
|
||||
debug_dump_object( result, DEBUG_IO );
|
||||
debug_println( DEBUG_IO );
|
||||
|
||||
struct cons_space_object *cell = &pointer2cell( cursor );
|
||||
cursor = make_string( ( wchar_t ) c, NIL );
|
||||
cell->payload.string.cdr = cursor;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
46
src/c/io/io.h
Normal file
46
src/c/io/io.h
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
/*
|
||||
* io.h
|
||||
*
|
||||
* Communication between PSSE and the outside world, via libcurl.
|
||||
*
|
||||
* (c) 2019 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
|
||||
#ifndef __psse_io_h
|
||||
#define __psse_io_h
|
||||
#include <curl/curl.h>
|
||||
#include "memory/consspaceobject.h"
|
||||
|
||||
extern CURLSH *io_share;
|
||||
|
||||
int io_init( );
|
||||
|
||||
#define C_IO_IN L"*in*"
|
||||
#define C_IO_OUT L"*out*"
|
||||
|
||||
extern struct cons_pointer lisp_io_in;
|
||||
extern struct cons_pointer lisp_io_out;
|
||||
|
||||
URL_FILE *file_to_url_file( FILE * f );
|
||||
wint_t url_fgetwc( URL_FILE * input );
|
||||
wint_t url_ungetwc( wint_t wc, URL_FILE * input );
|
||||
|
||||
struct cons_pointer get_default_stream( bool inputp, struct cons_pointer env );
|
||||
|
||||
struct cons_pointer
|
||||
lisp_close( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env );
|
||||
struct cons_pointer
|
||||
lisp_open( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env );
|
||||
struct cons_pointer
|
||||
lisp_read_char( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env );
|
||||
struct cons_pointer
|
||||
lisp_slurp( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env );
|
||||
|
||||
char *lisp_string_to_c_string( struct cons_pointer s );
|
||||
#endif
|
||||
356
src/c/io/print.c
Normal file
356
src/c/io/print.c
Normal file
|
|
@ -0,0 +1,356 @@
|
|||
/*
|
||||
* print.c
|
||||
*
|
||||
* First pass at a printer, for bootstrapping.
|
||||
*
|
||||
* (c) 2017 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
/*
|
||||
* wide characters
|
||||
*/
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "arith/integer.h"
|
||||
#include "debug.h"
|
||||
#include "io/io.h"
|
||||
#include "io/print.h"
|
||||
#include "memory/conspage.h"
|
||||
#include "memory/consspaceobject.h"
|
||||
#include "memory/hashmap.h"
|
||||
#include "memory/stack.h"
|
||||
#include "memory/vectorspace.h"
|
||||
#include "ops/intern.h"
|
||||
#include "time/psse_time.h"
|
||||
|
||||
/**
|
||||
* print all the characters in the symbol or string indicated by `pointer`
|
||||
* onto this `output`; if `pointer` does not indicate a string or symbol,
|
||||
* don't print anything but just return.
|
||||
*/
|
||||
void print_string_contents( URL_FILE *output, struct cons_pointer pointer ) {
|
||||
while ( stringp( pointer ) || symbolp( pointer ) || keywordp( pointer ) ) {
|
||||
struct cons_space_object *cell = &pointer2cell( pointer );
|
||||
wchar_t c = cell->payload.string.character;
|
||||
|
||||
if ( c != '\0' ) {
|
||||
url_fputwc( c, output );
|
||||
}
|
||||
pointer = cell->payload.string.cdr;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* print all the characters in the string indicated by `pointer` onto
|
||||
* the stream at this `output`, prepending and appending double quote
|
||||
* characters.
|
||||
*/
|
||||
void print_string( URL_FILE *output, struct cons_pointer pointer ) {
|
||||
url_fputwc( btowc( '"' ), output );
|
||||
print_string_contents( output, pointer );
|
||||
url_fputwc( btowc( '"' ), output );
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a single list cell (cons cell) indicated by `pointer` to the
|
||||
* stream indicated by `output`. if `initial_space` is `true`, prepend
|
||||
* a space character.
|
||||
*/
|
||||
void
|
||||
print_list_contents( URL_FILE *output, struct cons_pointer pointer,
|
||||
bool initial_space ) {
|
||||
struct cons_space_object *cell = &pointer2cell( pointer );
|
||||
|
||||
switch ( cell->tag.value ) {
|
||||
case CONSTV:
|
||||
if ( initial_space ) {
|
||||
url_fputwc( btowc( ' ' ), output );
|
||||
}
|
||||
print( output, cell->payload.cons.car );
|
||||
|
||||
print_list_contents( output, cell->payload.cons.cdr, true );
|
||||
break;
|
||||
case NILTV:
|
||||
break;
|
||||
default:
|
||||
url_fwprintf( output, L" . " );
|
||||
print( output, pointer );
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Write the list indicated by `pointer` onto `output`, enclosed in
 * parentheses.
 *
 * @param output the stream to write to.
 * @param pointer the list to write.
 */
void print_list( URL_FILE *output, struct cons_pointer pointer ) {
    const bool no_leading_space = false;

    url_fputws( L"(", output );
    print_list_contents( output, pointer, no_leading_space );
    url_fputws( L")", output );
}
|
||||
|
||||
/**
 * Write the hashmap indicated by `map` onto `output` as
 * `{key value, key value, ...}`. If `map` is not a hashmap, nothing is
 * written.
 *
 * @param output the stream to write to.
 * @param map the hashmap to write.
 */
void print_map( URL_FILE *output, struct cons_pointer map ) {
    if ( !hashmapp( map ) ) {
        return;
    }

    url_fputwc( btowc( '{' ), output );

    for ( struct cons_pointer ks = hashmap_keys( map ); !nilp( ks );
          ks = c_cdr( ks ) ) {
        struct cons_pointer key = c_car( ks );

        print( output, key );
        url_fputwc( btowc( ' ' ), output );
        print( output, hashmap_get( map, key, false ) );

        /* separator between entries, but not after the last one */
        if ( !nilp( c_cdr( ks ) ) ) {
            url_fputws( L", ", output );
        }
    }

    url_fputwc( btowc( '}' ), output );
}
|
||||
|
||||
/**
 * Write the vector-space object indicated by `pointer` onto `output`.
 * Hashmaps and stack frames are handled; any other type is reported on
 * `stderr` and nothing is written to `output`.
 *
 * @param output the stream to write to.
 * @param pointer a pointer to the vector-space object.
 */
void print_vso( URL_FILE *output, struct cons_pointer pointer ) {
    struct vector_space_object *object = pointer_to_vso( pointer );

    if ( object->header.tag.value == HASHTV ) {
        print_map( output, pointer );
    } else if ( object->header.tag.value == STACKFRAMETV ) {
        dump_stack_trace( output, pointer );
    } else {
        // \todo: others.
        fwprintf( stderr, L"Unrecognised vector-space type '%d'\n",
                  object->header.tag.value );
    }
}
|
||||
|
||||
/**
|
||||
* stolen from https://stackoverflow.com/questions/11656241/how-to-print-uint128-t-number-using-gcc
|
||||
*/
|
||||
void print_128bit( URL_FILE *output, __int128_t n ) {
|
||||
if ( n == 0 ) {
|
||||
fwprintf( stderr, L"0" );
|
||||
} else {
|
||||
char str[40] = { 0 }; // log10(1 << 128) + '\0'
|
||||
char *s = str + sizeof( str ) - 1; // start at the end
|
||||
while ( n != 0 ) {
|
||||
if ( s == str )
|
||||
return; // never happens
|
||||
|
||||
*--s = "0123456789"[n % 10]; // save last digit
|
||||
n /= 10; // drop it
|
||||
}
|
||||
url_fwprintf( output, L"%s", s );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Print the cons-space object indicated by `pointer` to the stream indicated
|
||||
* by `output`.
|
||||
*/
|
||||
struct cons_pointer print( URL_FILE *output, struct cons_pointer pointer ) {
|
||||
struct cons_space_object cell = pointer2cell( pointer );
|
||||
char *buffer;
|
||||
|
||||
/*
|
||||
* Because tags have values as well as bytes, this if ... else if
|
||||
* statement can ultimately be replaced by a switch, which will be neater.
|
||||
*/
|
||||
switch ( cell.tag.value ) {
|
||||
case CONSTV:
|
||||
print_list( output, pointer );
|
||||
break;
|
||||
case EXCEPTIONTV:
|
||||
url_fputws( L"\nException: ", output );
|
||||
dump_stack_trace( output, pointer );
|
||||
break;
|
||||
case FUNCTIONTV:
|
||||
url_fputws( L"<Function: ", output );
|
||||
print( output, cell.payload.function.meta );
|
||||
url_fputwc( L'>', output );
|
||||
break;
|
||||
case INTEGERTV:
|
||||
struct cons_pointer s = integer_to_string( pointer, 10 );
|
||||
print_string_contents( output, s );
|
||||
dec_ref( s );
|
||||
break;
|
||||
case KEYTV:
|
||||
url_fputws( L":", output );
|
||||
print_string_contents( output, pointer );
|
||||
break;
|
||||
case LAMBDATV:{
|
||||
url_fputws( L"<Anonymous Function: ", output );
|
||||
struct cons_pointer to_print =
|
||||
make_cons( c_string_to_lisp_symbol( L"\u03bb" ),
|
||||
make_cons( cell.payload.lambda.args,
|
||||
cell.payload.lambda.body ) );
|
||||
|
||||
print( output, to_print );
|
||||
|
||||
dec_ref( to_print );
|
||||
url_fputwc( L'>', output );
|
||||
}
|
||||
break;
|
||||
case NILTV:
|
||||
url_fwprintf( output, L"nil" );
|
||||
break;
|
||||
case NLAMBDATV:{
|
||||
url_fputws( L"<Anonymous Special Form: ", output );
|
||||
struct cons_pointer to_print =
|
||||
make_cons( c_string_to_lisp_symbol( L"n\u03bb" ),
|
||||
make_cons( cell.payload.lambda.args,
|
||||
cell.payload.lambda.body ) );
|
||||
|
||||
print( output, to_print );
|
||||
|
||||
dec_ref( to_print );
|
||||
url_fputwc( L'>', output );
|
||||
}
|
||||
break;
|
||||
case RATIOTV:
|
||||
print( output, cell.payload.ratio.dividend );
|
||||
url_fputws( L"/", output );
|
||||
print( output, cell.payload.ratio.divisor );
|
||||
break;
|
||||
case READTV:
|
||||
url_fwprintf( output, L"<Input stream: " );
|
||||
print( output, cell.payload.stream.meta );
|
||||
url_fputwc( L'>', output );
|
||||
break;
|
||||
case REALTV:
|
||||
/* \todo using the C heap is a bad plan because it will fragment.
|
||||
* As soon as I have working vector space I'll use a special purpose
|
||||
* vector space object */
|
||||
buffer = ( char * ) malloc( 24 );
|
||||
memset( buffer, 0, 24 );
|
||||
/* format it really long, then clear the trailing zeros */
|
||||
sprintf( buffer, "%-.23Lg", cell.payload.real.value );
|
||||
if ( strchr( buffer, '.' ) != NULL ) {
|
||||
for ( int i = strlen( buffer ) - 1; buffer[i] == '0'; i-- ) {
|
||||
buffer[i] = '\0';
|
||||
}
|
||||
}
|
||||
url_fwprintf( output, L"%s", buffer );
|
||||
free( buffer );
|
||||
break;
|
||||
case STRINGTV:
|
||||
print_string( output, pointer );
|
||||
break;
|
||||
case SYMBOLTV:
|
||||
print_string_contents( output, pointer );
|
||||
break;
|
||||
case SPECIALTV:
|
||||
url_fwprintf( output, L"<Special form: " );
|
||||
print( output, cell.payload.special.meta );
|
||||
url_fputwc( L'>', output );
|
||||
break;
|
||||
case TIMETV:
|
||||
url_fwprintf( output, L"<Time: " );
|
||||
print_string( output, time_to_string( pointer ) );
|
||||
url_fputws( L"; ", output );
|
||||
print_128bit( output, cell.payload.time.value );
|
||||
url_fputwc( L'>', output );
|
||||
break;
|
||||
case TRUETV:
|
||||
url_fwprintf( output, L"t" );
|
||||
break;
|
||||
case VECTORPOINTTV:
|
||||
print_vso( output, pointer );
|
||||
break;
|
||||
case WRITETV:
|
||||
url_fwprintf( output, L"<Output stream: " );
|
||||
print( output, cell.payload.stream.meta );
|
||||
url_fputwc( L'>', output );
|
||||
break;
|
||||
default:
|
||||
fwprintf( stderr,
|
||||
L"Error: Unrecognised tag value %d (%4.4s)\n",
|
||||
cell.tag.value, &cell.tag.bytes[0] );
|
||||
// dump_object( stderr, pointer);
|
||||
break;
|
||||
}
|
||||
|
||||
return pointer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function; print one complete lisp expression and return NIL. If write-stream is specified and
|
||||
* is a write stream, then print to that stream, else the stream which is the value of
|
||||
* `*out*` in the environment.
|
||||
*
|
||||
* * (print expr)
|
||||
* * (print expr write-stream)
|
||||
*
|
||||
* @param frame my stack_frame.
|
||||
* @param frame_pointer a pointer to my stack_frame.
|
||||
* @param env my environment (from which the stream may be extracted).
|
||||
* @return NIL.
|
||||
*/
|
||||
struct cons_pointer
|
||||
lisp_print( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env ) {
|
||||
debug_print( L"Entering print\n", DEBUG_IO );
|
||||
struct cons_pointer result = NIL;
|
||||
URL_FILE *output;
|
||||
struct cons_pointer out_stream = writep( frame->arg[1] ) ?
|
||||
frame->arg[1] : get_default_stream( false, env );
|
||||
|
||||
if ( writep( out_stream ) ) {
|
||||
debug_print( L"lisp_print: setting output stream\n", DEBUG_IO );
|
||||
debug_dump_object( out_stream, DEBUG_IO );
|
||||
output = pointer2cell( out_stream ).payload.stream.stream;
|
||||
inc_ref( out_stream );
|
||||
} else {
|
||||
output = file_to_url_file( stderr );
|
||||
}
|
||||
|
||||
debug_print( L"lisp_print: about to print\n", DEBUG_IO );
|
||||
debug_dump_object( frame->arg[0], DEBUG_IO );
|
||||
|
||||
result = print( output, frame->arg[0] );
|
||||
|
||||
debug_print( L"lisp_print returning\n", DEBUG_IO );
|
||||
debug_dump_object( result, DEBUG_IO );
|
||||
|
||||
if ( writep( out_stream ) ) {
|
||||
dec_ref( out_stream );
|
||||
} else {
|
||||
free( output );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Write a single newline character onto `output`.
 *
 * @param output the stream to write to.
 */
void println( URL_FILE *output ) {
    const wchar_t *newline = L"\n";

    url_fputws( newline, output );
}
|
||||
|
||||
/**
|
||||
* @brief `(prinln out-stream)`: Print a new line character to `out-stream`, if
|
||||
* it is specified and is an output stream, else to `*out*`.
|
||||
*
|
||||
* @param frame
|
||||
* @param frame_pointer
|
||||
* @param env
|
||||
* @return `nil`
|
||||
*/
|
||||
struct cons_pointer
|
||||
lisp_println( struct stack_frame *frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env ) {
|
||||
URL_FILE *output;
|
||||
struct cons_pointer out_stream = writep( frame->arg[1] ) ?
|
||||
frame->arg[1] : get_default_stream( false, env );
|
||||
|
||||
if ( writep( out_stream ) ) {
|
||||
output = pointer2cell( out_stream ).payload.stream.stream;
|
||||
|
||||
println( output );
|
||||
}
|
||||
|
||||
return NIL;
|
||||
}
|
||||
30
src/c/io/print.h
Normal file
30
src/c/io/print.h
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
/**
|
||||
* print.h
|
||||
*
|
||||
* First pass at a printer, for bootstrapping.
|
||||
*
|
||||
*
|
||||
* (c) 2017 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "io/fopen.h"
|
||||
|
||||
#ifndef __print_h
|
||||
#define __print_h
|
||||
|
||||
struct cons_pointer print( URL_FILE * output, struct cons_pointer pointer );
|
||||
void println( URL_FILE * output );
|
||||
|
||||
struct cons_pointer lisp_print( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env );
|
||||
struct cons_pointer lisp_println( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env );
|
||||
|
||||
|
||||
#endif
|
||||
570
src/c/io/read.c
Normal file
570
src/c/io/read.c
Normal file
|
|
@ -0,0 +1,570 @@
|
|||
/*
|
||||
* read.c
|
||||
*
|
||||
* First pass at a reader, for bootstrapping.
|
||||
*
|
||||
*
|
||||
* (c) 2017 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
/*
|
||||
* wide characters
|
||||
*/
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "memory/consspaceobject.h"
|
||||
#include "debug.h"
|
||||
#include "memory/dump.h"
|
||||
#include "memory/hashmap.h"
|
||||
#include "arith/integer.h"
|
||||
#include "ops/intern.h"
|
||||
#include "io/io.h"
|
||||
#include "ops/lispops.h"
|
||||
#include "arith/peano.h"
|
||||
#include "io/print.h"
|
||||
#include "arith/ratio.h"
|
||||
#include "io/read.h"
|
||||
#include "arith/real.h"
|
||||
#include "memory/vectorspace.h"
|
||||
|
||||
// We can't, I think, use libreadline, because we read character by character,
|
||||
// not line by line, and because we use wide characters. So we're going to have
|
||||
// to reimplement it. So we're going to have to maintain history of the forms
|
||||
// (or strings, but I currently think forms). So we're going to have to be able
|
||||
// to detect special keys, particularly, at this stage, the uparrow and down-
|
||||
// arrow keys
|
||||
// #include <readline/readline.h>
|
||||
// #include <readline/history.h>
|
||||
|
||||
|
||||
/*
|
||||
* for the time being things which may be read are:
|
||||
* * strings
|
||||
* * numbers - either integer, ratio or real
|
||||
* * lists
|
||||
* * maps
|
||||
* * keywords
|
||||
* * atoms
|
||||
*/
|
||||
|
||||
struct cons_pointer read_number( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
URL_FILE * input, wint_t initial,
|
||||
bool seen_period );
|
||||
struct cons_pointer read_list( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env,
|
||||
URL_FILE * input, wint_t initial );
|
||||
struct cons_pointer read_map( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env,
|
||||
URL_FILE * input, wint_t initial );
|
||||
struct cons_pointer read_string( URL_FILE * input, wint_t initial );
|
||||
struct cons_pointer read_symbol_or_key( URL_FILE * input, uint32_t tag,
|
||||
wint_t initial );
|
||||
|
||||
/**
|
||||
* quote reader macro in C (!)
|
||||
*/
|
||||
struct cons_pointer c_quote( struct cons_pointer arg ) {
|
||||
return make_cons( c_string_to_lisp_symbol( L"quote" ),
|
||||
make_cons( arg, NIL ) );
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a path macro from the stream. A path macro is expected to be
|
||||
* 1. optionally a leading character such as '/' or '$', followed by
|
||||
* 2. one or more keywords with leading colons (':') but no intervening spaces; or
|
||||
* 3. one or more symbols separated by slashes; or
|
||||
* 4. keywords (with leading colons) interspersed with symbols (prefixed by slashes).
|
||||
*/
|
||||
struct cons_pointer read_path( URL_FILE *input, wint_t initial,
|
||||
struct cons_pointer q ) {
|
||||
bool done = false;
|
||||
struct cons_pointer prefix = NIL;
|
||||
|
||||
switch ( initial ) {
|
||||
case '/':
|
||||
prefix = make_cons( c_string_to_lisp_symbol( L"oblist" ), NIL );
|
||||
break;
|
||||
case '$':
|
||||
case LSESSION:
|
||||
prefix = c_string_to_lisp_symbol( L"session" );
|
||||
break;
|
||||
}
|
||||
|
||||
while ( !done ) {
|
||||
wint_t c = url_fgetwc( input );
|
||||
if ( iswblank( c ) || iswcntrl( c ) ) {
|
||||
done = true;
|
||||
} else if ( url_feof( input ) ) {
|
||||
done = true;
|
||||
} else {
|
||||
switch ( c ) {
|
||||
case ':':
|
||||
q = make_cons( read_symbol_or_key
|
||||
( input, KEYTV, url_fgetwc( input ) ), q );
|
||||
break;
|
||||
case '/':
|
||||
q = make_cons( make_cons
|
||||
( c_string_to_lisp_symbol( L"quote" ),
|
||||
make_cons( read_symbol_or_key
|
||||
( input, SYMBOLTV,
|
||||
url_fgetwc( input ) ),
|
||||
NIL ) ), q );
|
||||
break;
|
||||
default:
|
||||
if ( iswalpha( c ) ) {
|
||||
q = make_cons( read_symbol_or_key
|
||||
( input, SYMBOLTV, c ), q );
|
||||
} else {
|
||||
// TODO: it's really an error. Exception?
|
||||
url_ungetwc( c, input );
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// right, we now have the path we want (reversed) in q.
|
||||
struct cons_pointer r = NIL;
|
||||
|
||||
for ( struct cons_pointer p = q; !nilp( p ); p = c_cdr( p ) ) {
|
||||
r = make_cons( c_car( p ), r );
|
||||
}
|
||||
|
||||
dec_ref( q );
|
||||
|
||||
if ( !nilp( prefix ) ) {
|
||||
r = make_cons( prefix, r );
|
||||
}
|
||||
|
||||
return make_cons( c_string_to_lisp_symbol( L"->" ), r );
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the next object on this input stream and return a cons_pointer to it,
|
||||
* treating this initial character as the first character of the object
|
||||
* representation.
|
||||
*/
|
||||
struct cons_pointer read_continuation( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env,
|
||||
URL_FILE *input, wint_t initial ) {
|
||||
debug_print( L"entering read_continuation\n", DEBUG_IO );
|
||||
struct cons_pointer result = NIL;
|
||||
|
||||
wint_t c;
|
||||
|
||||
for ( c = initial;
|
||||
c == '\0' || iswblank( c ) || iswcntrl( c );
|
||||
c = url_fgetwc( input ) );
|
||||
|
||||
if ( url_feof( input ) ) {
|
||||
result =
|
||||
throw_exception( c_string_to_lisp_symbol( L"read" ),
|
||||
c_string_to_lisp_string
|
||||
( L"End of file while reading" ), frame_pointer );
|
||||
} else {
|
||||
switch ( c ) {
|
||||
case ';':
|
||||
for ( c = url_fgetwc( input ); c != '\n';
|
||||
c = url_fgetwc( input ) );
|
||||
/* skip all characters from semi-colon to the end of the line */
|
||||
break;
|
||||
case EOF:
|
||||
result = throw_exception( c_string_to_lisp_symbol( L"read" ),
|
||||
c_string_to_lisp_string
|
||||
( L"End of input while reading" ),
|
||||
frame_pointer );
|
||||
break;
|
||||
case '\'':
|
||||
result =
|
||||
c_quote( read_continuation
|
||||
( frame, frame_pointer, env, input,
|
||||
url_fgetwc( input ) ) );
|
||||
break;
|
||||
case '(':
|
||||
result =
|
||||
read_list( frame, frame_pointer, env, input,
|
||||
url_fgetwc( input ) );
|
||||
break;
|
||||
case '{':
|
||||
result = read_map( frame, frame_pointer, env, input,
|
||||
url_fgetwc( input ) );
|
||||
break;
|
||||
case '"':
|
||||
result = read_string( input, url_fgetwc( input ) );
|
||||
break;
|
||||
case '-':{
|
||||
wint_t next = url_fgetwc( input );
|
||||
url_ungetwc( next, input );
|
||||
if ( iswdigit( next ) ) {
|
||||
result =
|
||||
read_number( frame, frame_pointer, input, c,
|
||||
false );
|
||||
} else {
|
||||
result = read_symbol_or_key( input, SYMBOLTV, c );
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '.':
|
||||
{
|
||||
wint_t next = url_fgetwc( input );
|
||||
if ( iswdigit( next ) ) {
|
||||
url_ungetwc( next, input );
|
||||
result =
|
||||
read_number( frame, frame_pointer, input, c,
|
||||
true );
|
||||
} else if ( iswblank( next ) ) {
|
||||
/* dotted pair. \todo this isn't right, we
|
||||
* really need to backtrack up a level. */
|
||||
result =
|
||||
read_continuation( frame, frame_pointer, env,
|
||||
input, url_fgetwc( input ) );
|
||||
debug_print
|
||||
( L"read_continuation: dotted pair; read cdr ",
|
||||
DEBUG_IO );
|
||||
} else {
|
||||
read_symbol_or_key( input, SYMBOLTV, c );
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ':':
|
||||
result =
|
||||
read_symbol_or_key( input, KEYTV, url_fgetwc( input ) );
|
||||
break;
|
||||
case '/':
|
||||
{
|
||||
/* slash followed by whitespace is legit provided it's not
|
||||
* preceded by anything - it's the division operator. Otherwise,
|
||||
* it's terminal, probably part of a path, and needs pushed back.
|
||||
*/
|
||||
wint_t cn = url_fgetwc( input );
|
||||
if ( nilp( result )
|
||||
&& ( iswblank( cn ) || iswcntrl( cn ) ) ) {
|
||||
url_ungetwc( cn, input );
|
||||
result = make_symbol_or_key( c, NIL, SYMBOLTV );
|
||||
} else {
|
||||
url_ungetwc( cn, input );
|
||||
result = read_path( input, c, NIL );
|
||||
}
|
||||
}
|
||||
break;
|
||||
case '$':
|
||||
case LSESSION:
|
||||
result = read_path( input, c, NIL );
|
||||
break;
|
||||
default:
|
||||
if ( iswdigit( c ) ) {
|
||||
result =
|
||||
read_number( frame, frame_pointer, input, c, false );
|
||||
} else if ( iswprint( c ) ) {
|
||||
result = read_symbol_or_key( input, SYMBOLTV, c );
|
||||
} else {
|
||||
result =
|
||||
throw_exception( c_string_to_lisp_symbol( L"read" ),
|
||||
make_cons( c_string_to_lisp_string
|
||||
( L"Unrecognised start of input character" ),
|
||||
make_string( c, NIL ) ),
|
||||
frame_pointer );
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
debug_print( L"read_continuation returning\n", DEBUG_IO );
|
||||
debug_dump_object( result, DEBUG_IO );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* read a number from this input stream, given this initial character.
|
||||
* \todo Need to do a lot of inc_ref and dec_ref, to make sure the
|
||||
* garbage is collected.
|
||||
*/
|
||||
struct cons_pointer read_number( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
URL_FILE *input,
|
||||
wint_t initial, bool seen_period ) {
|
||||
debug_print( L"entering read_number\n", DEBUG_IO );
|
||||
|
||||
struct cons_pointer result = acquire_integer( 0, NIL );
|
||||
/* \todo we really need to be getting `base` from a privileged Lisp name -
|
||||
* and it should be the same privileged name we use when writing numbers */
|
||||
struct cons_pointer base = acquire_integer( 10, NIL );
|
||||
struct cons_pointer dividend = NIL;
|
||||
int places_of_decimals = 0;
|
||||
wint_t c;
|
||||
bool neg = initial == btowc( '-' );
|
||||
|
||||
if ( neg ) {
|
||||
initial = url_fgetwc( input );
|
||||
}
|
||||
|
||||
debug_printf( DEBUG_IO, L"read_number starting '%c' (%d)\n", initial,
|
||||
initial );
|
||||
|
||||
for ( c = initial; iswdigit( c )
|
||||
|| c == LPERIOD || c == LSLASH || c == LCOMMA;
|
||||
c = url_fgetwc( input ) ) {
|
||||
switch ( c ) {
|
||||
case LPERIOD:
|
||||
if ( seen_period || !nilp( dividend ) ) {
|
||||
return throw_exception( c_string_to_lisp_symbol( L"read" ),
|
||||
c_string_to_lisp_string
|
||||
( L"Malformed number: too many periods" ),
|
||||
frame_pointer );
|
||||
} else {
|
||||
debug_print( L"read_number: decimal point seen\n",
|
||||
DEBUG_IO );
|
||||
seen_period = true;
|
||||
}
|
||||
break;
|
||||
case LSLASH:
|
||||
if ( seen_period || !nilp( dividend ) ) {
|
||||
return throw_exception( c_string_to_lisp_symbol( L"read" ),
|
||||
c_string_to_lisp_string
|
||||
( L"Malformed number: dividend of rational must be integer" ),
|
||||
frame_pointer );
|
||||
} else {
|
||||
debug_print( L"read_number: ratio slash seen\n",
|
||||
DEBUG_IO );
|
||||
dividend = result;
|
||||
|
||||
result = acquire_integer( 0, NIL );
|
||||
// If I do replace_integer_p here instead of acquire_integer,
|
||||
// and thus reclaim the garbage, I get a regression. Dom't yet
|
||||
// know why.
|
||||
}
|
||||
break;
|
||||
case LCOMMA:
|
||||
// silently ignore comma.
|
||||
break;
|
||||
default:
|
||||
result = add_integers( multiply_integers( result, base ),
|
||||
acquire_integer( ( int ) c -
|
||||
( int ) '0', NIL ) );
|
||||
|
||||
debug_printf( DEBUG_IO,
|
||||
L"read_number: added character %c, result now ",
|
||||
c );
|
||||
debug_print_object( result, DEBUG_IO );
|
||||
debug_print( L"\n", DEBUG_IO );
|
||||
|
||||
if ( seen_period ) {
|
||||
places_of_decimals++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* push back the character read which was not a digit
|
||||
*/
|
||||
url_ungetwc( c, input );
|
||||
|
||||
if ( seen_period ) {
|
||||
debug_print( L"read_number: converting result to real\n", DEBUG_IO );
|
||||
struct cons_pointer div = make_ratio( result,
|
||||
acquire_integer( powl
|
||||
( to_long_double
|
||||
( base ),
|
||||
places_of_decimals ),
|
||||
NIL ), true );
|
||||
inc_ref( div );
|
||||
|
||||
result = make_real( to_long_double( div ) );
|
||||
|
||||
dec_ref( div );
|
||||
} else if ( integerp( dividend ) ) {
|
||||
debug_print( L"read_number: converting result to ratio\n", DEBUG_IO );
|
||||
result = make_ratio( dividend, result, true );
|
||||
}
|
||||
|
||||
if ( neg ) {
|
||||
debug_print( L"read_number: converting result to negative\n",
|
||||
DEBUG_IO );
|
||||
|
||||
result = negative( result );
|
||||
}
|
||||
|
||||
debug_print( L"read_number returning\n", DEBUG_IO );
|
||||
debug_dump_object( result, DEBUG_IO );
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a list from this input stream, which no longer contains the opening
|
||||
* left parenthesis.
|
||||
*/
|
||||
struct cons_pointer read_list( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env,
|
||||
URL_FILE *input, wint_t initial ) {
|
||||
struct cons_pointer result = NIL;
|
||||
wint_t c;
|
||||
|
||||
if ( initial != ')' ) {
|
||||
debug_printf( DEBUG_IO,
|
||||
L"read_list starting '%C' (%d)\n", initial, initial );
|
||||
struct cons_pointer car =
|
||||
read_continuation( frame, frame_pointer, env, input,
|
||||
initial );
|
||||
|
||||
/* skip whitespace */
|
||||
for ( c = url_fgetwc( input );
|
||||
iswblank( c ) || iswcntrl( c ); c = url_fgetwc( input ) );
|
||||
|
||||
if ( c == LPERIOD ) {
|
||||
/* might be a dotted pair; indeed, if we rule out numbers with
|
||||
* initial periods, it must be a dotted pair. \todo Ought to check,
|
||||
* howerver, that there's only one form after the period. */
|
||||
result =
|
||||
make_cons( car,
|
||||
c_car( read_list( frame,
|
||||
frame_pointer,
|
||||
env,
|
||||
input, url_fgetwc( input ) ) ) );
|
||||
} else {
|
||||
result =
|
||||
make_cons( car,
|
||||
read_list( frame, frame_pointer, env, input, c ) );
|
||||
}
|
||||
} else {
|
||||
debug_print( L"End of list detected\n", DEBUG_IO );
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct cons_pointer read_map( struct stack_frame *frame,
|
||||
struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env,
|
||||
URL_FILE *input, wint_t initial ) {
|
||||
// set write ACL to true whilst creating to prevent GC churn
|
||||
struct cons_pointer result =
|
||||
make_hashmap( DFLT_HASHMAP_BUCKETS, NIL, TRUE );
|
||||
wint_t c = initial;
|
||||
|
||||
while ( c != LCBRACE ) {
|
||||
struct cons_pointer key =
|
||||
read_continuation( frame, frame_pointer, env, input, c );
|
||||
|
||||
/* skip whitespace */
|
||||
for ( c = url_fgetwc( input ); iswblank( c ) || iswcntrl( c );
|
||||
c = url_fgetwc( input ) );
|
||||
|
||||
struct cons_pointer value =
|
||||
read_continuation( frame, frame_pointer, env, input, c );
|
||||
|
||||
/* skip commaa and whitespace at this point. */
|
||||
for ( c = url_fgetwc( input );
|
||||
c == LCOMMA || iswblank( c ) || iswcntrl( c );
|
||||
c = url_fgetwc( input ) );
|
||||
|
||||
result =
|
||||
hashmap_put( result, key,
|
||||
eval_form( frame, frame_pointer, value, env ) );
|
||||
}
|
||||
|
||||
// default write ACL for maps should be NIL.
|
||||
pointer_to_vso( result )->payload.hashmap.write_acl = NIL;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a string. This means either a string delimited by double quotes
|
||||
* (is_quoted == true), in which case it may contain whitespace but may
|
||||
* not contain a double quote character (unless escaped), or one not
|
||||
* so delimited in which case it may not contain whitespace (unless escaped)
|
||||
* but may contain a double quote character (probably not a good idea!)
|
||||
*/
|
||||
struct cons_pointer read_string( URL_FILE *input, wint_t initial ) {
|
||||
struct cons_pointer cdr = NIL;
|
||||
struct cons_pointer result;
|
||||
switch ( initial ) {
|
||||
case '\0':
|
||||
result = NIL;
|
||||
break;
|
||||
case '"':
|
||||
/* making a string of the null character means we can have an empty
|
||||
* string. Just returning NIL here would make an empty string
|
||||
* impossible. */
|
||||
result = make_string( '\0', NIL );
|
||||
break;
|
||||
default:
|
||||
result =
|
||||
make_string( initial,
|
||||
read_string( input, url_fgetwc( input ) ) );
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Read a symbol or keyword from `input`, one character per recursive call.
 *
 * `initial` is the next character to consider:
 * - a null character becomes a single cell holding it, and reading stops;
 * - a double quote, single quote, right parenthesis, colon or slash, or any
 *   character which is blank or not printable, ends the name: it is pushed
 *   back onto `input` and NIL is returned;
 * - any other character is prepended to the rest of the name.
 *
 * @param input the stream being read.
 * @param tag the tag handed to `make_symbol_or_key` for every cell.
 * @param initial the next character of the name.
 * @return the cells for `initial` and everything after it, or NIL.
 */
struct cons_pointer read_symbol_or_key( URL_FILE *input, uint32_t tag,
                                        wint_t initial ) {
    struct cons_pointer result = NIL;

    if ( initial == '\0' ) {
        result = make_symbol_or_key( initial, NIL, tag );
    } else {
        /* quotation marks are unwise inside symbols; right parenthesis,
         * colon and slash may not appear in symbols or keywords at all. */
        bool ends_name = initial == '"' || initial == '\''
            || initial == ')' || initial == ':' || initial == '/';

        if ( !ends_name && iswprint( initial ) && !iswblank( initial ) ) {
            result =
                make_symbol_or_key( initial,
                                    read_symbol_or_key( input, tag,
                                                        url_fgetwc( input ) ),
                                    tag );
        } else {
            /* not part of the name: leave it on the stream for whoever
             * reads next. */
            url_ungetwc( initial, input );
        }
    }

    debug_print( L"read_symbol_or_key returning\n", DEBUG_IO );
    debug_dump_object( result, DEBUG_IO );

    return result;
}
|
||||
|
||||
/**
|
||||
* Read the next object on this input stream and return a cons_pointer to it.
|
||||
*/
|
||||
struct cons_pointer read( struct
|
||||
stack_frame
|
||||
*frame, struct cons_pointer frame_pointer,
|
||||
struct cons_pointer env, URL_FILE *input ) {
|
||||
return read_continuation( frame, frame_pointer, env, input,
|
||||
url_fgetwc( input ) );
|
||||
}
|
||||
32
src/c/io/read.h
Normal file
32
src/c/io/read.h
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
/**
|
||||
 * read.h
|
||||
*
|
||||
* First pass at a reader, for bootstrapping.
|
||||
*
|
||||
*
|
||||
* (c) 2017 Simon Brooke <simon@journeyman.cc>
|
||||
* Licensed under GPL version 2.0, or, at your option, any later version.
|
||||
*/
|
||||
|
||||
#ifndef __read_h
#define __read_h

#include "memory/consspaceobject.h"

/* characters (other than arabic numerals) used in number representations */
#define LCOMMA L','
#define LPERIOD L'.'
#define LSLASH L'/'
/* ... used in map representations */
#define LCBRACE L'}'
/* ... used in path representations. This is U+00A7 SECTION SIGN, written as
 * a universal character name so that its value does not depend on the
 * encoding the compiler assumes for this source file. */
#define LSESSION L'\u00a7'

/**
 * Read the next object on this input stream and return a cons_pointer to it.
 *
 * @param frame the current stack frame.
 * @param frame_pointer pointer to that frame.
 * @param env the environment to read in.
 * @param input the stream to read from.
 * @return a cons_pointer to the object read.
 */
struct cons_pointer read( struct stack_frame *frame,
                          struct cons_pointer frame_pointer,
                          struct cons_pointer env, URL_FILE * input );

#endif
|
||||
Loading…
Add table
Add a link
Reference in a new issue