diff --git a/.gitignore b/.gitignore index 6fa1cd9..ec1281e 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,5 @@ utils_src/readprintwc/out *.dump *.bak + +src/io/fopen diff --git a/lisp/slurp.lisp b/lisp/slurp.lisp new file mode 100644 index 0000000..e927bcb --- /dev/null +++ b/lisp/slurp.lisp @@ -0,0 +1 @@ +(slurp (set! f (open "http://www.journeyman.cc/"))) diff --git a/src/debug.c b/src/debug.c index 14881f9..c8b9771 100644 --- a/src/debug.c +++ b/src/debug.c @@ -19,9 +19,9 @@ #include #include "consspaceobject.h" -#include "fopen.h" #include "debug.h" #include "dump.h" +#include "io.h" #include "print.h" /** diff --git a/src/io/fopen.c b/src/io/fopen.c index 3c26cd9..f0ea012 100644 --- a/src/io/fopen.c +++ b/src/io/fopen.c @@ -37,517 +37,510 @@ * This example requires libcurl 7.9.7 or later. */ -#include #include -#include #include #ifndef WIN32 #include #endif +#include +#include #include -#include "debug.h" -#include "fopen.h" +enum fcurl_type_e { + CFTYPE_NONE = 0, + CFTYPE_FILE = 1, + CFTYPE_CURL = 2 +}; + +struct fcurl_data +{ + enum fcurl_type_e type; /* type of handle */ + union { + CURL *curl; + FILE *file; + } handle; /* handle */ + + char *buffer; /* buffer to store cached data*/ + size_t buffer_len; /* currently allocated buffers length */ + size_t buffer_pos; /* end of data in buffer*/ + int still_running; /* Is background url fetch still in progress */ +}; + +typedef struct fcurl_data URL_FILE; + +/* exported functions */ +URL_FILE *url_fopen(const char *url, const char *operation); +int url_fclose(URL_FILE *file); +int url_feof(URL_FILE *file); +size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file); +char *url_fgets(char *ptr, size_t size, URL_FILE *file); +void url_rewind(URL_FILE *file); /* we use a global one for convenience */ static CURLM *multi_handle; -wint_t ungotten = 0; - /* curl calls this routine to get more data */ -static size_t write_callback( char *buffer, - size_t size, size_t nitems, void *userp ) { - char *newbuff; - size_t rembuff; +static size_t write_callback(char *buffer, + size_t size, + size_t nitems, + void *userp) +{ + char *newbuff; + size_t rembuff; - URL_FILE *url = ( URL_FILE * ) userp; - size *= nitems; + URL_FILE *url = (URL_FILE *)userp; + size *= nitems; - rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */ + rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */ - if ( size > rembuff ) { - /* not enough space in buffer */ - newbuff = realloc( url->buffer, url->buffer_len + ( size - rembuff ) ); - if ( newbuff == NULL ) { - fprintf( stderr, "callback buffer grow failed\n" ); - size = rembuff; - } else { - /* realloc succeeded increase buffer size */ - url->buffer_len += size - rembuff; - url->buffer = newbuff; - } + if(size > rembuff) { + /* not enough space in buffer */ + newbuff = realloc(url->buffer, url->buffer_len + (size - rembuff)); + if(newbuff == NULL) { + fprintf(stderr, "callback buffer grow failed\n"); + size = rembuff; } + else { + /* realloc succeeded increase buffer size*/ + url->buffer_len += size - rembuff; + url->buffer = newbuff; + } + } - memcpy( &url->buffer[url->buffer_pos], buffer, size ); - url->buffer_pos += size; + memcpy(&url->buffer[url->buffer_pos], buffer, size); + url->buffer_pos += size; - return size; + return size; } /* use to attempt to fill the read buffer up to requested number of bytes */ -static int fill_buffer( URL_FILE * file, size_t want ) { - fd_set fdread; - fd_set fdwrite; - fd_set fdexcep; - struct timeval timeout; - int rc; - CURLMcode mc; /* curl_multi_fdset() return code */ +static int fill_buffer(URL_FILE *file, size_t want) +{ + fd_set fdread; + fd_set fdwrite; + fd_set fdexcep; + struct timeval timeout; + int rc; + CURLMcode mc; /* curl_multi_fdset() return code */ - /* only attempt to fill buffer if transactions still running and buffer - * doesn't exceed required size already - */ - if ( ( !file->still_running ) || ( file->buffer_pos > want ) ) - return 0; + /* only attempt to fill buffer if transactions still running and buffer + * doesn't exceed required size already + */ + if((!file->still_running) || (file->buffer_pos > want)) + return 0; - /* attempt to fill buffer */ - do { - int maxfd = -1; - long curl_timeo = -1; + /* attempt to fill buffer */ + do { + int maxfd = -1; + long curl_timeo = -1; - FD_ZERO( &fdread ); - FD_ZERO( &fdwrite ); - FD_ZERO( &fdexcep ); + FD_ZERO(&fdread); + FD_ZERO(&fdwrite); + FD_ZERO(&fdexcep); - /* set a suitable timeout to fail on */ - timeout.tv_sec = 60; /* 1 minute */ - timeout.tv_usec = 0; + /* set a suitable timeout to fail on */ + timeout.tv_sec = 60; /* 1 minute */ + timeout.tv_usec = 0; - curl_multi_timeout( multi_handle, &curl_timeo ); - if ( curl_timeo >= 0 ) { - timeout.tv_sec = curl_timeo / 1000; - if ( timeout.tv_sec > 1 ) - timeout.tv_sec = 1; - else - timeout.tv_usec = ( curl_timeo % 1000 ) * 1000; - } + curl_multi_timeout(multi_handle, &curl_timeo); + if(curl_timeo >= 0) { + timeout.tv_sec = curl_timeo / 1000; + if(timeout.tv_sec > 1) + timeout.tv_sec = 1; + else + timeout.tv_usec = (curl_timeo % 1000) * 1000; + } - /* get file descriptors from the transfers */ - mc = curl_multi_fdset( multi_handle, &fdread, &fdwrite, &fdexcep, - &maxfd ); + /* get file descriptors from the transfers */ + mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd); - if ( mc != CURLM_OK ) { - fprintf( stderr, "curl_multi_fdset() failed, code %d.\n", mc ); - break; - } + if(mc != CURLM_OK) { + fprintf(stderr, "curl_multi_fdset() failed, code %d.\n", mc); + break; + } - /* On success the value of maxfd is guaranteed to be >= -1. We call - select(maxfd + 1, ...); specially in case of (maxfd == -1) there are - no fds ready yet so we call select(0, ...) --or Sleep() on Windows-- - to sleep 100ms, which is the minimum suggested value in the - curl_multi_fdset() doc. */ + /* On success the value of maxfd is guaranteed to be >= -1. We call + select(maxfd + 1, ...); specially in case of (maxfd == -1) there are + no fds ready yet so we call select(0, ...) --or Sleep() on Windows-- + to sleep 100ms, which is the minimum suggested value in the + curl_multi_fdset() doc. */ - if ( maxfd == -1 ) { + if(maxfd == -1) { #ifdef _WIN32 - Sleep( 100 ); - rc = 0; + Sleep(100); + rc = 0; #else - /* Portable sleep for platforms other than Windows. */ - struct timeval wait = { 0, 100 * 1000 }; /* 100ms */ - rc = select( 0, NULL, NULL, NULL, &wait ); + /* Portable sleep for platforms other than Windows. */ + struct timeval wait = { 0, 100 * 1000 }; /* 100ms */ + rc = select(0, NULL, NULL, NULL, &wait); #endif - } else { - /* Note that on some platforms 'timeout' may be modified by select(). - If you need access to the original value save a copy beforehand. */ - rc = select( maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout ); - } + } + else { + /* Note that on some platforms 'timeout' may be modified by select(). + If you need access to the original value save a copy beforehand. */ + rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout); + } - switch ( rc ) { - case -1: - /* select error */ - break; + switch(rc) { + case -1: + /* select error */ + break; - case 0: - default: - /* timeout or readable/writable sockets */ - curl_multi_perform( multi_handle, &file->still_running ); - break; - } - } while ( file->still_running && ( file->buffer_pos < want ) ); - - return 1; + case 0: + default: + /* timeout or readable/writable sockets */ + curl_multi_perform(multi_handle, &file->still_running); + break; + } + } while(file->still_running && (file->buffer_pos < want)); + return 1; } /* use to remove want bytes from the front of a files buffer */ -static int use_buffer( URL_FILE * file, size_t want ) { - /* sort out buffer */ - if ( ( file->buffer_pos - want ) <= 0 ) { - /* ditch buffer - write will recreate */ - free( file->buffer ); - file->buffer = NULL; - file->buffer_pos = 0; - file->buffer_len = 0; - } else { - /* move rest down make it available for later */ - memmove( file->buffer, - &file->buffer[want], ( file->buffer_pos - want ) ); +static int use_buffer(URL_FILE *file, size_t want) +{ + /* sort out buffer */ + if((file->buffer_pos - want) <= 0) { + /* ditch buffer - write will recreate */ + free(file->buffer); + file->buffer = NULL; + file->buffer_pos = 0; + file->buffer_len = 0; + } + else { + /* move rest down make it available for later */ + memmove(file->buffer, + &file->buffer[want], + (file->buffer_pos - want)); - file->buffer_pos -= want; - } - return 0; + file->buffer_pos -= want; + } + return 0; } -/** - * consume one wide character on the buffer of this file. - * - * @param file the url or file from which the character is consumed. - */ -static int use_one_wide( URL_FILE * file ) { - int c = ( int ) file->buffer[file->buffer_pos]; - size_t count = 0; +URL_FILE *url_fopen(const char *url, const char *operation) +{ + /* this code could check for URLs or types in the 'url' and + basically use the real fopen() for standard files */ - /* The value of each individual byte indicates its UTF-8 function, as follows: - * - * 00 to 7F hex (0 to 127): first and only byte of a sequence. - * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence. - * C2 to DF hex (194 to 223): first byte of a two-byte sequence. - * E0 to EF hex (224 to 239): first byte of a three-byte sequence. - * F0 to FF hex (240 to 255): first byte of a four-byte sequence. - */ - if ( c <= '0x07' ) { - count = 1; - } else if ( c >= '0xc2' && c <= '0xdf' ) { - count = 2; - } else if ( c >= '0xe0' && c <= '0xef' ) { - count = 3; - } else if ( c >= '0xf0' && c <= '0xff' ) { - count = 4; + URL_FILE *file; + (void)operation; + + file = calloc(1, sizeof(URL_FILE)); + if(!file) + return NULL; + + file->handle.file = fopen(url, operation); + if(file->handle.file) + file->type = CFTYPE_FILE; /* marked as URL */ + + else { + file->type = CFTYPE_CURL; /* marked as URL */ + file->handle.curl = curl_easy_init(); + + curl_easy_setopt(file->handle.curl, CURLOPT_URL, url); + curl_easy_setopt(file->handle.curl, CURLOPT_WRITEDATA, file); + curl_easy_setopt(file->handle.curl, CURLOPT_VERBOSE, 0L); + curl_easy_setopt(file->handle.curl, CURLOPT_WRITEFUNCTION, write_callback); + + if(!multi_handle) + multi_handle = curl_multi_init(); + + curl_multi_add_handle(multi_handle, file->handle.curl); + + /* lets start the fetch */ + curl_multi_perform(multi_handle, &file->still_running); + + if((file->buffer_pos == 0) && (!file->still_running)) { + /* if still_running is 0 now, we should return NULL */ + + /* make sure the easy handle is not in the multi handle anymore */ + curl_multi_remove_handle(multi_handle, file->handle.curl); + + /* cleanup */ + curl_easy_cleanup(file->handle.curl); + + free(file); + + file = NULL; } - - return use_buffer( file, c ); + } + return file; } -URL_FILE *url_fopen( const char *url, const char *operation ) { - /* this code could check for URLs or types in the 'url' and - basically use the real fopen() for standard files */ +int url_fclose(URL_FILE *file) +{ + int ret = 0;/* default is good return */ - URL_FILE *file; - ( void ) operation; + switch(file->type) { + case CFTYPE_FILE: + ret = fclose(file->handle.file); /* passthrough */ + break; - file = calloc( 1, sizeof( URL_FILE ) ); - if ( !file ) - return NULL; + case CFTYPE_CURL: + /* make sure the easy handle is not in the multi handle anymore */ + curl_multi_remove_handle(multi_handle, file->handle.curl); - file->handle.file = fopen( url, operation ); - if ( file->handle.file ) - file->type = CFTYPE_FILE; /* marked as URL */ + /* cleanup */ + curl_easy_cleanup(file->handle.curl); + break; - else { - file->type = CFTYPE_CURL; /* marked as URL */ - file->handle.curl = curl_easy_init( ); + default: /* unknown or supported type - oh dear */ + ret = EOF; + errno = EBADF; + break; + } - curl_easy_setopt( file->handle.curl, CURLOPT_URL, url ); - curl_easy_setopt( file->handle.curl, CURLOPT_WRITEDATA, file ); - curl_easy_setopt( file->handle.curl, CURLOPT_VERBOSE, 0L ); - curl_easy_setopt( file->handle.curl, CURLOPT_WRITEFUNCTION, - write_callback ); + free(file->buffer);/* free any allocated buffer space */ + free(file); - if ( !multi_handle ) - multi_handle = curl_multi_init( ); - - curl_multi_add_handle( multi_handle, file->handle.curl ); - - /* lets start the fetch */ - curl_multi_perform( multi_handle, &file->still_running ); - - if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) { - /* if still_running is 0 now, we should return NULL */ - - /* make sure the easy handle is not in the multi handle anymore */ - curl_multi_remove_handle( multi_handle, file->handle.curl ); - - /* cleanup */ - curl_easy_cleanup( file->handle.curl ); - - free( file ); - - file = NULL; - } - } - return file; + return ret; } -int url_fclose( URL_FILE * file ) { - int ret = 0; /* default is good return */ +int url_feof(URL_FILE *file) +{ + int ret = 0; - switch ( file->type ) { - case CFTYPE_FILE: - ret = fclose( file->handle.file ); /* passthrough */ - break; + switch(file->type) { + case CFTYPE_FILE: + ret = feof(file->handle.file); + break; - case CFTYPE_CURL: - /* make sure the easy handle is not in the multi handle anymore */ - curl_multi_remove_handle( multi_handle, file->handle.curl ); + case CFTYPE_CURL: + if((file->buffer_pos == 0) && (!file->still_running)) + ret = 1; + break; - /* cleanup */ - curl_easy_cleanup( file->handle.curl ); - break; - - default: /* unknown or supported type - oh dear */ - ret = EOF; - errno = EBADF; - break; - } - - free( file->buffer ); /* free any allocated buffer space */ - free( file ); - - return ret; + default: /* unknown or supported type - oh dear */ + ret = -1; + errno = EBADF; + break; + } + return ret; } -int url_feof( URL_FILE * file ) { - int ret = 0; +size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file) +{ + size_t want; - switch ( file->type ) { - case CFTYPE_FILE: - ret = feof( file->handle.file ); - break; + switch(file->type) { + case CFTYPE_FILE: + want = fread(ptr, size, nmemb, file->handle.file); + break; - case CFTYPE_CURL: - if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) - ret = 1; - break; + case CFTYPE_CURL: + want = nmemb * size; - default: /* unknown or supported type - oh dear */ - ret = -1; - errno = EBADF; - break; - } - return ret; + fill_buffer(file, want); + + /* check if there's data in the buffer - if not fill_buffer() + * either errored or EOF */ + if(!file->buffer_pos) + return 0; + + /* ensure only available data is considered */ + if(file->buffer_pos < want) + want = file->buffer_pos; + + /* xfer data to caller */ + memcpy(ptr, file->buffer, want); + + use_buffer(file, want); + + want = want / size; /* number of items */ + break; + + default: /* unknown or supported type - oh dear */ + want = 0; + errno = EBADF; + break; + + } + return want; } -size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ) { - size_t want; +char *url_fgets(char *ptr, size_t size, URL_FILE *file) +{ + size_t want = size - 1;/* always need to leave room for zero termination */ + size_t loop; - switch ( file->type ) { - case CFTYPE_FILE: - want = fread( ptr, size, nmemb, file->handle.file ); - break; + switch(file->type) { + case CFTYPE_FILE: + ptr = fgets(ptr, (int)size, file->handle.file); + break; - case CFTYPE_CURL: - want = nmemb * size; + case CFTYPE_CURL: + fill_buffer(file, want); - fill_buffer( file, want ); + /* check if there's data in the buffer - if not fill either errored or + * EOF */ + if(!file->buffer_pos) + return NULL; - /* check if there's data in the buffer - if not fill_buffer() - * either errored or EOF */ - if ( !file->buffer_pos ) - return 0; + /* ensure only available data is considered */ + if(file->buffer_pos < want) + want = file->buffer_pos; - /* ensure only available data is considered */ - if ( file->buffer_pos < want ) - want = file->buffer_pos; - - /* xfer data to caller */ - memcpy( ptr, file->buffer, want ); - - use_buffer( file, want ); - - want = want / size; /* number of items */ - break; - - default: /* unknown or supported type - oh dear */ - want = 0; - errno = EBADF; - break; - - } - return want; -} - -char *url_fgets( char *ptr, size_t size, URL_FILE * file ) { - size_t want = size - 1; /* always need to leave room for zero termination */ - size_t loop; - - switch ( file->type ) { - case CFTYPE_FILE: - ptr = fgets( ptr, ( int ) size, file->handle.file ); - break; - - case CFTYPE_CURL: - fill_buffer( file, want ); - - /* check if there's data in the buffer - if not fill either errored or - * EOF */ - if ( !file->buffer_pos ) - return NULL; - - /* ensure only available data is considered */ - if ( file->buffer_pos < want ) - want = file->buffer_pos; - - /*buffer contains data */ - /* look for newline or eof */ - for ( loop = 0; loop < want; loop++ ) { - if ( file->buffer[loop] == '\n' ) { - want = loop + 1; /* include newline */ - break; - } - } - - /* xfer data to caller */ - memcpy( ptr, file->buffer, want ); - ptr[want] = 0; /* always null terminate */ - - use_buffer( file, want ); - - break; - - default: /* unknown or supported type - oh dear */ - ptr = NULL; - errno = EBADF; - break; + /*buffer contains data */ + /* look for newline or eof */ + for(loop = 0; loop < want; loop++) { + if(file->buffer[loop] == '\n') { + want = loop + 1;/* include newline */ + break; + } } - return ptr; /*success */ + /* xfer data to caller */ + memcpy(ptr, file->buffer, want); + ptr[want] = 0;/* always null terminate */ + + use_buffer(file, want); + + break; + + default: /* unknown or supported type - oh dear */ + ptr = NULL; + errno = EBADF; + break; + } + + return ptr;/*success */ } -void url_rewind( URL_FILE * file ) { - switch ( file->type ) { - case CFTYPE_FILE: - rewind( file->handle.file ); /* passthrough */ - break; +void url_rewind(URL_FILE *file) +{ + switch(file->type) { + case CFTYPE_FILE: + rewind(file->handle.file); /* passthrough */ + break; - case CFTYPE_CURL: - /* halt transaction */ - curl_multi_remove_handle( multi_handle, file->handle.curl ); + case CFTYPE_CURL: + /* halt transaction */ + curl_multi_remove_handle(multi_handle, file->handle.curl); - /* restart */ - curl_multi_add_handle( multi_handle, file->handle.curl ); + /* restart */ + curl_multi_add_handle(multi_handle, file->handle.curl); - /* ditch buffer - write will recreate - resets stream pos */ - free( file->buffer ); - file->buffer = NULL; - file->buffer_pos = 0; - file->buffer_len = 0; + /* ditch buffer - write will recreate - resets stream pos*/ + free(file->buffer); + file->buffer = NULL; + file->buffer_pos = 0; + file->buffer_len = 0; - break; + break; - default: /* unknown or supported type - oh dear */ - break; - } + default: /* unknown or supported type - oh dear */ + break; + } } -/** - * given this file handle f, return a new url_file handle wrapping it. - * - * @param f the file to be wrapped; - * @return the new handle, or null if no such handle could be allocated. - */ -URL_FILE *file_to_url_file( FILE * f ) { - URL_FILE *result = ( URL_FILE * ) malloc( sizeof( URL_FILE ) ); +#ifdef FOPEN_STANDALONE +#define FGETSFILE "fgets.test" +#define FREADFILE "fread.test" +#define REWINDFILE "rewind.test" - if ( result != NULL ) { - result->type = CFTYPE_FILE, result->handle.file = f; - } +/* Small main program to retrieve from a url using fgets and fread saving the + * output to two test files (note the fgets method will corrupt binary files if + * they contain 0 chars */ +int main(int argc, char *argv[]) +{ + URL_FILE *handle; + FILE *outf; - return result; -} - - -/** - * get one wide character from the buffer. - * - * @param file the stream to read from; - * @return the next wide character on the stream, or zero if no more. - */ -wint_t url_fgetwc( URL_FILE * input ) { - wint_t result = -1; - - debug_printf( DEBUG_IO, L"url_fgetwc: ungotten = %d\n", ungotten ); - - if ( ungotten != 0 ) { - /* TODO: not thread safe */ - result = ungotten; - ungotten = 0; - } else { - switch ( input->type ) { - case CFTYPE_FILE: - fwide( input->handle.file, 1 ); /* wide characters */ - result = fgetwc( input->handle.file ); /* passthrough */ - break; - - case CFTYPE_CURL:{ - debug_print( L"url_fgetwc: stream is URL\n", DEBUG_IO ); - - char *cbuff = - calloc( sizeof( wchar_t ) + 1, sizeof( char ) ); - wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) ); - - size_t count = 0; - - debug_print( L"url_fgetwc: about to call url_fgets\n", DEBUG_IO ); - url_fgets( cbuff, 1, input ); - debug_print( L"url_fgetwc: back from url_fgets\n", DEBUG_IO ); - int c = ( int ) cbuff[0]; - debug_printf( DEBUG_IO, L"url_fgetwc: (first) character = %d (%c)\n", c, c & 0xf7 ); - /* The value of each individual byte indicates its UTF-8 function, as follows: - * - * 00 to 7F hex (0 to 127): first and only byte of a sequence. - * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence. - * C2 to DF hex (194 to 223): first byte of a two-byte sequence. - * E0 to EF hex (224 to 239): first byte of a three-byte sequence. - * F0 to FF hex (240 to 255): first byte of a four-byte sequence. - */ - if ( c <= 0x07 ) { - count = 1; - } else if ( c >= '0xc2' && c <= '0xdf' ) { - count = 2; - } else if ( c >= '0xe0' && c <= '0xef' ) { - count = 3; - } else if ( c >= '0xf0' && c <= '0xff' ) { - count = 4; - } - - if ( count > 1 ) { - url_fgets( cbuff, --count, input ); - } - mbstowcs( wbuff, cbuff, 1 ); //(char *)(&input->buffer[input->buffer_pos]), 1 ); - result = wbuff[0]; - use_one_wide( input ); - - free( wbuff ); - free( cbuff ); - } - break; - case CFTYPE_NONE: - break; - } - } - - debug_printf( DEBUG_IO, L"url_fgetwc returning %d (%C)\n", result, - result ); - return result; -} - -wint_t url_ungetwc( wint_t wc, URL_FILE * input ) { - wint_t result = -1; - - switch ( input->type ) { - case CFTYPE_FILE: - fwide( input->handle.file, 1 ); /* wide characters */ - result = ungetwc( wc, input->handle.file ); /* passthrough */ - break; - - case CFTYPE_CURL:{ - ungotten = wc; -// wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) ); -// char *cbuff = calloc( 5, sizeof( char ) ); -// -// wbuff[0] = wc; -// result = wcstombs( cbuff, wbuff, 1 ); -// -// input->buffer_pos -= strlen( cbuff ); -// -// free( cbuff ); -// free( wbuff ); -// -// result = result > 0 ? wc : result; - break; - case CFTYPE_NONE: - break; - } - } - - return result; + size_t nread; + char buffer[256]; + const char *url; + + CURL *curl; + CURLcode res; + + curl_global_init(CURL_GLOBAL_DEFAULT); + + curl = curl_easy_init(); + + + if(argc < 2) + url = "http://192.168.7.3/testfile";/* default to testurl */ + else + url = argv[1];/* use passed url */ + + /* copy from url line by line with fgets */ + outf = fopen(FGETSFILE, "wb+"); + if(!outf) { + perror("couldn't open fgets output file\n"); + return 1; + } + + handle = url_fopen(url, "r"); + if(!handle) { + printf("couldn't url_fopen() %s\n", url); + fclose(outf); + return 2; + } + + while(!url_feof(handle)) { + url_fgets(buffer, sizeof(buffer), handle); + fwrite(buffer, 1, strlen(buffer), outf); + } + + url_fclose(handle); + + fclose(outf); + + + /* Copy from url with fread */ + outf = fopen(FREADFILE, "wb+"); + if(!outf) { + perror("couldn't open fread output file\n"); + return 1; + } + + handle = url_fopen("testfile", "r"); + if(!handle) { + printf("couldn't url_fopen() testfile\n"); + fclose(outf); + return 2; + } + + do { + nread = url_fread(buffer, 1, sizeof(buffer), handle); + fwrite(buffer, 1, nread, outf); + } while(nread); + + url_fclose(handle); + + fclose(outf); + + + /* Test rewind */ + outf = fopen(REWINDFILE, "wb+"); + if(!outf) { + perror("couldn't open fread output file\n"); + return 1; + } + + handle = url_fopen("testfile", "r"); + if(!handle) { + printf("couldn't url_fopen() testfile\n"); + fclose(outf); + return 2; + } + + nread = url_fread(buffer, 1, sizeof(buffer), handle); + fwrite(buffer, 1, nread, outf); + url_rewind(handle); + + buffer[0]='\n'; + fwrite(buffer, 1, 1, outf); + + nread = url_fread(buffer, 1, sizeof(buffer), handle); + fwrite(buffer, 1, nread, outf); + + url_fclose(handle); + + fclose(outf); + + return 0;/* all done */ } +#endif diff --git a/src/io/fopen.h b/src/io/fopen.h index f952a65..5f87bd2 100644 --- a/src/io/fopen.h +++ b/src/io/fopen.h @@ -80,8 +80,4 @@ size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ); char *url_fgets( char *ptr, size_t size, URL_FILE * file ); void url_rewind( URL_FILE * file ); -wint_t url_fgetwc( URL_FILE * file ); -wint_t url_ungetwc( wint_t wc, URL_FILE * input ); -URL_FILE *file_to_url_file( FILE * f ); - #endif diff --git a/src/io/io.c b/src/io/io.c index 4577a11..d7c2024 100644 --- a/src/io/io.c +++ b/src/io/io.c @@ -15,6 +15,12 @@ #include "fopen.h" #include "lispops.h" +/** + * Allow a one-character unget facility. This may not be enough - we may need + * to allocate a buffer. + */ +wint_t ungotten = 0; + /** * Convert this lisp string-like-thing (also works for symbols, and, later * keywords) into a UTF-8 string. NOTE that the returned value has been @@ -56,6 +62,129 @@ char *lisp_string_to_c_string( struct cons_pointer s ) { return result; } + +/** + * given this file handle f, return a new url_file handle wrapping it. + * + * @param f the file to be wrapped; + * @return the new handle, or null if no such handle could be allocated. + */ +URL_FILE *file_to_url_file( FILE * f ) { + URL_FILE *result = ( URL_FILE * ) malloc( sizeof( URL_FILE ) ); + + if ( result != NULL ) { + result->type = CFTYPE_FILE, result->handle.file = f; + } + + return result; +} + + +/** + * get one wide character from the buffer. + * + * @param file the stream to read from; + * @return the next wide character on the stream, or zero if no more. + */ +wint_t url_fgetwc( URL_FILE * input ) { + wint_t result = -1; + + if ( ungotten != 0 ) { + /* TODO: not thread safe */ + result = ungotten; + ungotten = 0; + } else { + switch ( input->type ) { + case CFTYPE_FILE: + fwide( input->handle.file, 1 ); /* wide characters */ + result = fgetwc( input->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL:{ + char *cbuff = + calloc( sizeof( wchar_t ) + 2, sizeof( char ) ); + wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) ); + + size_t count = 0; + + debug_print( L"url_fgetwc: about to call url_fgets\n", DEBUG_IO ); + url_fgets( cbuff, 2, input ); + debug_print( L"url_fgetwc: back from url_fgets\n", DEBUG_IO ); + int c = ( int ) cbuff[0]; + debug_printf( DEBUG_IO, + L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n", + cbuff, c, c & 0xf7 ); + /* The value of each individual byte indicates its UTF-8 function, as follows: + * + * 00 to 7F hex (0 to 127): first and only byte of a sequence. + * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence. + * C2 to DF hex (194 to 223): first byte of a two-byte sequence. + * E0 to EF hex (224 to 239): first byte of a three-byte sequence. + * F0 to FF hex (240 to 255): first byte of a four-byte sequence. + */ + if ( c <= 0x07 ) { + count = 1; + } else if ( c >= '0xc2' && c <= '0xdf' ) { + count = 2; + } else if ( c >= '0xe0' && c <= '0xef' ) { + count = 3; + } else if ( c >= '0xf0' && c <= '0xff' ) { + count = 4; + } + + if ( count > 1 ) { + url_fgets( (char *)&cbuff[1], count, input ); + } + mbstowcs( wbuff, cbuff, 1 ); //(char *)(&input->buffer[input->buffer_pos]), 1 ); + result = wbuff[0]; + + free( wbuff ); + free( cbuff ); + } + break; + case CFTYPE_NONE: + break; + } + } + + debug_printf( DEBUG_IO, L"url_fgetwc returning %d (%C)\n", result, + result ); + return result; +} + +wint_t url_ungetwc( wint_t wc, URL_FILE * input ) { + wint_t result = -1; + + switch ( input->type ) { + case CFTYPE_FILE: + fwide( input->handle.file, 1 ); /* wide characters */ + result = ungetwc( wc, input->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL:{ + ungotten = wc; +// wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) ); +// char *cbuff = calloc( 5, sizeof( char ) ); +// +// wbuff[0] = wc; +// result = wcstombs( cbuff, wbuff, 1 ); +// +// input->buffer_pos -= strlen( cbuff ); +// +// free( cbuff ); +// free( wbuff ); +// +// result = result > 0 ? wc : result; + break; + case CFTYPE_NONE: + break; + } + } + + return result; +} + + /** * Function, sort-of: close the file indicated by my first arg, and return * nil. If the first arg is not a stream, does nothing. All other args are @@ -172,7 +301,7 @@ lisp_slurp( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer cursor = make_string( url_fgetwc( stream ), NIL ); result = cursor; - for ( wint_t c = url_fgetwc( stream ); !url_feof( stream ); + for ( wint_t c = url_fgetwc( stream ); !url_feof( stream ) && c != 0; c = url_fgetwc( stream ) ) { debug_print( L"slurp: cursor is: ", DEBUG_IO ); debug_dump_object( cursor, DEBUG_IO ); diff --git a/src/io/io.h b/src/io/io.h index 06dcaed..d46f8b1 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -11,6 +11,10 @@ #ifndef __psse_io_h #define __psse_io_h +URL_FILE *file_to_url_file( FILE * f ); +wint_t url_fgetwc( URL_FILE * input ); +wint_t url_ungetwc( wint_t wc, URL_FILE * input ); + struct cons_pointer lisp_close( struct stack_frame *frame, struct cons_pointer frame_pointer, struct cons_pointer env ); diff --git a/src/memory/conspage.c b/src/memory/conspage.c index 7a1a0d8..54d14e9 100644 --- a/src/memory/conspage.c +++ b/src/memory/conspage.c @@ -166,6 +166,10 @@ void free_cell( struct cons_pointer pointer ) { dec_ref( cell->payload.ratio.dividend ); dec_ref( cell->payload.ratio.divisor ); break; + case READTV: + case WRITETV: + url_fclose( cell->payload.stream.stream); + break; case SPECIALTV: dec_ref( cell->payload.special.source ); break; diff --git a/src/ops/lispops.c b/src/ops/lispops.c index 4bfe6f0..1220835 100644 --- a/src/ops/lispops.c +++ b/src/ops/lispops.c @@ -29,9 +29,9 @@ #include "debug.h" #include "dump.h" #include "equal.h" -#include "fopen.h" #include "integer.h" #include "intern.h" +#include "io.h" #include "lispops.h" #include "print.h" #include "read.h" diff --git a/src/ops/read.c b/src/ops/read.c index 989aa67..69899c0 100644 --- a/src/ops/read.c +++ b/src/ops/read.c @@ -22,6 +22,7 @@ #include "dump.h" #include "integer.h" #include "intern.h" +#include "io.h" #include "lispops.h" #include "peano.h" #include "print.h"