It works!

2019-01-28 18:46:24 +00:00 · 2019-01-28 18:46:24 +00:00 · a640c9dff9
commit a640c9dff9
parent b15c0e8f89
10 changed files with 568 additions and 438 deletions
--- a/.gitignore
+++ b/.gitignore
@ -34,3 +34,5 @@ utils_src/readprintwc/out
 *.dump

 *.bak
+
+src/io/fopen
--- a/lisp/slurp.lisp
+++ b/lisp/slurp.lisp
@ -0,0 +1 @@
+(slurp (set! f (open "http://www.journeyman.cc/")))
--- a/src/debug.c
+++ b/src/debug.c
@ -19,9 +19,9 @@
 #include <wctype.h>

 #include "consspaceobject.h"
-#include "fopen.h"
 #include "debug.h"
 #include "dump.h"
+#include "io.h"
 #include "print.h"

 /**
--- a/src/io/fopen.c
+++ b/src/io/fopen.c
@ -37,27 +37,55 @@
 * This example requires libcurl 7.9.7 or later.
 */

-#include <errno.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #ifndef WIN32
 #include <sys/time.h>
 #endif
+#include <stdlib.h>
+#include <errno.h>

 #include <curl/curl.h>

-#include "debug.h"
-#include "fopen.h"
+/* Discriminator for struct fcurl_data: records which member of its handle
+ * union is in use. */
+enum fcurl_type_e {
+  CFTYPE_NONE = 0,            /* presumably: no stream attached - TODO confirm */
+  CFTYPE_FILE = 1,            /* handle.file is in use (stdio FILE) */
+  CFTYPE_CURL = 2             /* handle.curl is in use (libcurl handle) */
+};
+
+/* State for one stream handled by the url_f*() functions: a tagged union of
+ * a stdio FILE and a libcurl handle, plus the buffer that received data is
+ * cached in. */
+struct fcurl_data
+{
+  enum fcurl_type_e type;     /* type of handle: selects the union member */
+  union {
+    CURL *curl;               /* used when type is CFTYPE_CURL */
+    FILE *file;               /* used when type is CFTYPE_FILE */
+  } handle;                   /* handle */
+
+  char *buffer;               /* buffer to store cached data */
+  size_t buffer_len;          /* currently allocated buffer's length */
+  size_t buffer_pos;          /* end of data in buffer */
+  int still_running;          /* Is background url fetch still in progress */
+};
+
+typedef struct fcurl_data URL_FILE;
+
+/* exported functions */
+URL_FILE *url_fopen(const char *url, const char *operation);
+int url_fclose(URL_FILE *file);
+int url_feof(URL_FILE *file);
+size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file);
+char *url_fgets(char *ptr, size_t size, URL_FILE *file);
+void url_rewind(URL_FILE *file);

 /* we use a global one for convenience */
 static CURLM *multi_handle;

-wint_t ungotten = 0;
-
 /* curl calls this routine to get more data */
 static size_t write_callback(char *buffer,
-                              size_t size, size_t nitems, void *userp ) {
+                             size_t size,
+                             size_t nitems,
+                             void *userp)
+{
  char *newbuff;
  size_t rembuff;

@ -72,7 +100,8 @@ static size_t write_callback( char *buffer,
    if(newbuff == NULL) {
      fprintf(stderr, "callback buffer grow failed\n");
      size = rembuff;
-        } else {
+    }
+    else {
      /* realloc succeeded increase buffer size*/
      url->buffer_len += size - rembuff;
      url->buffer = newbuff;
@ -86,7 +115,8 @@ static size_t write_callback( char *buffer,
 }

 /* use to attempt to fill the read buffer up to requested number of bytes */
-static int fill_buffer( URL_FILE * file, size_t want ) {
+static int fill_buffer(URL_FILE *file, size_t want)
+{
  fd_set fdread;
  fd_set fdwrite;
  fd_set fdexcep;
@ -123,8 +153,7 @@ static int fill_buffer( URL_FILE * file, size_t want ) {
    }

    /* get file descriptors from the transfers */
-        mc = curl_multi_fdset( multi_handle, &fdread, &fdwrite, &fdexcep,
-                               &maxfd );
+    mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);

    if(mc != CURLM_OK) {
      fprintf(stderr, "curl_multi_fdset() failed, code %d.\n", mc);
@ -146,7 +175,8 @@ static int fill_buffer( URL_FILE * file, size_t want ) {
      struct timeval wait = { 0, 100 * 1000 }; /* 100ms */
      rc = select(0, NULL, NULL, NULL, &wait);
 #endif
-        } else {
+    }
+    else {
      /* Note that on some platforms 'timeout' may be modified by select().
         If you need access to the original value save a copy beforehand. */
      rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
@ -164,12 +194,12 @@ static int fill_buffer( URL_FILE * file, size_t want ) {
      break;
    }
  } while(file->still_running && (file->buffer_pos < want));
-
  return 1;
 }

 /* use to remove want bytes from the front of a files buffer */
-static int use_buffer( URL_FILE * file, size_t want ) {
+static int use_buffer(URL_FILE *file, size_t want)
+{
  /* sort out buffer */
  if((file->buffer_pos - want) <= 0) {
    /* ditch buffer - write will recreate */
@ -177,47 +207,20 @@ static int use_buffer( URL_FILE * file, size_t want ) {
    file->buffer = NULL;
    file->buffer_pos = 0;
    file->buffer_len = 0;
-    } else {
+  }
+  else {
    /* move rest down make it available for later */
    memmove(file->buffer,
-                 &file->buffer[want], ( file->buffer_pos - want ) );
+            &file->buffer[want],
+            (file->buffer_pos - want));

    file->buffer_pos -= want;
  }
  return 0;
 }

-/**
- * consume one wide character on the buffer of this file.
- *
- * @param file the url or file from which the character is consumed.
- */
-static int use_one_wide( URL_FILE * file ) {
-    int c = ( int ) file->buffer[file->buffer_pos];
-    size_t count = 0;
-
-    /* The value of each individual byte indicates its UTF-8 function, as follows:
-     *
-     * 00 to 7F hex (0 to 127): first and only byte of a sequence.
-     * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence.
-     * C2 to DF hex (194 to 223): first byte of a two-byte sequence.
-     * E0 to EF hex (224 to 239): first byte of a three-byte sequence.
-     * F0 to FF hex (240 to 255): first byte of a four-byte sequence.
-     */
-    if ( c <= '0x07' ) {
-        count = 1;
-    } else if ( c >= '0xc2' && c <= '0xdf' ) {
-        count = 2;
-    } else if ( c >= '0xe0' && c <= '0xef' ) {
-        count = 3;
-    } else if ( c >= '0xf0' && c <= '0xff' ) {
-        count = 4;
-    }
-
-    return use_buffer( file, c );
-}
-
-URL_FILE *url_fopen( const char *url, const char *operation ) {
+URL_FILE *url_fopen(const char *url, const char *operation)
+{
  /* this code could check for URLs or types in the 'url' and
     basically use the real fopen() for standard files */

@ -239,8 +242,7 @@ URL_FILE *url_fopen( const char *url, const char *operation ) {
    curl_easy_setopt(file->handle.curl, CURLOPT_URL, url);
    curl_easy_setopt(file->handle.curl, CURLOPT_WRITEDATA, file);
    curl_easy_setopt(file->handle.curl, CURLOPT_VERBOSE, 0L);
-        curl_easy_setopt( file->handle.curl, CURLOPT_WRITEFUNCTION,
-                          write_callback );
+    curl_easy_setopt(file->handle.curl, CURLOPT_WRITEFUNCTION, write_callback);

    if(!multi_handle)
      multi_handle = curl_multi_init();
@ -267,7 +269,8 @@ URL_FILE *url_fopen( const char *url, const char *operation ) {
  return file;
 }

-int url_fclose( URL_FILE * file ) {
+int url_fclose(URL_FILE *file)
+{
  int ret = 0;/* default is good return */

  switch(file->type) {
@ -295,7 +298,8 @@ int url_fclose( URL_FILE * file ) {
  return ret;
 }

-int url_feof( URL_FILE * file ) {
+int url_feof(URL_FILE *file)
+{
  int ret = 0;

  switch(file->type) {
@ -316,7 +320,8 @@ int url_feof( URL_FILE * file ) {
  return ret;
 }

-size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ) {
+size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file)
+{
  size_t want;

  switch(file->type) {
@ -355,7 +360,8 @@ size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ) {
  return want;
 }

-char *url_fgets( char *ptr, size_t size, URL_FILE * file ) {
+char *url_fgets(char *ptr, size_t size, URL_FILE *file)
+{
  size_t want = size - 1;/* always need to leave room for zero termination */
  size_t loop;

@ -402,7 +408,8 @@ char *url_fgets( char *ptr, size_t size, URL_FILE * file ) {
  return ptr;/*success */
 }

-void url_rewind( URL_FILE * file ) {
+void url_rewind(URL_FILE *file)
+{
  switch(file->type) {
  case CFTYPE_FILE:
    rewind(file->handle.file); /* passthrough */
@ -428,126 +435,112 @@ void url_rewind( URL_FILE * file ) {
  }
 }

-/**
- * given this file handle f, return a new url_file handle wrapping it.
- *
- * @param f the file to be wrapped;
- * @return the new handle, or null if no such handle could be allocated.
- */
-URL_FILE *file_to_url_file( FILE * f ) {
-    URL_FILE *result = ( URL_FILE * ) malloc( sizeof( URL_FILE ) );
+#ifdef FOPEN_STANDALONE
+#define FGETSFILE "fgets.test"
+#define FREADFILE "fread.test"
+#define REWINDFILE "rewind.test"

-    if ( result != NULL ) {
-        result->type = CFTYPE_FILE, result->handle.file = f;
+/* Small main program to retrieve from a url using fgets and fread saving the
+ * output to two test files (note the fgets method will corrupt binary files if
+ * they contain 0 chars */
+int main(int argc, char *argv[])
+{
+  URL_FILE *handle;
+  FILE *outf;
+
+  size_t nread;
+  char buffer[256];
+  const char *url;
+
+  CURL *curl;
+  CURLcode res;
+
+  curl_global_init(CURL_GLOBAL_DEFAULT);
+
+  curl = curl_easy_init();
+
+
+  if(argc < 2)
+    url = "http://192.168.7.3/testfile";/* default to testurl */
+  else
+    url = argv[1];/* use passed url */
+
+  /* copy from url line by line with fgets */
+  outf = fopen(FGETSFILE, "wb+");
+  if(!outf) {
+    perror("couldn't open fgets output file\n");
+    return 1;
  }

-    return result;
+  handle = url_fopen(url, "r");
+  if(!handle) {
+    printf("couldn't url_fopen() %s\n", url);
+    fclose(outf);
+    return 2;
  }

-
-/**
- * get one wide character from the buffer.
- *
- * @param file the stream to read from;
- * @return the next wide character on the stream, or zero if no more.
- */
-wint_t url_fgetwc( URL_FILE * input ) {
-    wint_t result = -1;
-
-    debug_printf( DEBUG_IO, L"url_fgetwc: ungotten = %d\n", ungotten );
-
-    if ( ungotten != 0 ) {
-        /* TODO: not thread safe */
-        result = ungotten;
-        ungotten = 0;
-    } else {
-        switch ( input->type ) {
-            case CFTYPE_FILE:
-                fwide( input->handle.file, 1 ); /* wide characters */
-                result = fgetwc( input->handle.file );  /* passthrough */
-                break;
-
-            case CFTYPE_CURL:{
-                    debug_print( L"url_fgetwc: stream is URL\n", DEBUG_IO );
-
-                    char *cbuff =
-                        calloc( sizeof( wchar_t ) + 1, sizeof( char ) );
-                    wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) );
-
-                    size_t count = 0;
-
-                    debug_print( L"url_fgetwc: about to call url_fgets\n", DEBUG_IO );
-                    url_fgets( cbuff, 1, input );
-                    debug_print( L"url_fgetwc: back from url_fgets\n", DEBUG_IO );
-                    int c = ( int ) cbuff[0];
-    debug_printf( DEBUG_IO, L"url_fgetwc: (first) character = %d (%c)\n", c, c & 0xf7 );
-                    /* The value of each individual byte indicates its UTF-8 function, as follows:
-                     *
-                     * 00 to 7F hex (0 to 127): first and only byte of a sequence.
-                     * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence.
-                     * C2 to DF hex (194 to 223): first byte of a two-byte sequence.
-                     * E0 to EF hex (224 to 239): first byte of a three-byte sequence.
-                     * F0 to FF hex (240 to 255): first byte of a four-byte sequence.
-                     */
-                    if ( c <= 0x07 ) {
-                        count = 1;
-                    } else if ( c >= '0xc2' && c <= '0xdf' ) {
-                        count = 2;
-                    } else if ( c >= '0xe0' && c <= '0xef' ) {
-                        count = 3;
-                    } else if ( c >= '0xf0' && c <= '0xff' ) {
-                        count = 4;
+  while(!url_feof(handle)) {
+    url_fgets(buffer, sizeof(buffer), handle);
+    fwrite(buffer, 1, strlen(buffer), outf);
  }

-                    if ( count > 1 ) {
-                        url_fgets( cbuff, --count, input );
-                    }
-                    mbstowcs( wbuff, cbuff, 1 );  //(char *)(&input->buffer[input->buffer_pos]), 1 );
-                    result = wbuff[0];
-                    use_one_wide( input );
+  url_fclose(handle);

-                    free( wbuff );
-                    free( cbuff );
-                }
-                break;
-            case CFTYPE_NONE:
-                break;
-        }
+  fclose(outf);
+
+
+  /* Copy from url with fread */
+  outf = fopen(FREADFILE, "wb+");
+  if(!outf) {
+    perror("couldn't open fread output file\n");
+    return 1;
  }

-    debug_printf( DEBUG_IO, L"url_fgetwc returning %d (%C)\n", result,
-                  result );
-    return result;
+  handle = url_fopen("testfile", "r");
+  if(!handle) {
+    printf("couldn't url_fopen() testfile\n");
+    fclose(outf);
+    return 2;
  }

-wint_t url_ungetwc( wint_t wc, URL_FILE * input ) {
-    wint_t result = -1;
+  do {
+    nread = url_fread(buffer, 1, sizeof(buffer), handle);
+    fwrite(buffer, 1, nread, outf);
+  } while(nread);

-    switch ( input->type ) {
-        case CFTYPE_FILE:
-            fwide( input->handle.file, 1 ); /* wide characters */
-            result = ungetwc( wc, input->handle.file ); /* passthrough */
-            break;
+  url_fclose(handle);

-        case CFTYPE_CURL:{
-                ungotten = wc;
-//                wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) );
-//                char *cbuff = calloc( 5, sizeof( char ) );
-//
-//                wbuff[0] = wc;
-//                result = wcstombs( cbuff, wbuff, 1 );
-//
-//                input->buffer_pos -= strlen( cbuff );
-//
-//                free( cbuff );
-//                free( wbuff );
-//
-//                result = result > 0 ? wc : result;
-                break;
-        case CFTYPE_NONE:
-                break;
-            }
+  fclose(outf);
+
+
+  /* Test rewind */
+  outf = fopen(REWINDFILE, "wb+");
+  if(!outf) {
+    perror("couldn't open fread output file\n");
+    return 1;
  }

-    return result;
+  handle = url_fopen("testfile", "r");
+  if(!handle) {
+    printf("couldn't url_fopen() testfile\n");
+    fclose(outf);
+    return 2;
  }
+
+  nread = url_fread(buffer, 1, sizeof(buffer), handle);
+  fwrite(buffer, 1, nread, outf);
+  url_rewind(handle);
+
+  buffer[0]='\n';
+  fwrite(buffer, 1, 1, outf);
+
+  nread = url_fread(buffer, 1, sizeof(buffer), handle);
+  fwrite(buffer, 1, nread, outf);
+
+  url_fclose(handle);
+
+  fclose(outf);
+
+  return 0;/* all done */
+}
+#endif
--- a/src/io/fopen.h
+++ b/src/io/fopen.h
@ -80,8 +80,4 @@ size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file );
 char *url_fgets( char *ptr, size_t size, URL_FILE * file );
 void url_rewind( URL_FILE * file );

-wint_t url_fgetwc( URL_FILE * file );
-wint_t url_ungetwc( wint_t wc, URL_FILE * input );
-URL_FILE *file_to_url_file( FILE * f );
-
 #endif
--- a/src/io/io.c
+++ b/src/io/io.c
@ -15,6 +15,12 @@
 #include "fopen.h"
 #include "lispops.h"

+/**
+ * Allow a one-character unget facility. This may not be enough - we may need
+ * to allocate a buffer.
+ */
+wint_t ungotten = 0;
+
 /**
 * Convert this lisp string-like-thing (also works for symbols, and, later
 * keywords) into a UTF-8 string. NOTE that the returned value has been
@ -56,6 +62,129 @@ char *lisp_string_to_c_string( struct cons_pointer s ) {
    return result;
 }

+
+/**
+ * Wrap an already-open stdio stream in a newly allocated URL_FILE handle.
+ *
+ * The handle is allocated zero-filled, so every member this function does
+ * not set explicitly starts out as zero / NULL instead of holding an
+ * indeterminate value.
+ * NOTE(review): url_fclose() is not fully visible from here; the upstream
+ * libcurl fopen.c example frees file->buffer unconditionally, which would be
+ * undefined behaviour on an uninitialised pointer - confirm against the
+ * local copy.
+ *
+ * @param f the file to be wrapped;
+ * @return the new handle, or null if no such handle could be allocated.
+ */
+URL_FILE *file_to_url_file( FILE * f ) {
+    URL_FILE *result = calloc( 1, sizeof( URL_FILE ) );
+
+    if ( result != NULL ) {
+        result->type = CFTYPE_FILE;
+        result->handle.file = f;
+    }
+
+    return result;
+}
+
+
+/**
+ * Read one wide character from this stream.
+ *
+ * A character previously stored by url_ungetwc() is returned first.
+ * Otherwise a plain file is read with fgetwc(); for a URL stream the bytes
+ * of one UTF-8 sequence are fetched with url_fgets() and converted with
+ * mbstowcs(), whose result depends on the current locale.
+ *
+ * @param input the stream to read from;
+ * @return the next wide character on the stream; zero if a URL stream
+ * yielded no bytes or they could not be converted; (wint_t)-1 if the stream
+ * type is CFTYPE_NONE; for plain files, whatever fgetwc() returns.
+ */
+wint_t url_fgetwc( URL_FILE * input ) {
+    wint_t result = -1;
+
+    if ( ungotten != 0 ) {
+        /* TODO: not thread safe; the single pushback slot is also shared by
+         * every stream, whichever one the character was pushed back onto. */
+        result = ungotten;
+        ungotten = 0;
+    } else {
+        switch ( input->type ) {
+            case CFTYPE_FILE:
+                fwide( input->handle.file, 1 ); /* wide characters */
+                result = fgetwc( input->handle.file );  /* passthrough */
+                break;
+
+            case CFTYPE_CURL:{
+                    /* room for the longest UTF-8 sequence (four bytes) plus
+                     * the terminating NUL; both buffers start zero-filled so
+                     * a failed read or conversion yields zero. */
+                    char cbuff[5] = { 0 };
+                    wchar_t wbuff[2] = { 0 };
+                    size_t count = 0;
+
+                    debug_print( L"url_fgetwc: about to call url_fgets\n", DEBUG_IO );
+                    /* url_fgets reserves one byte of its size argument for
+                     * the NUL, so a size of 2 fetches exactly one byte. */
+                    url_fgets( cbuff, 2, input );
+                    debug_print( L"url_fgetwc: back from url_fgets\n", DEBUG_IO );
+
+                    /* go through unsigned char: plain char may be signed, in
+                     * which case bytes >= 0x80 would compare as negative. */
+                    unsigned char lead = ( unsigned char ) cbuff[0];
+                    debug_printf( DEBUG_IO,
+                                 L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n",
+                                 cbuff, ( int ) lead, lead & 0x7f );
+                    /* The value of each individual byte indicates its UTF-8 function, as follows:
+                     *
+                     * 00 to 7F hex (0 to 127): first and only byte of a sequence.
+                     * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence.
+                     * C2 to DF hex (194 to 223): first byte of a two-byte sequence.
+                     * E0 to EF hex (224 to 239): first byte of a three-byte sequence.
+                     * F0 to F4 hex (240 to 244): first byte of a four-byte sequence.
+                     *
+                     * Any other value is not a valid first byte; count stays
+                     * zero and the lone byte is left for mbstowcs to reject.
+                     */
+                    if ( lead <= 0x7f ) {
+                        count = 1;
+                    } else if ( lead >= 0xc2 && lead <= 0xdf ) {
+                        count = 2;
+                    } else if ( lead >= 0xe0 && lead <= 0xef ) {
+                        count = 3;
+                    } else if ( lead >= 0xf0 && lead <= 0xf4 ) {
+                        count = 4;
+                    }
+
+                    if ( count > 1 ) {
+                        /* a size of count fetches the count - 1 continuation
+                         * bytes and NUL-terminates after them. */
+                        url_fgets( &cbuff[1], count, input );
+                    }
+                    mbstowcs( wbuff, cbuff, 1 );
+                    result = wbuff[0];
+                }
+                break;
+            case CFTYPE_NONE:
+                break;
+        }
+    }
+
+    debug_printf( DEBUG_IO, L"url_fgetwc returning %d (%C)\n", result,
+                  result );
+    return result;
+}
+
+/**
+ * Push one wide character back so that the next url_fgetwc() returns it.
+ *
+ * For a plain file this is a passthrough to ungetwc(). For a URL stream the
+ * character is kept in the single global `ungotten` slot, so only one
+ * character can be pending at a time, and a pushed-back zero is
+ * indistinguishable from "nothing pending".
+ *
+ * @param wc the character to push back;
+ * @param input the stream it was read from;
+ * @return wc if a URL stream stored it; the result of ungetwc() for a plain
+ * file; (wint_t)-1 if the stream type is CFTYPE_NONE.
+ */
+wint_t url_ungetwc( wint_t wc, URL_FILE * input ) {
+    wint_t result = -1;
+
+    switch ( input->type ) {
+        case CFTYPE_FILE:
+            fwide( input->handle.file, 1 ); /* wide characters */
+            result = ungetwc( wc, input->handle.file ); /* passthrough */
+            break;
+
+        case CFTYPE_CURL:
+            ungotten = wc;
+            /* report success the same way ungetwc() does, by returning
+             * the character that was pushed back. */
+            result = wc;
+            break;
+
+        case CFTYPE_NONE:
+            break;
+    }
+
+    return result;
+}
+
+
 /**
 * Function, sort-of: close the file indicated by my first arg, and return
 * nil. If the first arg is not a stream, does nothing. All other args are
@ -172,7 +301,7 @@ lisp_slurp( struct stack_frame *frame, struct cons_pointer frame_pointer,
        struct cons_pointer cursor = make_string( url_fgetwc( stream ), NIL );
        result = cursor;

-        for ( wint_t c = url_fgetwc( stream ); !url_feof( stream );
+        for ( wint_t c = url_fgetwc( stream ); !url_feof( stream ) && c != 0;
              c = url_fgetwc( stream ) ) {
            debug_print( L"slurp: cursor is: ", DEBUG_IO );
            debug_dump_object( cursor, DEBUG_IO );
--- a/src/io/io.h
+++ b/src/io/io.h
@ -11,6 +11,10 @@
 #ifndef __psse_io_h
 #define __psse_io_h

+URL_FILE *file_to_url_file( FILE * f );
+wint_t url_fgetwc( URL_FILE * input );
+wint_t url_ungetwc( wint_t wc, URL_FILE * input );
+
 struct cons_pointer
 lisp_close( struct stack_frame *frame, struct cons_pointer frame_pointer,
            struct cons_pointer env );
--- a/src/memory/conspage.c
+++ b/src/memory/conspage.c
@ -166,6 +166,10 @@ void free_cell( struct cons_pointer pointer ) {
                    dec_ref( cell->payload.ratio.dividend );
                    dec_ref( cell->payload.ratio.divisor );
                    break;
+                case READTV:
+                case WRITETV:
+                    url_fclose( cell->payload.stream.stream);
+                    break;
                case SPECIALTV:
                    dec_ref( cell->payload.special.source );
                    break;
--- a/src/ops/lispops.c
+++ b/src/ops/lispops.c
@ -29,9 +29,9 @@
 #include "debug.h"
 #include "dump.h"
 #include "equal.h"
-#include "fopen.h"
 #include "integer.h"
 #include "intern.h"
+#include "io.h"
 #include "lispops.h"
 #include "print.h"
 #include "read.h"
--- a/src/ops/read.c
+++ b/src/ops/read.c
@ -22,6 +22,7 @@
 #include "dump.h"
 #include "integer.h"
 #include "intern.h"
+#include "io.h"
 #include "lispops.h"
 #include "peano.h"
 #include "print.h"
				`@ -0,0 +1 @@`
				`(slurp (set! f (open "http://www.journeyman.cc/")))`