/* * io.c * * Communication between PSSE and the outside world, via libcurl. NOTE * that this file destructively changes metadata on URL connections, * because the metadata is not available until the stream has been read * from. It would be better to find a workaround! * * (c) 2019 Simon Brooke * Licensed under GPL version 2.0, or, at your option, any later version. */ #include #include #include #include #include #include #include #include #include #include /* * wide characters */ #include #include #include // #include "arith/integer.h" #include "debug.h" #include "io/fopen.h" #include "io/io.h" #include "memory/node.h" #include "memory/pointer.h" #include "memory/pso.h" #include "memory/pso2.h" #include "memory/pso4.h" #include "memory/tags.h" // #include "ops/intern.h" // #include "ops/lispops.h" #include "ops/assoc.h" #include "ops/bind.h" #include "ops/stack_ops.h" #include "ops/string_ops.h" #include "ops/truth.h" #include "payloads/character.h" #include "payloads/cons.h" #include "payloads/exception.h" #include "payloads/integer.h" #include "payloads/read_stream.h" #include "payloads/stack.h" #include "payloads/write_stream.h" #include "utils.h" /** * The sharing hub for all connections. TODO: Ultimately this probably doesn't * work for a multi-user environment and we will need one sharing hub for each * user, or else we will need to not share at least cookies and ssl sessions. */ CURLSH *io_share; /** * @brief bound to the Lisp symbol representing C_IO_IN in initialisation. */ struct pso_pointer lisp_io_in; /** * @brief bound to the Lisp symbol representing C_IO_OUT in initialisation. */ struct pso_pointer lisp_io_out; /** * Allow a one-character unget facility. This may not be enough - we may need * to allocate a buffer. */ wint_t ungotten = 0; /** * given this file handle f, return a new url_file handle wrapping it. * * @param f the file to be wrapped; * @return the new handle, or null if no such handle could be allocated. */ URL_FILE *file_to_url_file(FILE *f) { URL_FILE *result = (URL_FILE *)malloc(sizeof(URL_FILE)); if (result != NULL) { result->type = CFTYPE_FILE, result->handle.file = f; } return result; } /** * Initialise the I/O subsystem. * * @return 0 on success; any other value means failure. */ int initialise_io() { int result = curl_global_init(CURL_GLOBAL_SSL); io_share = curl_share_init(); if (result == 0) { curl_share_setopt(io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT); curl_share_setopt(io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE); curl_share_setopt(io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); curl_share_setopt(io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_SSL_SESSION); curl_share_setopt(io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL); } return result; } struct pso_pointer initialise_default_streams(struct pso_pointer env) { lisp_io_in = c_string_to_lisp_symbol(C_IO_IN); lisp_io_out = c_string_to_lisp_symbol(C_IO_OUT); debug_print( L"In initialise_default_streams; environment is: ", DEBUG_IO, 0); debug_print_object(env, DEBUG_IO, 0); env = c_bind( lisp_io_in, lock_object(make_read_stream( file_to_url_file(stdin), c_cons(c_cons(c_string_to_lisp_keyword(L"url"), c_string_to_lisp_string(L"::system:standard-input")), nil))), env); debug_print_object(env, DEBUG_IO, 0); if (!nilp(env) && !exceptionp(env)) { env = c_bind(lisp_io_out, lock_object(make_write_stream( file_to_url_file(stdout), c_cons(c_cons(c_string_to_lisp_keyword(L"url"), c_string_to_lisp_string( L"::system:standard-output")), nil))), env); } debug_print( L"Leaving initialise_default_streams; environment is: ", DEBUG_IO, 0); debug_print_object(env, DEBUG_IO, 0); return env; } /** * Convert this lisp string-like-thing (also works for symbols, and, later * keywords) into a UTF-8 string. NOTE that the returned value has been * malloced and must be freed. TODO: candidate to moving into a utilities * file. * * @param s the lisp string or symbol; * @return the c string. */ char *lisp_string_to_c_string(struct pso_pointer s) { char *result = NULL; if (stringp(s) || symbolp(s)) { int len = 0; for (struct pso_pointer c = s; !nilp(c); c = c_cdr(c)) { len++; } wchar_t *buffer = calloc(len + 1, sizeof(wchar_t)); /* worst case, one wide char = four utf bytes */ result = calloc((len * 4) + 1, sizeof(char)); int i = 0; for (struct pso_pointer c = s; !nilp(c); c = c_cdr(c)) { buffer[i++] = pointer_to_object(c)->payload.string.character; } wcstombs(result, buffer, len); free(buffer); } debug_print(L"lisp_string_to_c_string( ", DEBUG_IO, 0); debug_print_object(s, DEBUG_IO, 0); debug_printf(DEBUG_IO, 0, L") => '%s'\n", result); return result; } /** * get one wide character from the buffer. * * @param file the stream to read from; * @return the next wide character on the stream, or zero if no more. */ wint_t url_fgetwc(URL_FILE *input) { wint_t result = -1; if (ungotten != 0) { /* TODO: not thread safe */ result = ungotten; ungotten = 0; } else { switch (input->type) { case CFTYPE_FILE: fwide(input->handle.file, 1); /* wide characters */ result = fgetwc(input->handle.file); /* passthrough */ break; case CFTYPE_CURL: { char *cbuff = calloc(sizeof(wchar_t) + 2, sizeof(char)); wchar_t *wbuff = calloc(2, sizeof(wchar_t)); size_t count = 0; debug_print(L"url_fgetwc: about to call url_fgets\n", DEBUG_IO, 0); url_fgets(cbuff, 2, input); debug_print(L"url_fgetwc: back from url_fgets\n", DEBUG_IO, 0); int c = (int)cbuff[0]; // TODO: risk of reading off cbuff? debug_printf( DEBUG_IO, 0, L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n", cbuff, c, c & 0xf7); /* The value of each individual byte indicates its UTF-8 function, * as follows: * * 00 to 7F hex (0 to 127): first and only byte of a sequence. * 80 to BF hex (128 to 191): continuing byte in a multi-byte * sequence. C2 to DF hex (194 to 223): first byte of a two-byte * sequence. E0 to EF hex (224 to 239): first byte of a three-byte * sequence. F0 to FF hex (240 to 255): first byte of a four-byte * sequence. */ if (c <= 0xf7) { count = 1; } else if (c >= 0xc2 && c <= 0xdf) { count = 2; } else if (c >= 0xe0 && c <= 0xef) { count = 3; } else if (c >= 0xf0 && c <= 0xff) { count = 4; } if (count > 1) { url_fgets((char *)&cbuff[1], count, input); } mbstowcs(wbuff, cbuff, 2); //(char *)(&input->buffer[input->buffer_pos]), 1 ); result = wbuff[0]; free(wbuff); free(cbuff); } break; case CFTYPE_NONE: break; } } debug_printf(DEBUG_IO, 0, L"url_fgetwc returning %d (%C)\n", result, result); return result; } wint_t url_ungetwc(wint_t wc, URL_FILE *input) { wint_t result = -1; switch (input->type) { case CFTYPE_FILE: fwide(input->handle.file, 1); /* wide characters */ result = ungetwc(wc, input->handle.file); /* passthrough */ break; case CFTYPE_CURL: { ungotten = wc; break; case CFTYPE_NONE: break; } } return result; } /** * @brief Read one character object from this `read_stream`. * * @param read_stream a pointer to an object which should be a read stream * object, * * @return a pointer to a character object on success, or `nil` on failure. */ struct pso_pointer get_character(struct pso_pointer read_stream) { struct pso_pointer result = nil; if (readp(read_stream)) { result = make_character( url_fgetwc(pointer_to_object_of_size_class(read_stream, 2) ->payload.stream.stream)); } return result; } /** * @brief Push back this character `c` onto this read stream `r`. * * @param c a pointer to an object which should be a character object; * @param r a pointer to an object which should be a read stream object, * * @return `t` on success, else `nil`. */ struct pso_pointer push_back_character(struct pso_pointer c, struct pso_pointer r) { struct pso_pointer result = nil; if (characterp(c) && readp(r)) { if (url_ungetwc( (wint_t)(pointer_to_object(c)->payload.character.character), pointer_to_object(r)->payload.stream.stream) >= 0) { result = t; } } return result; } /** * Function, sort-of: close the file indicated by my first arg, and return * nil. If the first arg is not a stream, does nothing. All other args are * ignored. * * * (close stream) * * @param frame my stack frame. * @param frame_pointer a pointer to my stack frame. * @param env my environment. * @return T if the stream was successfully closed, else nil. */ struct pso_pointer lisp_close(struct pso_pointer frame_pointer, struct pso_pointer env) { struct pso4 *frame = pointer_to_pso4(frame_pointer); struct pso_pointer result = nil; if (readp(fetch_arg(frame, 0)) || writep(fetch_arg(frame, 0))) { if (url_fclose(pointer_to_object(fetch_arg(frame, 0)) ->payload.stream.stream) == 0) { result = t; } } return result; } struct pso_pointer add_meta_integer(struct pso_pointer meta, wchar_t *key, long int value) { return c_cons(c_cons(c_string_to_lisp_keyword(key), make_integer(value)), meta); } struct pso_pointer add_meta_string(struct pso_pointer meta, wchar_t *key, char *value) { value = trim(value); wchar_t buffer[strlen(value) + 1]; mbstowcs(buffer, value, strlen(value) + 1); return c_cons( c_cons(c_string_to_lisp_keyword(key), c_string_to_lisp_string(buffer)), meta); } struct pso_pointer add_meta_time(struct pso_pointer meta, wchar_t *key, time_t *value) { /* I don't yet have a concept of a date-time object, which is a * bit of an oversight! */ char datestring[256]; strftime(datestring, sizeof(datestring), nl_langinfo(D_T_FMT), localtime(value)); return add_meta_string(meta, key, datestring); } /** * Callback to assemble metadata for a URL stream. This is naughty because * it modifies data, but it's really the only way to create metadata. */ static size_t write_meta_callback(char *string, size_t size, size_t nmemb, struct pso_pointer stream) { struct pso2 *cell = pointer_to_object(stream); // TODO: reimplement /* make a copy of the string that we can destructively change */ // char *s = calloc( strlen( string ), sizeof( char ) ); // strcpy( s, string ); // if ( check_tag( cell, READTV) || // check_tag( cell, WRITETV) ) { // int offset = index_of( ':', s ); // if ( offset != -1 ) { // s[offset] = ( char ) 0; // char *name = trim( s ); // char *value = trim( &s[++offset] ); // wchar_t wname[strlen( name )]; // mbstowcs( wname, name, strlen( name ) + 1 ); // cell->payload.stream.meta = // add_meta_string( cell->payload.stream.meta, wname, value ); // debug_printf( DEBUG_IO, // L"write_meta_callback: added header '%s': value // '%s'\n", name, value ); // } else if ( strncmp( "HTTP", s, 4 ) == 0 ) { // int offset = index_of( ' ', s ); // char *value = trim( &s[offset] ); // cell->payload.stream.meta = // add_meta_integer( add_meta_string // ( cell->payload.stream.meta, L"status", // value ), L"status-code", strtol( value, // NULL, // 10 ) ); // debug_printf( DEBUG_IO, // L"write_meta_callback: added header 'status': value // '%s'\n", value ); // } else { // debug_printf( DEBUG_IO, // L"write_meta_callback: header passed with no colon: // '%s'\n", s ); // } // } else { // debug_print // ( L"Pointer passed to write_meta_callback did not point to a // stream: ", // DEBUG_IO ); // debug_dump_object( stream, DEBUG_IO ); // } // free( s ); return 0; // strlen( string ); } void collect_meta(struct pso_pointer stream, char *url) { struct pso2 *cell = pointer_to_object(stream); URL_FILE *s = pointer_to_object(stream)->payload.stream.stream; struct pso_pointer meta = add_meta_string(cell->payload.stream.meta, L"url", url); struct stat statbuf; int result = stat(url, &statbuf); struct passwd *pwd; struct group *grp; switch (s->type) { case CFTYPE_NONE: break; case CFTYPE_FILE: if (result == 0) { if ((pwd = getpwuid(statbuf.st_uid)) != NULL) { meta = add_meta_string(meta, L"owner", pwd->pw_name); } else { meta = add_meta_integer(meta, L"owner", statbuf.st_uid); } if ((grp = getgrgid(statbuf.st_gid)) != NULL) { meta = add_meta_string(meta, L"group", grp->gr_name); } else { meta = add_meta_integer(meta, L"group", statbuf.st_gid); } meta = add_meta_integer(meta, L"size", (intmax_t)statbuf.st_size); meta = add_meta_time(meta, L"modified", &statbuf.st_mtime); } break; case CFTYPE_CURL: curl_easy_setopt(s->handle.curl, CURLOPT_VERBOSE, 1L); curl_easy_setopt(s->handle.curl, CURLOPT_HEADERFUNCTION, write_meta_callback); curl_easy_setopt(s->handle.curl, CURLOPT_HEADERDATA, stream); break; } /* this is destructive change before the cell is released into the * wild, and consequently permissible, just. */ cell->payload.stream.meta = meta; } /** * Resutn the current default input, or of `inputp` is false, output stream from * this `env`ironment. */ struct pso_pointer get_default_stream(bool inputp, struct pso_pointer env) { struct pso_pointer result = nil; struct pso_pointer stream_name = inputp ? lisp_io_in : lisp_io_out; result = c_assoc(stream_name, env); return result; } /** * @brief if `s` points to either an input or an output stream, return the * URL_FILE pointer underlying that stream, else NULL. */ URL_FILE *stream_get_url_file(struct pso_pointer s) { URL_FILE *result = NULL; if (readp(s) || writep(s)) { struct pso2 *obj = pointer_to_object(s); result = obj->payload.stream.stream; } return result; } /** * Function: return a stream open on the URL indicated by the first argument; * if a second argument is present and is non-nil, open it for writing. At * present, further arguments are ignored and there is no mechanism to open * to append, or error if the URL is faulty or indicates an unavailable * resource. * * * (open url) * * @param frame my stack frame. * @param frame_pointer a pointer to my stack frame. * @param env my environment. * @return a string of one character, namely the next available character * on my stream, if any, else nil. */ struct pso_pointer lisp_open(struct pso_pointer frame_pointer, struct pso_pointer env) { struct pso4 *frame = pointer_to_pso4(frame_pointer); struct pso_pointer result = nil; // if ( stringp( fetch_arg( frame, 0) ) ) { // char *url = lisp_string_to_c_string( fetch_arg( frame, 0) ); // if ( nilp( fetch_arg( frame, 1) ) ) { // URL_FILE *stream = url_fopen( url, "r" ); // debug_printf( DEBUG_IO, 0, // L"lisp_open: stream @ %ld, stream type = %d, stream // handle = %ld\n", ( long int ) &stream, ( int ) // stream->type, ( long int ) stream->handle.file ); // switch ( stream->type ) { // case CFTYPE_NONE: // return // make_exception( c_string_to_lisp_string // ( L"Could not open stream" ), // frame_pointer , nil ); // break; // case CFTYPE_FILE: // if ( stream->handle.file == NULL ) { // return // make_exception( c_string_to_lisp_string // ( L"Could not open file" ), // frame_pointer , nil); // } // break; // case CFTYPE_CURL: // /* can't tell whether a URL is bad without reading it */ // break; // } // result = make_read_stream( stream, nil ); // } else { // // TODO: anything more complex is a problem for another day. // URL_FILE *stream = url_fopen( url, "w" ); // result = make_write_stream( stream, nil ); // } // if ( pointer_to_object( result )->payload.stream.stream == NULL ) { // result = nil; // } else { // collect_meta( result, url ); // } // free( url ); // } return result; } /** * Function: return the next character from the stream indicated by arg 0; * further arguments are ignored. * * * (read-char stream) * * @param frame my stack frame. * @param frame_pointer a pointer to my stack frame. * @param env my environment. * @return a string of one character, namely the next available character * on my stream, if any, else nil. */ struct pso_pointer lisp_read_char(struct pso_pointer frame_pointer, struct pso_pointer env) { struct pso4 *frame = pointer_to_pso4(frame_pointer); struct pso_pointer result = nil; struct pso_pointer stream_pointer = fetch_arg(frame, 0); if (readp(stream_pointer)) { result = make_string(url_fgetwc(stream_get_url_file(stream_pointer)), nil); } return result; } /** * Function: return a string representing all characters from the stream * indicated by arg 0; further arguments are ignored. * * TODO: it should be possible to optionally pass a string URL to this function, * * * (slurp stream) * * @param frame my stack frame. * @param frame_pointer a pointer to my stack frame. * @param env my environment. * @return a string of one character, namely the next available character * on my stream, if any, else nil. */ struct pso_pointer lisp_slurp(struct pso_pointer frame_pointer, struct pso_pointer env) { struct pso4 *frame = pointer_to_pso4(frame_pointer); struct pso_pointer result = nil; if (readp(fetch_arg(frame, 0))) { URL_FILE *stream = stream_get_url_file(fetch_arg(frame, 0)); struct pso_pointer cursor = make_string(url_fgetwc(stream), nil); result = cursor; for (wint_t c = url_fgetwc(stream); !url_feof(stream) && c != 0; c = url_fgetwc(stream)) { debug_print(L"slurp: cursor is: ", DEBUG_IO, 0); debug_dump_object(cursor, DEBUG_IO, 0); debug_print(L"; result is: ", DEBUG_IO, 0); debug_dump_object(result, DEBUG_IO, 0); debug_println(DEBUG_IO); struct pso2 *cell = pointer_to_object(cursor); cursor = make_string((wchar_t)c, nil); cell->payload.string.cdr = cursor; } } return result; }