From 004ff6737c3def1f21cebb855b9f058379b6aa0f Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Thu, 12 Feb 2026 10:17:11 +0000 Subject: [PATCH] feature-2: allocating cells with count = 1; 7 unit tests (all bignums) fail. --- src/arith/integer.c | 172 ++++++++++++++++++++++++++++++------------ src/arith/integer.h | 7 ++ src/arith/peano.h | 3 +- src/arith/ratio.c | 38 +++++----- src/init.c | 25 +++--- src/io/io.c | 19 ++++- src/io/io.h | 8 +- src/io/read.c | 38 ++++------ src/memory/conspage.c | 4 +- src/ops/intern.c | 9 +-- 10 files changed, 209 insertions(+), 114 deletions(-) diff --git a/src/arith/integer.c b/src/arith/integer.c index 41c46ef..e9d9b79 100644 --- a/src/arith/integer.c +++ b/src/arith/integer.c @@ -19,12 +19,13 @@ #include #include +#include "arith/integer.h" +#include "arith/peano.h" +#include "debug.h" #include "memory/conspage.h" #include "memory/consspaceobject.h" -#include "debug.h" #include "ops/equal.h" #include "ops/lispops.h" -#include "arith/peano.h" /** * hexadecimal digits for printing numbers. @@ -34,19 +35,33 @@ const char *hex_digits = "0123456789ABCDEF"; /* * Doctrine from here on in is that ALL integers are bignums, it's just * that integers less than 61 bits are bignums of one cell only. + * that integers less than 61 bits are bignums of one cell only. + * TODO: why do I not have confidence to make this 64 bits? */ + /* + * A small_int_cache array of pointers to the integers 0...23, + * used only by functions `acquire_integer(int64) => cons_pointer` and + * `release_integer(cons_pointer) => NULL` which, if the value desired is + * in the cache, supplies it from the cache, and, otherwise, calls + * make_integer() and dec_ref() respectively. + */ + +#define SMALL_INT_LIMIT 24 +bool small_int_cache_initialised = false; +struct cons_pointer small_int_cache[SMALL_INT_LIMIT]; + /** - * Low level integer arithmetic, do not use elsewhere. - * - * @param c a pointer to a cell, assumed to be an integer cell; - * @param op a character representing the operation: expectedto be either - * '+' or '*'; behaviour with other values is undefined. - * @param is_first_cell true if this is the first cell in a bignum - * chain, else false. - * \see multiply_integers - * \see add_integers - */ + * Low level integer arithmetic, do not use elsewhere. + * + * @param c a pointer to a cell, assumed to be an integer cell; + * @param op a character representing the operation: expectedto be either + * '+' or '*'; behaviour with other values is undefined. + * @param is_first_cell true if this is the first cell in a bignum + * chain, else false. + * \see multiply_integers + * \see add_integers + */ __int128_t cell_value( struct cons_pointer c, char op, bool is_first_cell ) { long int val = nilp( c ) ? 0 : pointer2cell( c ).payload.integer.value; @@ -86,7 +101,6 @@ struct cons_pointer make_integer( int64_t value, struct cons_pointer more ) { struct cons_space_object *cell = &pointer2cell( result ); cell->payload.integer.value = value; cell->payload.integer.more = more; - inc_ref(result); } debug_print( L"make_integer: returning\n", DEBUG_ALLOC ); @@ -95,11 +109,74 @@ struct cons_pointer make_integer( int64_t value, struct cons_pointer more ) { } /** - * Overwrite the value field of the integer indicated by `new` with + * @brief Supply small valued integers from the small integer cache, if available. + * + * The pattern here is intended to be that, at least within this file, instead of + * calling make_integer when an integer is required and dec_ref when it's no longer + * required, we call acquire_integer and release_integer respectively, in order to + * reduce allocation churn. + * + * In the initial implementation, acquire_integer supplies the integer from the + * small integer cache if available, else calls make_integer. Later, more + * sophisticated caching of integers which are currently in play may be enabled. + * + * @param value the value of the integer desired. + * @param more if this value is a bignum, the rest (less significant bits) of the + * value. + * @return struct cons_pointer a pointer to the integer acquired. + */ +struct cons_pointer acquire_integer( int64_t value, struct cons_pointer more ) { + struct cons_pointer result; + + if ( !nilp( more) || value >= SMALL_INT_LIMIT) { + debug_print( L"acquire_integer passing to make_integer (too large)\n", DEBUG_ALLOC ); + result = make_integer( value, more); + } else { + if ( !small_int_cache_initialised) { + for (int64_t i = 0; i < SMALL_INT_LIMIT; i++) { + small_int_cache[i] = make_integer( i, NIL); + pointer2cell(small_int_cache[i]).count = UINT32_MAX; // lock it in so it can't be GC'd + } + small_int_cache_initialised = true; + debug_print( L"small_int_cache initialised.\n", DEBUG_ALLOC ); + } + + debug_printf( DEBUG_ALLOC, L"acquire_integer: returning %" PRId64 "\n", value); + result = small_int_cache[value]; + } + return result; +} + +/** + * @brief if the value of p is less than the size of the small integer cache + * (and thus it was presumably supplied from there), suppress dec_ref. + * + * **NOTE THAT** at this stage it's still safe to dec_ref an arbitrary integer, + * because those in the cache are locked and can't be dec_refed. + * + * @param p a pointer, expected to be to an integer. + */ +void release_integer( struct cons_pointer p) { + struct cons_space_object o = pointer2cell( p); + if ( !integerp( p) || // what I've been passed isn't an integer; + !nilp( o.payload.integer.more) || // or it's a bignum; + o.payload.integer.value >= SMALL_INT_LIMIT || // or it's bigger than the small int cache limit; + !eq( p, small_int_cache[ o.payload.integer.value]) // or it's simply not the copy in the cache... + ) { dec_ref( p); } else { + debug_printf( DEBUG_ALLOC, L"release_integer: releasing %" PRId64 "\n", + o.payload.integer.value); + } +} + + +/** + * @brief Overwrite the value field of the integer indicated by `new` with * the least significant INTEGER_BITS bits of `val`, and return the - * more significant bits (if any) right-shifted by INTEGER_BITS places. - * Destructive, primitive, do not use in any context except primitive - * operations on integers. + * more significant bits (if any) right-shifted by INTEGER_BITS places. + * + * Destructive, primitive, DO NOT USE in any context except primitive + * operations on integers. The value passed as `new` MUST be constructed + * with `make_integer`, NOT acquired with `acquire_integer`. * * @param val the value to represent; * @param less_significant the less significant words of this bignum, if any, @@ -134,25 +211,6 @@ __int128_t int128_to_integer( __int128_t val, return carry; } -struct cons_pointer make_integer_128( __int128_t val, - struct cons_pointer less_significant ) { - struct cons_pointer result = NIL; - - do { - if ( MAX_INTEGER >= val ) { - result = make_integer( ( long int ) val, less_significant ); - } else { - less_significant = - make_integer( ( long int ) val & MAX_INTEGER, - less_significant ); - val = val * INT_CELL_BASE; - } - - } while ( nilp( result ) ); - - return result; -} - /** * Return a pointer to an integer representing the sum of the integers * pointed to by `a` and `b`. If either isn't an integer, will return nil. @@ -218,28 +276,38 @@ struct cons_pointer base_partial( int depth ) { struct cons_pointer result = NIL; for ( int i = 0; i < depth; i++ ) { - result = make_integer( 0, result ); + result = acquire_integer( 0, result ); } return result; } /** - * destructively modify this `partial` by appending this `digit`. + * @brief Return a copy of this `partial` with this `digit` appended. + * + * @param partial the more significant bits of a possible bignum. + * @param digit the less significant bits of that possible bignum. NOTE: the + * name `digit` is technically correct but possibly misleading, because the + * numbering system here is base INT_CELL_BASE, currently x0fffffffffffffffL */ -struct cons_pointer append_digit( struct cons_pointer partial, +struct cons_pointer append_cell( struct cons_pointer partial, struct cons_pointer digit ) { - struct cons_pointer c = partial; + struct cons_space_object cell = pointer2cell( partial); + // TODO: I should recursively copy the whole bignum chain, because + // we're still destructively modifying the end of it. + struct cons_pointer c = make_integer( cell.payload.integer.value, + cell.payload.integer.more); struct cons_pointer result = partial; - if ( nilp( partial ) ) { + if ( nilp( partial)) { result = digit; } else { + // find the last digit in the chain... while ( !nilp( pointer2cell( c ).payload.integer.more ) ) { c = pointer2cell( c ).payload.integer.more; } - ( &pointer2cell( c ) )->payload.integer.more = digit; + ( pointer2cell( c ) ).payload.integer.more = digit; } return result; } @@ -259,7 +327,7 @@ struct cons_pointer append_digit( struct cons_pointer partial, */ struct cons_pointer multiply_integers( struct cons_pointer a, struct cons_pointer b ) { - struct cons_pointer result = make_integer( 0, NIL ); + struct cons_pointer result = acquire_integer( 0, NIL ); bool neg = is_negative( a ) != is_negative( b ); bool is_first_b = true; int i = 0; @@ -300,16 +368,18 @@ struct cons_pointer multiply_integers( struct cons_pointer a, /* if xj exceeds one digit, break it into the digit dj and * the carry */ carry = xj >> INTEGER_BIT_SHIFT; - struct cons_pointer dj = make_integer( xj & MAX_INTEGER, NIL ); + struct cons_pointer dj = acquire_integer( xj & MAX_INTEGER, NIL ); - /* destructively modify ri by appending dj */ - ri = append_digit( ri, dj ); + replace_integer_p( ri, append_cell( ri, dj )); + // struct cons_pointer new_ri = append_cell( ri, dj ); + // release_integer( ri); + // ri = new_ri; } /* end for bj */ - /* if carry is not equal to zero, append it as a final digit + /* if carry is not equal to zero, append it as a final cell * to ri */ if ( carry != 0 ) { - ri = append_digit( ri, make_integer( carry, NIL ) ); + replace_integer_i( ri, carry) } /* add ri to result */ @@ -341,6 +411,9 @@ struct cons_pointer integer_to_string_add_digit( int digit, int digits, } /** + * @brief return a string representation of this integer, which may be a + * bignum. + * * The general principle of printing a bignum is that you print the least * significant digit in whatever base you're dealing with, divide through * by the base, print the next, and carry on until you've none left. @@ -350,6 +423,9 @@ struct cons_pointer integer_to_string_add_digit( int digit, int digits, * object to the next. 64 bit integers don't align with decimal numbers, so * when we get to the last digit from one integer cell, we have potentially * to be looking to the next. H'mmmm. + * + * @param int_pointer cons_pointer to the integer to print, + * @param base the base to print it in. */ struct cons_pointer integer_to_string( struct cons_pointer int_pointer, int base ) { diff --git a/src/arith/integer.h b/src/arith/integer.h index 09a7a83..d0b4b71 100644 --- a/src/arith/integer.h +++ b/src/arith/integer.h @@ -14,8 +14,15 @@ #include #include +#define replace_integer_i(p,i) {struct cons_pointer __p = acquire_integer(i,NIL); release_integer(p); p = __p;} +#define replace_integer_p(p,q) {struct cons_pointer __p = p; release_integer( p); p = q;} + struct cons_pointer make_integer( int64_t value, struct cons_pointer more ); +struct cons_pointer acquire_integer( int64_t value, struct cons_pointer more ); + +void release_integer( struct cons_pointer p); + struct cons_pointer add_integers( struct cons_pointer a, struct cons_pointer b ); diff --git a/src/arith/peano.h b/src/arith/peano.h index 95c5013..5e83f0c 100644 --- a/src/arith/peano.h +++ b/src/arith/peano.h @@ -7,11 +7,12 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ -#include "consspaceobject.h" #ifndef PEANO_H #define PEANO_H +#include "memory/consspaceobject.h" + /** * The maximum value we will allow in an integer cell: one less than 2^60: * (let ((s (make-string-output-stream))) diff --git a/src/arith/ratio.c b/src/arith/ratio.c index 5135d6b..f0095b1 100644 --- a/src/arith/ratio.c +++ b/src/arith/ratio.c @@ -61,11 +61,11 @@ struct cons_pointer simplify_ratio( struct cons_pointer pointer ) { if ( gcd > 1 ) { if ( drrv / gcd == 1 ) { - result = make_integer( ddrv / gcd, NIL ); + result = acquire_integer( ddrv / gcd, NIL ); } else { result = - make_ratio( make_integer( ddrv / gcd, NIL ), - make_integer( drrv / gcd, NIL ) ); + make_ratio( acquire_integer( ddrv / gcd, NIL ), + acquire_integer( drrv / gcd, NIL ) ); } } } @@ -110,23 +110,24 @@ struct cons_pointer add_ratio_ratio( struct cons_pointer arg1, m1, m2 ); if ( dr1v == dr2v ) { - r = make_ratio( make_integer( dd1v + dd2v, NIL ), + r = make_ratio( acquire_integer( dd1v + dd2v, NIL ), cell1.payload.ratio.divisor ); } else { - struct cons_pointer dd1vm = make_integer( dd1v * m1, NIL ), - dr1vm = make_integer( dr1v * m1, NIL ), - dd2vm = make_integer( dd2v * m2, NIL ), - dr2vm = make_integer( dr2v * m2, NIL ), + struct cons_pointer dd1vm = acquire_integer( dd1v * m1, NIL ), + dr1vm = acquire_integer( dr1v * m1, NIL ), + dd2vm = acquire_integer( dd2v * m2, NIL ), + dr2vm = acquire_integer( dr2v * m2, NIL ), r1 = make_ratio( dd1vm, dr1vm ), r2 = make_ratio( dd2vm, dr2vm ); r = add_ratio_ratio( r1, r2 ); + if (!eq( r, r1)) { dec_ref( r1);} + if (!eq( r, r2)) { dec_ref( r2);} + /* because the references on dd1vm, dr1vm, dd2vm and dr2vm were * never incremented except when making r1 and r2, decrementing * r1 and r2 should be enought to garbage collect them. */ - dec_ref( r1 ); - dec_ref( r2 ); } result = simplify_ratio( r ); @@ -162,12 +163,12 @@ struct cons_pointer add_integer_ratio( struct cons_pointer intarg, if ( integerp( intarg ) && ratiop( ratarg ) ) { // TODO: not longer works - struct cons_pointer one = make_integer( 1, NIL ), + struct cons_pointer one = acquire_integer( 1, NIL ), ratio = make_ratio( intarg, one ); result = add_ratio_ratio( ratio, ratarg ); - dec_ref( one ); + release_integer( one ); dec_ref( ratio ); } else { result = @@ -231,11 +232,15 @@ struct cons_pointer multiply_ratio_ratio( struct pointer2cell( cell2.payload.ratio.divisor ).payload.integer.value, ddrv = dd1v * dd2v, drrv = dr1v * dr2v; + struct cons_pointer dividend = acquire_integer( ddrv, NIL ); + struct cons_pointer divisor = acquire_integer( drrv, NIL ); struct cons_pointer unsimplified = - make_ratio( make_integer( ddrv, NIL ), - make_integer( drrv, NIL ) ); + make_ratio( dividend, divisor); result = simplify_ratio( unsimplified ); + release_integer( dividend); + release_integer( divisor); + if ( !eq( unsimplified, result ) ) { dec_ref( unsimplified ); } @@ -261,12 +266,11 @@ struct cons_pointer multiply_integer_ratio( struct cons_pointer intarg, if ( integerp( intarg ) && ratiop( ratarg ) ) { // TODO: no longer works; fix - struct cons_pointer one = make_integer( 1, NIL ), + struct cons_pointer one = acquire_integer( 1, NIL ), ratio = make_ratio( intarg, one ); result = multiply_ratio_ratio( ratio, ratarg ); - dec_ref( one ); - dec_ref( ratio ); + release_integer( one ); } else { result = throw_exception( c_string_to_lisp_string diff --git a/src/init.c b/src/init.c index 2d0d2d2..45b534f 100644 --- a/src/init.c +++ b/src/init.c @@ -78,13 +78,8 @@ void bind_special( wchar_t *name, struct cons_pointer ( *executable ) /** * Bind this `value` to this `name` in the `oblist`. */ -void bind_value( wchar_t *name, struct cons_pointer value ) { - struct cons_pointer n = c_string_to_lisp_symbol( name ); - inc_ref( n ); - - deep_bind( n, value ); - - dec_ref( n ); +struct cons_pointer bind_value( wchar_t *name, struct cons_pointer value ) { + return deep_bind( c_string_to_lisp_symbol( name ), value ); } void print_banner( ) { @@ -200,14 +195,14 @@ int main( int argc, char *argv[] ) { FILE *infile = infilename == NULL ? stdin : fopen( infilename, "r"); - bind_value( L"*in*", make_read_stream( file_to_url_file(infile), - make_cons( make_cons - ( c_string_to_lisp_keyword - ( L"url" ), - c_string_to_lisp_string - ( L"system:standard input" ) ), - NIL ) ) ); - bind_value( L"*out*", + lisp_io_in = bind_value( C_IO_IN, make_read_stream( file_to_url_file(infile), + make_cons( make_cons + ( c_string_to_lisp_keyword + ( L"url" ), + c_string_to_lisp_string + ( L"system:standard input" ) ), + NIL ) ) ); + lisp_io_out = bind_value( C_IO_OUT, make_write_stream( file_to_url_file( stdout ), make_cons( make_cons ( c_string_to_lisp_keyword diff --git a/src/io/io.c b/src/io/io.c index d01f788..2db9492 100644 --- a/src/io/io.c +++ b/src/io/io.c @@ -28,11 +28,12 @@ #include -#include "memory/conspage.h" -#include "memory/consspaceobject.h" +#include "arith/integer.h" #include "debug.h" #include "io/fopen.h" -#include "arith/integer.h" +#include "io/io.h" +#include "memory/conspage.h" +#include "memory/consspaceobject.h" #include "ops/intern.h" #include "ops/lispops.h" #include "utils.h" @@ -44,6 +45,16 @@ */ CURLSH *io_share; +/** + * @brief bound to the Lisp string representing C_IO_IN in initialisation. + */ +struct cons_pointer lisp_io_in = NIL; +/** + * @brief bound to the Lisp string representing C_IO_OUT in initialisation. + */ +struct cons_pointer lisp_io_out = NIL; + + /** * Allow a one-character unget facility. This may not be enough - we may need * to allocate a buffer. @@ -400,7 +411,7 @@ void collect_meta( struct cons_pointer stream, char *url ) { struct cons_pointer get_default_stream( bool inputp, struct cons_pointer env ) { struct cons_pointer result = NIL; struct cons_pointer stream_name = - c_string_to_lisp_symbol( inputp ? L"*in*" : L"*out*" ); + inputp ? lisp_io_in : lisp_io_out; inc_ref( stream_name ); diff --git a/src/io/io.h b/src/io/io.h index dc9e8de..0f971a3 100644 --- a/src/io/io.h +++ b/src/io/io.h @@ -11,12 +11,18 @@ #ifndef __psse_io_h #define __psse_io_h #include -#include "consspaceobject.h" +#include "memory/consspaceobject.h" extern CURLSH *io_share; int io_init( ); +#define C_IO_IN L"*in*" +#define C_IO_OUT L"*out*" + +extern struct cons_pointer lisp_io_in; +extern struct cons_pointer lisp_io_out; + URL_FILE *file_to_url_file( FILE * f ); wint_t url_fgetwc( URL_FILE * input ); wint_t url_ungetwc( wint_t wc, URL_FILE * input ); diff --git a/src/io/read.c b/src/io/read.c index 13b0942..bf0b389 100644 --- a/src/io/read.c +++ b/src/io/read.c @@ -291,10 +291,10 @@ struct cons_pointer read_number( struct stack_frame *frame, wint_t initial, bool seen_period ) { debug_print( L"entering read_number\n", DEBUG_IO ); - struct cons_pointer result = make_integer( 0, NIL ); + struct cons_pointer result = acquire_integer( 0, NIL ); /* \todo we really need to be getting `base` from a privileged Lisp name - * and it should be the same privileged name we use when writing numbers */ - struct cons_pointer base = make_integer( 10, NIL ); + struct cons_pointer base = acquire_integer( 10, NIL ); struct cons_pointer dividend = NIL; int places_of_decimals = 0; wint_t c; @@ -330,20 +330,20 @@ struct cons_pointer read_number( struct stack_frame *frame, debug_print( L"read_number: ratio slash seen\n", DEBUG_IO ); dividend = result; + + result = acquire_integer( 0, NIL ); + // If I do replace_integer_p here instead of acquire_integer, + // and thus reclaim the garbage, I get a regression. Dom't yet + // know why. } break; case LCOMMA: // silently ignore comma. break; default: - { - struct cons_pointer digit = make_integer( ( int ) c - ( int ) '0', - NIL ); - struct cons_pointer new_result = add_integers( multiply_integers( result, base ), - digit ); - dec_ref( result); - dec_ref( digit); - result = new_result; + result = add_integers( multiply_integers( result, base ), + acquire_integer( ( int ) c - ( int ) '0', + NIL ) ); debug_printf( DEBUG_IO, L"read_number: added character %c, result now ", @@ -354,7 +354,6 @@ struct cons_pointer read_number( struct stack_frame *frame, if ( seen_period ) { places_of_decimals++; } - } } } @@ -364,14 +363,13 @@ struct cons_pointer read_number( struct stack_frame *frame, url_ungetwc( c, input ); if ( seen_period ) { - struct cons_pointer divisor = make_integer( powl( to_long_double( base ), - places_of_decimals ), - NIL ); debug_print( L"read_number: converting result to real\n", DEBUG_IO ); - struct cons_pointer div = make_ratio( result, - divisor); - dec_ref( divisor); + acquire_integer( powl + ( to_long_double + ( base ), + places_of_decimals ), + NIL ) ); inc_ref( div ); result = make_real( to_long_double( div ) ); @@ -383,19 +381,15 @@ struct cons_pointer read_number( struct stack_frame *frame, } if ( neg ) { - struct cons_pointer negt = negative( result ); debug_print( L"read_number: converting result to negative\n", DEBUG_IO ); - dec_ref( result); - result = negt; + result = negative( result ); } debug_print( L"read_number returning\n", DEBUG_IO ); debug_dump_object( result, DEBUG_IO ); - dec_ref( base); - return result; } diff --git a/src/memory/conspage.c b/src/memory/conspage.c index b30ee53..42c0ad1 100644 --- a/src/memory/conspage.c +++ b/src/memory/conspage.c @@ -187,6 +187,8 @@ void free_cell( struct cons_pointer pointer ) { case VECTORPOINTTV: free_vso( pointer ); break; + default: + fprintf( stderr, "WARNING: Freeing object of type %s!", (char *) &(cell->tag.bytes)); } strncpy( &cell->tag.bytes[0], FREETAG, TAGLENGTH ); @@ -231,7 +233,7 @@ struct cons_pointer allocate_cell( uint32_t tag ) { cell->tag.value = tag; - cell->count = 0; + cell->count = 1; cell->payload.cons.car = NIL; cell->payload.cons.cdr = NIL; diff --git a/src/ops/intern.c b/src/ops/intern.c index 1f6585b..cafc294 100644 --- a/src/ops/intern.c +++ b/src/ops/intern.c @@ -424,9 +424,8 @@ struct cons_pointer set( struct cons_pointer key, struct cons_pointer value, } /** - * Binds this key to this value in the global oblist, but doesn't affect the - * current environment. May not be useful except in bootstrapping (and even - * there it may not be especially useful). + * @brief Binds this key to this value in the global oblist. + */ struct cons_pointer deep_bind( struct cons_pointer key, struct cons_pointer value ) { @@ -448,10 +447,10 @@ deep_bind( struct cons_pointer key, struct cons_pointer value ) { } debug_print( L"deep_bind returning ", DEBUG_BIND ); - debug_print_object( oblist, DEBUG_BIND ); + debug_print_object( key, DEBUG_BIND ); debug_println( DEBUG_BIND ); - return oblist; + return key; } /**