From ce1c72973db7b80b381333b9daa60f6f288b8c28 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Thu, 13 Mar 2025 12:47:54 +0000 Subject: [PATCH 1/3] Defensive commit before experimenting with code::blocks --- src/arith/peano.h | 4 ++++ src/io/read.c | 19 ++++++++++--------- src/io/read.h | 9 +++++++++ 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/src/arith/peano.h b/src/arith/peano.h index 3076391..b1d3087 100644 --- a/src/arith/peano.h +++ b/src/arith/peano.h @@ -14,6 +14,10 @@ /** * The maximum value we will allow in an integer cell. + * + * NOTE: 20250312 this is 2^60. WHY? Given that we're using the sign bit + * inside the int64 record, we only have 63 value bits; but why did I decide + * not to use all 63? */ #define MAX_INTEGER ((__int128_t)0x0fffffffffffffffL) diff --git a/src/io/read.c b/src/io/read.c index df0735b..bf92f35 100644 --- a/src/io/read.c +++ b/src/io/read.c @@ -83,7 +83,7 @@ struct cons_pointer read_path( URL_FILE * input, wint_t initial, prefix = c_string_to_lisp_symbol( L"oblist" ); break; case '$': - case L'§': + case LSESSION: prefix = c_string_to_lisp_symbol( L"session" ); break; } @@ -245,7 +245,7 @@ struct cons_pointer read_continuation( struct stack_frame *frame, } break; case '$': - case L'§': + case LSESSION: result = read_path( input, c, NIL ); break; default: @@ -298,9 +298,9 @@ struct cons_pointer read_number( struct stack_frame *frame, initial ); for ( c = initial; iswdigit( c ) - || c == L'.' 
|| c == L'/' || c == L','; c = url_fgetwc( input ) ) { + || c == LPERIOD || c == LSLASH || c == LCOMMA; c = url_fgetwc( input ) ) { switch ( c ) { - case L'.': + case LPERIOD: if ( seen_period || !nilp( dividend ) ) { return throw_exception( c_string_to_lisp_string ( L"Malformed number: too many periods" ), @@ -311,7 +311,7 @@ struct cons_pointer read_number( struct stack_frame *frame, seen_period = true; } break; - case L'/': + case LSLASH: if ( seen_period || !nilp( dividend ) ) { return throw_exception( c_string_to_lisp_string ( L"Malformed number: dividend of rational must be integer" ), @@ -324,11 +324,12 @@ struct cons_pointer read_number( struct stack_frame *frame, result = make_integer( 0, NIL ); } break; - case L',': + case LCOMMA: // silently ignore it. break; default: result = add_integers( multiply_integers( result, base ), + /* /todo: this won't work for hex digits */ make_integer( ( int ) c - ( int ) '0', NIL ) ); @@ -402,7 +403,7 @@ struct cons_pointer read_list( struct stack_frame *frame, for ( c = url_fgetwc( input ); iswblank( c ) || iswcntrl( c ); c = url_fgetwc( input ) ); - if ( c == L'.' ) { + if ( c == LPERIOD ) { /* might be a dotted pair; indeed, if we rule out numbers with * initial periods, it must be a dotted pair. \todo Ought to check, * howerver, that there's only one form after the period. */ @@ -433,7 +434,7 @@ struct cons_pointer read_map( struct stack_frame *frame, make_hashmap( DFLT_HASHMAP_BUCKETS, NIL, TRUE ); wint_t c = initial; - while ( c != L'}' ) { + while ( c != LCBRACE ) { struct cons_pointer key = read_continuation( frame, frame_pointer, env, input, c ); @@ -446,7 +447,7 @@ struct cons_pointer read_map( struct stack_frame *frame, /* skip commaa and whitespace at this point. 
*/ for ( c = url_fgetwc( input ); - c == L',' || iswblank( c ) || iswcntrl( c ); + c == LCOMMA || iswblank( c ) || iswcntrl( c ); c = url_fgetwc( input ) ); result = diff --git a/src/io/read.h b/src/io/read.h index 031bb4f..7f58d0c 100644 --- a/src/io/read.h +++ b/src/io/read.h @@ -13,6 +13,15 @@ #include "memory/consspaceobject.h" +/* characters (other than arabic numberals) used in number representations */ +#define LCOMMA L',' +#define LPERIOD L'.' +#define LSLASH L'/' +/* ... used in map representations */ +#define LCBRACE L'}' +/* ... used in path representations */ +#define LSESSION L'§' + /** * read the next object on this input stream and return a cons_pointer to it. */ From e9f49d06a62f115776d7784e20ac25275efa2ae9 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Thu, 13 Mar 2025 18:26:38 +0000 Subject: [PATCH 2/3] Added code::blocks project experimentally; also, added macro for bits --- Makefile | 4 +- post-scarcity.cbp | 157 +++++++++++++++++++++++++++++++++ post-scarcity.cscope_file_list | 58 ++++++++++++ post-scarcity.layout | 15 ++++ src/arith/integer.c | 15 ++-- src/arith/peano.h | 11 +-- state-of-play.md | 28 ++++++ 7 files changed, 275 insertions(+), 13 deletions(-) create mode 100644 post-scarcity.cbp create mode 100644 post-scarcity.cscope_file_list create mode 100644 post-scarcity.layout create mode 100644 state-of-play.md diff --git a/Makefile b/Makefile index 7e5efb4..67bb015 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ SRC_DIRS ?= ./src SRCS := $(shell find $(SRC_DIRS) -name *.cpp -or -name *.c -or -name *.s) HDRS := $(shell find $(SRC_DIRS) -name *.h) -OBJS := $(addsuffix .o,$(basename $(SRCS))) +OBJS := $(addsuffix .o,$(basename $(SRCS))) DEPS := $(OBJS:.o=.d) TESTS := $(shell find unit-tests -name *.sh) @@ -21,6 +21,8 @@ DEBUGFLAGS := -g3 all: $(TARGET) +Debug: $(TARGET) + $(TARGET): $(OBJS) Makefile $(CC) $(DEBUGFLAGS) $(LDFLAGS) $(OBJS) -o $@ $(LDFLAGS) $(LOADLIBES) $(LDLIBS) diff --git a/post-scarcity.cbp b/post-scarcity.cbp new 
file mode 100644 index 0000000..a1f42e0 --- /dev/null +++ b/post-scarcity.cbp @@ -0,0 +1,157 @@ + + + + + + diff --git a/post-scarcity.cscope_file_list b/post-scarcity.cscope_file_list new file mode 100644 index 0000000..6fbf5ec --- /dev/null +++ b/post-scarcity.cscope_file_list @@ -0,0 +1,58 @@ +"/home/simon/workspace/post-scarcity/utils_src/readprintwc/readprintwc.c" +"/home/simon/workspace/post-scarcity/src/memory/vectorspace.c" +"/home/simon/workspace/post-scarcity/src/arith/peano.c" +"/home/simon/workspace/post-scarcity/src/init.c" +"/home/simon/workspace/post-scarcity/src/utils.h" +"/home/simon/workspace/post-scarcity/src/ops/intern.h" +"/home/simon/workspace/post-scarcity/src/arith/ratio.h" +"/home/simon/workspace/post-scarcity/src/io/io.c" +"/home/simon/workspace/post-scarcity/src/memory/conspage.h" +"/home/simon/workspace/post-scarcity/src/time/psse_time.h" +"/home/simon/workspace/post-scarcity/src/memory/cursor.h" +"/home/simon/workspace/post-scarcity/src/memory/dump.h" +"/home/simon/workspace/post-scarcity/src/ops/intern.c" +"/home/simon/workspace/post-scarcity/src/memory/lookup3.c" +"/home/simon/workspace/post-scarcity/src/io/fopen.h" +"/home/simon/workspace/post-scarcity/src/version.h" +"/home/simon/workspace/post-scarcity/src/memory/consspaceobject.h" +"/home/simon/workspace/post-scarcity/src/ops/meta.h" +"/home/simon/workspace/post-scarcity/src/arith/real.c" +"/home/simon/workspace/post-scarcity/src/ops/loop.c" +"/home/simon/workspace/post-scarcity/src/arith/integer.h" +"/home/simon/workspace/post-scarcity/src/time/psse_time.c" +"/home/simon/workspace/post-scarcity/src/memory/vectorspace.h" +"/home/simon/workspace/post-scarcity/src/memory/hashmap.c" +"/home/simon/workspace/post-scarcity/src/io/read.c" +"/home/simon/workspace/post-scarcity/src/ops/lispops.h" +"/home/simon/workspace/post-scarcity/src/ops/loop.h" +"/home/simon/workspace/post-scarcity/src/memory/stack.h" +"/home/simon/workspace/post-scarcity/utils_src/tagvalcalc/tagvalcalc.c" 
+"/home/simon/workspace/post-scarcity/src/debug.c" +"/home/simon/workspace/post-scarcity/src/io/read.h" +"/home/simon/workspace/post-scarcity/src/ops/meta.c" +"/home/simon/workspace/post-scarcity/src/memory/dump.c" +"/home/simon/workspace/post-scarcity/src/repl.c" +"/home/simon/workspace/post-scarcity/src/io/print.c" +"/home/simon/workspace/post-scarcity/src/memory/hashmap.h" +"/home/simon/workspace/post-scarcity/src/utils.c" +"/home/simon/workspace/post-scarcity/src/io/io.h" +"/home/simon/workspace/post-scarcity/src/memory/stack.c" +"/home/simon/workspace/post-scarcity/utils_src/debugflags/debugflags.c" +"/home/simon/workspace/post-scarcity/src/memory/consspaceobject.c" +"/home/simon/workspace/post-scarcity/src/memory/conspage.c" +"/home/simon/workspace/post-scarcity/src/memory/cursor.c" +"/home/simon/workspace/post-scarcity/src/arith/ratio.c" +"/home/simon/workspace/post-scarcity/Makefile" +"/home/simon/workspace/post-scarcity/src/arith/peano.h" +"/home/simon/workspace/post-scarcity/src/memory/lookup3.h" +"/home/simon/workspace/post-scarcity/src/arith/real.h" +"/home/simon/workspace/post-scarcity/src/ops/equal.c" +"/home/simon/workspace/post-scarcity/src/ops/lispops.c" +"/home/simon/workspace/post-scarcity/src/authorise.h" +"/home/simon/workspace/post-scarcity/src/io/print.h" +"/home/simon/workspace/post-scarcity/src/authorise.c" +"/home/simon/workspace/post-scarcity/src/debug.h" +"/home/simon/workspace/post-scarcity/src/arith/integer.c" +"/home/simon/workspace/post-scarcity/src/ops/equal.h" +"/home/simon/workspace/post-scarcity/src/repl.h" +"/home/simon/workspace/post-scarcity/src/io/fopen.c" diff --git a/post-scarcity.layout b/post-scarcity.layout new file mode 100644 index 0000000..98bd2b3 --- /dev/null +++ b/post-scarcity.layout @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/src/arith/integer.c b/src/arith/integer.c index eef171b..63f7dd2 100644 --- a/src/arith/integer.c +++ b/src/arith/integer.c @@ -87,9 +87,10 @@ __int128_t cell_value( struct 
cons_pointer c, char op, bool is_first_cell ) { /** * Overwrite the value field of the integer indicated by `new` with - * the least significant 60 bits of `val`, and return the more significant - * bits (if any) right-shifted by 60 places. Destructive, primitive, do not - * use in any context except primitive operations on integers. + * the least significant INTEGER_BITS bits of `val`, and return the + * more significant bits (if any) right-shifted by INTEGER_BITS places. + * Destructive, primitive, do not use in any context except primitive + * operations on integers. * * @param val the value to represent; * @param less_significant the less significant words of this bignum, if any, @@ -106,7 +107,7 @@ __int128_t int128_to_integer( __int128_t val, if ( MAX_INTEGER >= val ) { carry = 0; } else { - carry = val >> 60; + carry = val >> INTEGER_BITS; debug_printf( DEBUG_ARITH, L"int128_to_integer: 64 bit overflow; setting carry to %ld\n", ( int64_t ) carry ); @@ -136,7 +137,7 @@ struct cons_pointer make_integer_128( __int128_t val, less_significant = make_integer( ( long int ) val & MAX_INTEGER, less_significant ); - val = val >> 60; + val = val >> INTEGER_BITS; } } while ( nilp( result ) ); @@ -290,7 +291,7 @@ struct cons_pointer multiply_integers( struct cons_pointer a, /* if xj exceeds one digit, break it into the digit dj and * the carry */ - carry = xj >> 60; + carry = xj >> INTEGER_BITS; struct cons_pointer dj = make_integer( xj & MAX_INTEGER, NIL ); /* destructively modify ri by appending dj */ @@ -361,7 +362,7 @@ struct cons_pointer integer_to_string( struct cons_pointer int_pointer, while ( accumulator > 0 || !nilp( next ) ) { if ( accumulator < MAX_INTEGER && !nilp( next ) ) { accumulator += - ( pointer2cell( next ).payload.integer.value << 60 ); + ( pointer2cell( next ).payload.integer.value << INTEGER_BITS ); next = pointer2cell( next ).payload.integer.more; } int offset = ( int ) ( accumulator % base ); diff --git a/src/arith/peano.h b/src/arith/peano.h 
index b1d3087..163d47d 100644 --- a/src/arith/peano.h +++ b/src/arith/peano.h @@ -14,12 +14,13 @@ /** * The maximum value we will allow in an integer cell. - * - * NOTE: 20250312 this is 2^60. WHY? Given that we're using the sign bit - * inside the int64 record, we only have 63 value bits; but why did I decide - * not to use all 63? */ -#define MAX_INTEGER ((__int128_t)0x0fffffffffffffffL) +#define MAX_INTEGER ((__int128_t)0x7fffffffffffffffL) +/** + * @brief Number of value bits in an integer cell + * + */ +#define INTEGER_BITS 63 bool zerop( struct cons_pointer arg ); diff --git a/state-of-play.md b/state-of-play.md new file mode 100644 index 0000000..bd38ead --- /dev/null +++ b/state-of-play.md @@ -0,0 +1,28 @@ +# State of Play + +## 20250313 + +OK, the 60 bit integer cell happens in `int128_to_integer` in `arith/integer.c`. It seems to be being done consistently; but there is no obvious reason. `MAX_INTEGER` is defined in `arith/peano.h`. I've changed both to use 63 bits, and this makes no change to the number of unit tests that fail. + +With this change, `(fact 21)`, which was previously printing nothing, now prints a value, `11,891,611,015,076,642,816`. However, this value is definitively wrong, should be `51,090,942,171,709,440,000`. But, I hadn't fixed the shift in `integer_to_string`; have now... still no change in number of failed tests... + +But `(fact 21)` gives a different wrong value, `4,974,081,987,435,560,960`. Factorial values returned by `fact` are correct (agree with SBCL running the same code) up to `(fact 20)`, with both 60 bit integer cells and 63 bit integer cells giving correct values. + +Uhhhmmm... but I'd missed two other places where I'd had the number of significant bits as a numeric literal. Fixed those and now `(fact 21)` does not return a printable answer at all, although the internal representation is definitely wrong. So we may be seeing why I chose 60 bits. + +Bother. 
+ +## 20250312 + +Printing of bignums definitely doesn't work; I'm not persuaded that reading of bignums works right either, and there are probably problems with bignum arithmetic too. + +The internal memory representation of a number rolls over from one cell to two cells at 1152921504606846976, and I'm not at all certain why it does because this is neither 2<sup>63</sup> nor 2<sup>64</sup>. + +| | | | +| -------------- | -------------------- | ---- | +| 2<sup>62</sup> | 4611686018427387904 | | +| 2<sup>63</sup> | 9223372036854775808 | | +| 2<sup>64</sup> | 18446744073709551616 | | +| Mystery number | 1152921504606846976 | | + +In fact, our mystery number turns out (by inspection) to be 2<sup>60</sup>. But **why**? From 4e76fad655d1537878ace936a0cf12b264c2505e Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Fri, 14 Mar 2025 10:24:38 +0000 Subject: [PATCH 3/3] Revert to 60-bit bignum chunks; better `make test` rigging Still failing the three-chunk bignum unit tests --- Makefile | 2 +- src/arith/peano.h | 4 ++-- state-of-play.md | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 67bb015..b4f9d3c 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ else indent $(INDENT_FLAGS) $(SRCS) $(HDRS) endif -test: $(OBJS) $(TESTS) Makefile +test: $(TESTS) Makefile $(TARGET) bash ./unit-tests.sh .PHONY: clean diff --git a/src/arith/peano.h b/src/arith/peano.h index 163d47d..a7d63b3 100644 --- a/src/arith/peano.h +++ b/src/arith/peano.h @@ -15,12 +15,12 @@ /** * The maximum value we will allow in an integer cell. 
 */ -#define MAX_INTEGER ((__int128_t)0x7fffffffffffffffL) +#define MAX_INTEGER ((__int128_t)0x0fffffffffffffffL) /** * @brief Number of value bits in an integer cell * */ -#define INTEGER_BITS 63 +#define INTEGER_BITS 60 bool zerop( struct cons_pointer arg ); diff --git a/state-of-play.md b/state-of-play.md index bd38ead..e96a15a 100644 --- a/state-of-play.md +++ b/state-of-play.md @@ -1,5 +1,23 @@ # State of Play +## 20250314 + +Thinking further about this, I think at least part of the problem is that I'm storing bignums as cons-space objects, which means that the integer representation I can store has to fit into the size of a cons pointer, which is 64 bits. Which means that to store integers larger than 64 bits I need chains of these objects. + +If I stored bignums in vector space, this problem would go away (especially as I have not implemented vector space yet). + +However, having bignums in vector space would cause a churn of non-standard-sized objects in vector space, which would mean much more frequent garbage collection, which has to be mark-and-sweep because unequal-sized objects, otherwise you get heap fragmentation. + +So maybe I just have to put more work into debugging my cons-space bignums. + +Bother, bother. + +There are no perfect solutions. + +However however, it's only the node that's short on vector space which has to pause to do a mark and sweep. It doesn't interrupt any other node, because their reference to the object will remain the same, even if it is the 'home node' of the object which is sweeping. So all the node has to do is set its busy flag, do GC, and clear its busy flag. The rest of the system can just be carrying on as normal. + +So... maybe mark and sweep isn't the big deal I think it is? + ## 20250313 OK, the 60 bit integer cell happens in `int128_to_integer` in `arith/integer.c`. It seems to be being done consistently; but there is no obvious reason. `MAX_INTEGER` is defined in `arith/peano.h`. 
I've changed both to use 63 bits, and this makes no change to the number of unit tests that fail.