Work on reducing allocation leaks in read_number(). This is now improved, but not yet satisfactory.

This commit is contained in:
Simon Brooke 2026-02-04 22:57:10 +00:00
parent e489d02069
commit 351ca5bd17
9 changed files with 275 additions and 17 deletions

View file

@ -4,4 +4,6 @@
(cond ((= n 1) 1) (cond ((= n 1) 1)
(t (* n (fact (- n 1))))))) (t (* n (fact (- n 1)))))))
(fact 1000) ; (fact 1000)

View file

@ -1 +1 @@
(slurp (set! f (open "http://www.journeyman.cc/"))) (slurp (open "http://www.journeyman.cc/"))

View file

@ -86,6 +86,7 @@ struct cons_pointer make_integer( int64_t value, struct cons_pointer more ) {
struct cons_space_object *cell = &pointer2cell( result ); struct cons_space_object *cell = &pointer2cell( result );
cell->payload.integer.value = value; cell->payload.integer.value = value;
cell->payload.integer.more = more; cell->payload.integer.more = more;
inc_ref(result);
} }
debug_print( L"make_integer: returning\n", DEBUG_ALLOC ); debug_print( L"make_integer: returning\n", DEBUG_ALLOC );

View file

@ -28,6 +28,7 @@
#include "memory/hashmap.h" #include "memory/hashmap.h"
#include "ops/intern.h" #include "ops/intern.h"
#include "io/io.h" #include "io/io.h"
#include "io/fopen.h"
#include "ops/lispops.h" #include "ops/lispops.h"
#include "ops/meta.h" #include "ops/meta.h"
#include "arith/peano.h" #include "arith/peano.h"
@ -124,6 +125,7 @@ int main( int argc, char *argv[] ) {
int option; int option;
bool dump_at_end = false; bool dump_at_end = false;
bool show_prompt = false; bool show_prompt = false;
char * infilename = NULL;
setlocale( LC_ALL, "" ); setlocale( LC_ALL, "" );
if ( io_init( ) != 0 ) { if ( io_init( ) != 0 ) {
@ -131,7 +133,7 @@ int main( int argc, char *argv[] ) {
exit( 1 ); exit( 1 );
} }
while ( ( option = getopt( argc, argv, "phdv:" ) ) != -1 ) { while ( ( option = getopt( argc, argv, "phdv:i:" ) ) != -1 ) {
switch ( option ) { switch ( option ) {
case 'd': case 'd':
dump_at_end = true; dump_at_end = true;
@ -141,6 +143,9 @@ int main( int argc, char *argv[] ) {
print_options( stdout ); print_options( stdout );
exit( 0 ); exit( 0 );
break; break;
case 'i' :
infilename = optarg;
break;
case 'p': case 'p':
show_prompt = true; show_prompt = true;
break; break;
@ -191,8 +196,12 @@ int main( int argc, char *argv[] ) {
fwide( stdout, 1 ); fwide( stdout, 1 );
fwide( stderr, 1 ); fwide( stderr, 1 );
fwide( sink->handle.file, 1 ); fwide( sink->handle.file, 1 );
bind_value( L"*in*", make_read_stream( file_to_url_file( stdin ),
make_cons( make_cons FILE *infile = infilename == NULL ? stdin : fopen( infilename, "r");
bind_value( L"*in*", make_read_stream( file_to_url_file(infile),
make_cons( make_cons
( c_string_to_lisp_keyword ( c_string_to_lisp_keyword
( L"url" ), ( L"url" ),
c_string_to_lisp_string c_string_to_lisp_string

View file

@ -330,17 +330,20 @@ struct cons_pointer read_number( struct stack_frame *frame,
debug_print( L"read_number: ratio slash seen\n", debug_print( L"read_number: ratio slash seen\n",
DEBUG_IO ); DEBUG_IO );
dividend = result; dividend = result;
result = make_integer( 0, NIL );
} }
break; break;
case LCOMMA: case LCOMMA:
// silently ignore comma. // silently ignore comma.
break; break;
default: default:
result = add_integers( multiply_integers( result, base ), {
make_integer( ( int ) c - ( int ) '0', struct cons_pointer digit = make_integer( ( int ) c - ( int ) '0',
NIL ) ); NIL );
struct cons_pointer new_result = add_integers( multiply_integers( result, base ),
digit );
dec_ref( result);
dec_ref( digit);
result = new_result;
debug_printf( DEBUG_IO, debug_printf( DEBUG_IO,
L"read_number: added character %c, result now ", L"read_number: added character %c, result now ",
@ -351,6 +354,7 @@ struct cons_pointer read_number( struct stack_frame *frame,
if ( seen_period ) { if ( seen_period ) {
places_of_decimals++; places_of_decimals++;
} }
}
} }
} }
@ -360,13 +364,14 @@ struct cons_pointer read_number( struct stack_frame *frame,
url_ungetwc( c, input ); url_ungetwc( c, input );
if ( seen_period ) { if ( seen_period ) {
debug_print( L"read_number: converting result to real\n", DEBUG_IO ); struct cons_pointer divisor = make_integer( powl( to_long_double( base ),
struct cons_pointer div = make_ratio( result,
make_integer( powl
( to_long_double
( base ),
places_of_decimals ), places_of_decimals ),
NIL ) ); NIL );
debug_print( L"read_number: converting result to real\n", DEBUG_IO );
struct cons_pointer div = make_ratio( result,
divisor);
dec_ref( divisor);
inc_ref( div ); inc_ref( div );
result = make_real( to_long_double( div ) ); result = make_real( to_long_double( div ) );
@ -378,15 +383,19 @@ struct cons_pointer read_number( struct stack_frame *frame,
} }
if ( neg ) { if ( neg ) {
struct cons_pointer negt = negative( result );
debug_print( L"read_number: converting result to negative\n", debug_print( L"read_number: converting result to negative\n",
DEBUG_IO ); DEBUG_IO );
result = negative( result ); dec_ref( result);
result = negt;
} }
debug_print( L"read_number returning\n", DEBUG_IO ); debug_print( L"read_number returning\n", DEBUG_IO );
debug_dump_object( result, DEBUG_IO ); debug_dump_object( result, DEBUG_IO );
dec_ref( base);
return result; return result;
} }

View file

@ -1,5 +1,162 @@
# State of Play # State of Play
## 20260204
### Testing what is leaking memory
#### Analysis
If you just start up and immediately abort the current build of psse, you get:
> Allocation summary: allocated 19986; deallocated 245; not deallocated 19741.
Allocation summaries from the current unit tests give the following ranges of values:
| | Min | Max | |
| --------------- | ----- | ----- | ---- |
| Allocated | 19991 | 39009 | |
| Deallocated | 238 | 1952 | |
| Not deallocated | 19741 | 37057 | |
The numbers go up broadly in sinc with one another — that is to say, broadly, as the number allocated rises, so do both the numbers deallocated and the numbers not deallocated. But not exactly.
#### Strategy: what doesn't get cleaned up?
Write a test wrapper which reads a file of forms, one per line, from standard input, and passes each in turn to a fresh invocation of psse, reporting the form and the allocation summary.
```bash
#1/bin/bash
while IFS= read -r form; do
allocation=`echo ${form} | ../../target/psse 2>&1 | grep Allocation`
echo "* ${allocation}: ${form}"
done
```
So, from this:
* Allocation summary: allocated 19986; deallocated 245; not deallocated 19741.:
* Allocation summary: allocated 19990; deallocated 249; not deallocated 19741.: ()
* Allocation summary: allocated 20019; deallocated 253; not deallocated 19766.: nil
Allocating an empty list allocates four additional cells, all of which are deallocated. Allocating 'nil' allocates a further **29** cells, 25 of which are not deallocated. WTF?
Following further work I have this, showing the difference added to the base case of cells allocated, cells deallocated, and, most critically, cells not deallocated.
From this we see that reading and printing `nil` allocates an additional 33 cells, of which eight are not cleaned up. That's startling, and worrying.
But the next row shows us that reading and printing an empty list costs only four cells, each of which is cleaned up. Further down the table we see that an empty map is also correctly cleaned up. Where we're leaking memory is in reading (or printing, although I doubt this) symbols, either atoms, numbers, or keywords (I haven't yet tried strings, but I expect they're similar.)
| **Case** | **Delta Allocated** | **Delta Deallocated** | **Delta Not Deallocated** |
| --------------------------------- | ------------------- | --------------------- | ------------------------- |
| **Basecase** | 0 | 0 | 0 |
| **nil** | 33 | 8 | 25 |
| **()** | 4 | 4 | 0 |
| **(quote ())** | 39 | 2 | 37 |
| **(list )** | 37 | 12 | 25 |
| **(list 1)** | 47 | 14 | 33 |
| **(list 1 1)** | 57 | 16 | 41 |
| **(list 1 1 1)** | 67 | 18 | 49 |
| **(list 1 2 3)** | 67 | 18 | 49 |
| **(+)** | 36 | 10 | 26 |
| **(+ 1)** | 44 | 12 | 32 |
| **(+ 1 1)** | 53 | 14 | 39 |
| **(+ 1 1 1)** | 62 | 16 | 46 |
| **(+ 1 2 3)** | 62 | 16 | 46 |
| **(list 'a 'a 'a)** | 151 | 33 | 118 |
| **(list 'a 'b 'c)** | 151 | 33 | 118 |
| **(list :a :b :c)** | 121 | 15 | 106 |
| **(list :alpha :bravo :charlie)** | 485 | 15 | 470 |
| **{}** | 6 | 6 | 0 |
| **{:z 0}** | 43 | 10 | 33 |
| **{:zero 0}** | 121 | 10 | 111 |
| **{:z 0 :o 1}** | 80 | 11 | 69 |
| **{:zero 0 :one 1}** | 210 | 14 | 196 |
| **{:z 0 :o 1 :t 2}** | 117 | 12 | 105 |
Looking at the entries, we see that
1. each number read costs ten allocations, of which only two are successfully deallocated;
2. the symbol `list` costs 33 cells, of which 25 are not deallocated, whereas the symbol `+` costs only one cell fewer, and an additional cell is not deallocated. So it doesn't seem that cell allocation scales with the length of the symbol;
3. Keyword allocation does scale with the length of the keyword, apparently, since `(list :a :b :c)` allocates 121 and deallocates 15, while `(list :alpha :bravo :charlie)` allocates 485 and deallocates the same 15;
4. The fact that both those two deallocate 15, and a addition of three numbers `(+ 1 2 3)` or `(+ 1 1 1)` deallocates 16 suggest to me that the list structure is being fully reclaimed but atoms are not being.
5. The atom `'a` costs more to read than the keyword `:a` because the reader macro is expanding `'a` to `(quote a)` behind the scenes.
### The integer allocation bug
Looking at what happens when we read a single digit number, we get the following:
```
2
Entering make_integer
Allocated cell of type 'INTR' at 19, 507
make_integer: returning
INTR (1381256777) at page 19, offset 507 count 0
Integer cell: value 0, count 0
Entering make_integer
Allocated cell of type 'INTR' at 19, 508
make_integer: returning
INTR (1381256777) at page 19, offset 508 count 0
Integer cell: value 10, count 0
Entering make_integer
Allocated cell of type 'INTR' at 19, 509
make_integer: returning
INTR (1381256777) at page 19, offset 509 count 0
Integer cell: value 2, count 0
Entering make_integer
Allocated cell of type 'INTR' at 19, 510
make_integer: returning
INTR (1381256777) at page 19, offset 510 count 0
Integer cell: value 0, count 0
Entering make_integer
Allocated cell of type 'INTR' at 19, 506
make_integer: returning
INTR (1381256777) at page 19, offset 506 count 0
Integer cell: value 0, count 0
Entering make_integer
Allocated cell of type 'INTR' at 19, 505
make_integer: returning
INTR (1381256777) at page 19, offset 505 count 0
Integer cell: value 0, count 0
Entering make_integer
Allocated cell of type 'INTR' at 19, 504
make_integer: returning
INTR (1381256777) at page 19, offset 504 count 0
Integer cell: value 0, count 0
Allocated cell of type 'STRG' at 19, 503
Freeing cell STRG (1196577875) at page 19, offset 503 count 0
String cell: character '2' (50) with hash 0; next at page 0 offset 0, count 0
value: "2"
Freeing cell INTR (1381256777) at page 19, offset 504 count 0
Integer cell: value 2, count 0
2
Allocated cell of type 'SYMB' at 19, 504
Allocated cell of type 'SYMB' at 19, 503
Allocated cell of type 'SYMB' at 19, 502
Allocated cell of type 'SYMB' at 19, 501
Freeing cell SYMB (1112365395) at page 19, offset 501 count 0
Symbol cell: character '*' (42) with hash 485100; next at page 19 offset 502, count 0
value: *in*
Freeing cell SYMB (1112365395) at page 19, offset 502 count 0
Symbol cell: character 'i' (105) with hash 11550; next at page 19 offset 503, count 0
value: in*
Freeing cell SYMB (1112365395) at page 19, offset 503 count 0
Symbol cell: character 'n' (110) with hash 110; next at page 19 offset 504, count 0
value: n*
Freeing cell SYMB (1112365395) at page 19, offset 504 count 0
Symbol cell: character '*' (42) with hash 0; next at page 0 offset 0, count 0
value: *
```
Many things are worrying here.
1. The only thing being freed here is the symbol to which the read stream is bound — and I didn't see where that got allocated, but we shouldn't be allocating and tearing down a symbol for every read! This implies that when I create a string with `c_string_to_lisp_string`, I need to make damn sure that that string is deallocated as soon as I'm done with it — and wherever I'm dealing with symbols which will be referred to repeatedly in `C` code, I need either
1. to bind a global on the C side of the world, which will become messy;
2. or else write a hash function which returns, for a `C` string, the same value that the standard hashing function will return for the lexically equivalent `Lisp` string, so that I can search hashmap structures from C without having to allocate and deallocate a fresh copy of the `Lisp` string;
3. In reading numbers, I'm generating a fresh instance of `Lisp zero` and `Lisp ten`, each time `read_integer` is called, and I'm not deallocating them.
4. I am correctly deallocating the number I did read, though!
## 20260203 ## 20260203
I'm consciously avoiding the bignum issue for now. My current thinking is that if the C code only handles 64 bit integers, and bignums have to be done in Lisp code, that's perfectly fine with me. I'm consciously avoiding the bignum issue for now. My current thinking is that if the C code only handles 64 bit integers, and bignums have to be done in Lisp code, that's perfectly fine with me.
@ -53,6 +210,8 @@ In other words, all failures are in bignum arithmetic **except** that I still ha
### Zig ### Zig
I've also experimented with autotranslating my C into Zig, but this failed. Although I don't think C is the right language for implementing my base Lisp in, it's what I've got; and until I can get some form of autotranslate to bootstrap me into some more modern systems language, I think I need to stick with it.
## 20250704 ## 20250704
Right, I'm getting second and subsequent integer cells with negative values, which should not happen. This is probably the cause of (at least some of) the bignum problems. I need to find out why. This is (probably) fixable. Right, I'm getting second and subsequent integer cells with negative values, which should not happen. This is probably the cause of (at least some of) the bignum problems. I need to find out why. This is (probably) fixable.

View file

@ -0,0 +1,22 @@
#1/bin/bash
echo "Case, Summary, Allocated, Deallocated, Not deallocated, Delta Allocated, Delta Deallocated, Delta Not Deallocated"
basecase=`echo '' | ../../target/psse 2>&1 | grep Allocation | tr -d '[:punct:]'`
bca=`echo ${basecase} | awk '{print $4}'`
bcd=`echo ${basecase} | awk '{print $6}'`
bcn=`echo ${basecase} | awk '{print $9}'`
echo "\"Basecase\", \"${basecase}\", ${bca}, ${bcd}, ${bcn}"
while IFS= read -r form; do
allocation=`echo ${form} | ../../target/psse 2>&1 | grep Allocation | tr -d '[:punct:]'`
tca=`echo ${allocation} | awk '{print $4}'`
tcd=`echo ${allocation} | awk '{print $6}'`
tcn=`echo ${allocation} | awk '{print $9}'`
dca=`echo "${tca} - ${bca}" | bc`
dcd=`echo "${tcd} - ${bcd}" | bc`
dcn=`echo "${tcn} - ${bcn}" | bc`
echo "\"${form}\", \"${allocation}\", ${tca}, ${tcd}, ${tcn}, ${dca}, ${dcd}, ${dcn}"
done

View file

@ -0,0 +1,28 @@
Case, Summary, Allocated, Deallocated, Not deallocated, Delta Allocated, Delta Deallocated, Delta Not Deallocated
"Basecase", "Allocation summary allocated 19986 deallocated 245 not deallocated 19741", 19986, 245, 19741
"", "Allocation summary allocated 19986 deallocated 245 not deallocated 19741", 19986, 245, 19741, 0, 0, 0
"nil", "Allocation summary allocated 20019 deallocated 253 not deallocated 19766", 20019, 253, 19766, 33, 8, 25
"()", "Allocation summary allocated 19990 deallocated 249 not deallocated 19741", 19990, 249, 19741, 4, 4, 0
"(quote ())", "Allocation summary allocated 20025 deallocated 247 not deallocated 19778", 20025, 247, 19778, 39, 2, 37
"(list)", "Allocation summary allocated 20023 deallocated 257 not deallocated 19766", 20023, 257, 19766, 37, 12, 25
"(list )", "Allocation summary allocated 20023 deallocated 257 not deallocated 19766", 20023, 257, 19766, 37, 12, 25
"(list 1)", "Allocation summary allocated 20033 deallocated 259 not deallocated 19774", 20033, 259, 19774, 47, 14, 33
"(list 1 1)", "Allocation summary allocated 20043 deallocated 261 not deallocated 19782", 20043, 261, 19782, 57, 16, 41
"(list 1 1 1)", "Allocation summary allocated 20053 deallocated 263 not deallocated 19790", 20053, 263, 19790, 67, 18, 49
"(list 1 2 3)", "Allocation summary allocated 20053 deallocated 263 not deallocated 19790", 20053, 263, 19790, 67, 18, 49
"(+)", "Allocation summary allocated 20022 deallocated 255 not deallocated 19767", 20022, 255, 19767, 36, 10, 26
"(+ 1)", "Allocation summary allocated 20030 deallocated 257 not deallocated 19773", 20030, 257, 19773, 44, 12, 32
"(+ 1 1)", "Allocation summary allocated 20039 deallocated 259 not deallocated 19780", 20039, 259, 19780, 53, 14, 39
"(+ 1 1 1)", "Allocation summary allocated 20048 deallocated 261 not deallocated 19787", 20048, 261, 19787, 62, 16, 46
"(+ 1 2 3)", "Allocation summary allocated 20048 deallocated 261 not deallocated 19787", 20048, 261, 19787, 62, 16, 46
"(list 'a 'a 'a)", "Allocation summary allocated 20137 deallocated 278 not deallocated 19859", 20137, 278, 19859, 151, 33, 118
"(list 'a 'b 'c)", "Allocation summary allocated 20137 deallocated 278 not deallocated 19859", 20137, 278, 19859, 151, 33, 118
"(list :a :b :c)", "Allocation summary allocated 20107 deallocated 260 not deallocated 19847", 20107, 260, 19847, 121, 15, 106
"(list :alpha :bravo :charlie)", "Allocation summary allocated 20471 deallocated 260 not deallocated 20211", 20471, 260, 20211, 485, 15, 470
"{}", "Allocation summary allocated 19992 deallocated 251 not deallocated 19741", 19992, 251, 19741, 6, 6, 0
"{:z 0}", "Allocation summary allocated 20029 deallocated 255 not deallocated 19774", 20029, 255, 19774, 43, 10, 33
"{:zero 0}", "Allocation summary allocated 20107 deallocated 255 not deallocated 19852", 20107, 255, 19852, 121, 10, 111
"{:z 0 :o 1}", "Allocation summary allocated 20066 deallocated 256 not deallocated 19810", 20066, 256, 19810, 80, 11, 69
"{:zero 0 :one 1}", "Allocation summary allocated 20196 deallocated 259 not deallocated 19937", 20196, 259, 19937, 210, 14, 196
"{:z 0 :o 1 :t 2}", "Allocation summary allocated 20103 deallocated 257 not deallocated 19846", 20103, 257, 19846, 117, 12, 105
"{:zero 0 :one 1 :two 2 :three 3 :four 4 :five five :six 6 :seven 7 :eight 8 :nine 9}", "Allocation summary allocated 21164 deallocated 286 not deallocated 20878", 21164, 286, 20878, 1178, 41, 1137
Can't render this file because it has a wrong number of fields in line 2.

View file

@ -0,0 +1,28 @@
nil
()
(quote ())
(list)
(list )
(list 1)
(list 1 1)
(list 1 1 1)
(list 1 2 3)
(+)
(+ 1)
(+ 1 1)
(+ 1 1 1)
(+ 1 2 3)
(list 'a 'a 'a)
(list 'a 'b 'c)
(list :a :b :c)
(list :aa :bb :cc)
(list :aaa :bbb :ccc)
(list :alpha :bravo :charlie)
{}
{:z 0}
{:zero 0}
{:z 0 :o 1}
{:zero 0 :one 1}
{:z 0 :o 1 :t 2}
{:zero 0 :one 1 :two 2 :three 3 :four 4 :five five :six 6 :seven 7 :eight 8 :nine 9}