diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..1bfece3 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "munit"] + path = munit + url = https://github.com/nemequ/munit.git diff --git a/Doxyfile b/Doxyfile index c608536..bb8427d 100644 --- a/Doxyfile +++ b/Doxyfile @@ -778,7 +778,7 @@ WARN_FORMAT = "$file:$line: $text" # messages should be written. If left blank the output is written to standard # error (stderr). -WARN_LOGFILE = doxy.log +WARN_LOGFILE = tmp/doxy.log #--------------------------------------------------------------------------- # Configuration options related to the input files diff --git a/Makefile b/Makefile index bc2952b..701b16b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ TARGET ?= target/psse -SRC_DIRS ?= ./src +SRC_DIRS ?= ./src/c SRCS := $(shell find $(SRC_DIRS) -name *.cpp -or -name *.c -or -name *.s) HDRS := $(shell find $(SRC_DIRS) -name *.h) @@ -8,8 +8,9 @@ DEPS := $(OBJS:.o=.d) TESTS := $(shell find unit-tests -name *.sh) -INC_DIRS := $(shell find $(SRC_DIRS) -type d) -INC_FLAGS := $(addprefix -I,$(INC_DIRS)) +# INC_DIRS := $(shell find $(SRC_DIRS) -type d) +# INC_FLAGS := $(addprefix -I,$(INC_DIRS)) +INC_FLAGS := -I $(SRC_DIRS) TMP_DIR ?= ./tmp @@ -20,13 +21,14 @@ INDENT_FLAGS := -nbad -bap -nbc -br -brf -brs -c33 -cd33 -ncdb -ce -ci4 -cli4 \ CPPFLAGS ?= $(INC_FLAGS) -MMD -MP -g -DDEBUG LDFLAGS := -lm -lcurl DEBUGFLAGS := -g3 +GCCFLAGS := -std=gnu23 all: $(TARGET) Debug: $(TARGET) $(TARGET): $(OBJS) Makefile - $(CC) $(DEBUGFLAGS) $(LDFLAGS) $(OBJS) -o $@ $(LDFLAGS) $(LOADLIBES) $(LDLIBS) + $(CC) $(GCCFLAGS) $(DEBUGFLAGS) $(LDFLAGS) $(OBJS) -o $@ $(LDFLAGS) $(LOADLIBES) $(LDLIBS) doc: $(SRCS) Makefile Doxyfile doxygen diff --git a/src/arith/integer.c b/archive/c/arith/integer.c similarity index 100% rename from src/arith/integer.c rename to archive/c/arith/integer.c diff --git a/src/arith/integer.h b/archive/c/arith/integer.h similarity index 100% rename from src/arith/integer.h rename to archive/c/arith/integer.h diff --git a/src/arith/peano.c b/archive/c/arith/peano.c similarity index 100% rename from src/arith/peano.c rename to archive/c/arith/peano.c diff --git a/src/arith/peano.h b/archive/c/arith/peano.h similarity index 100% rename from src/arith/peano.h rename to archive/c/arith/peano.h diff --git a/src/arith/ratio.c b/archive/c/arith/ratio.c similarity index 100% rename from src/arith/ratio.c rename to archive/c/arith/ratio.c diff --git a/src/arith/ratio.h b/archive/c/arith/ratio.h similarity index 100% rename from src/arith/ratio.h rename to archive/c/arith/ratio.h diff --git a/src/arith/real.c b/archive/c/arith/real.c similarity index 100% rename from src/arith/real.c rename to archive/c/arith/real.c diff --git a/src/arith/real.h b/archive/c/arith/real.h similarity index 100% rename from src/arith/real.h rename to archive/c/arith/real.h diff --git a/src/authorise.c b/archive/c/authorise.c similarity index 100% rename from src/authorise.c rename to archive/c/authorise.c diff --git a/src/authorise.h b/archive/c/authorise.h similarity index 100% rename from src/authorise.h rename to archive/c/authorise.h diff --git a/src/debug.c b/archive/c/debug.c similarity index 100% rename from src/debug.c rename to archive/c/debug.c diff --git a/src/debug.h b/archive/c/debug.h similarity index 100% rename from src/debug.h rename to archive/c/debug.h diff --git a/src/init.c b/archive/c/init.c similarity index 100% rename from src/init.c rename to archive/c/init.c diff --git a/src/io/fopen.c b/archive/c/io/fopen.c similarity index 100% rename from src/io/fopen.c rename to archive/c/io/fopen.c diff --git a/src/io/fopen.h b/archive/c/io/fopen.h similarity index 100% rename from src/io/fopen.h rename to archive/c/io/fopen.h diff --git a/src/io/history.c b/archive/c/io/history.c similarity index 100% rename from src/io/history.c rename to archive/c/io/history.c diff --git a/src/io/history.h b/archive/c/io/history.h similarity index 100% rename from src/io/history.h rename to archive/c/io/history.h diff --git a/src/io/io.c b/archive/c/io/io.c similarity index 100% rename from src/io/io.c rename to archive/c/io/io.c diff --git a/src/io/io.h b/archive/c/io/io.h similarity index 100% rename from src/io/io.h rename to archive/c/io/io.h diff --git a/src/io/print.c b/archive/c/io/print.c similarity index 100% rename from src/io/print.c rename to archive/c/io/print.c diff --git a/src/io/print.h b/archive/c/io/print.h similarity index 100% rename from src/io/print.h rename to archive/c/io/print.h diff --git a/src/io/read.c b/archive/c/io/read.c similarity index 100% rename from src/io/read.c rename to archive/c/io/read.c diff --git a/src/io/read.h b/archive/c/io/read.h similarity index 100% rename from src/io/read.h rename to archive/c/io/read.h diff --git a/src/memory/conspage.c b/archive/c/memory/conspage.c similarity index 100% rename from src/memory/conspage.c rename to archive/c/memory/conspage.c diff --git a/src/memory/conspage.h b/archive/c/memory/conspage.h similarity index 100% rename from src/memory/conspage.h rename to archive/c/memory/conspage.h diff --git a/src/memory/consspaceobject.c b/archive/c/memory/consspaceobject.c similarity index 100% rename from src/memory/consspaceobject.c rename to archive/c/memory/consspaceobject.c diff --git a/src/memory/consspaceobject.h b/archive/c/memory/consspaceobject.h similarity index 100% rename from src/memory/consspaceobject.h rename to archive/c/memory/consspaceobject.h diff --git a/src/memory/cursor.c b/archive/c/memory/cursor.c similarity index 100% rename from src/memory/cursor.c rename to archive/c/memory/cursor.c diff --git a/src/memory/cursor.h b/archive/c/memory/cursor.h similarity index 100% rename from src/memory/cursor.h rename to archive/c/memory/cursor.h diff --git a/src/memory/dump.c b/archive/c/memory/dump.c similarity index 100% rename from src/memory/dump.c rename to archive/c/memory/dump.c diff --git a/src/memory/dump.h b/archive/c/memory/dump.h similarity index 100% rename from src/memory/dump.h rename to archive/c/memory/dump.h diff --git a/src/memory/hashmap.c b/archive/c/memory/hashmap.c similarity index 100% rename from src/memory/hashmap.c rename to archive/c/memory/hashmap.c diff --git a/src/memory/hashmap.h b/archive/c/memory/hashmap.h similarity index 100% rename from src/memory/hashmap.h rename to archive/c/memory/hashmap.h diff --git a/src/memory/lookup3.c b/archive/c/memory/lookup3.c similarity index 100% rename from src/memory/lookup3.c rename to archive/c/memory/lookup3.c diff --git a/src/memory/lookup3.h b/archive/c/memory/lookup3.h similarity index 100% rename from src/memory/lookup3.h rename to archive/c/memory/lookup3.h diff --git a/src/memory/stack.c b/archive/c/memory/stack.c similarity index 100% rename from src/memory/stack.c rename to archive/c/memory/stack.c diff --git a/src/memory/stack.h b/archive/c/memory/stack.h similarity index 100% rename from src/memory/stack.h rename to archive/c/memory/stack.h diff --git a/src/memory/vectorspace.c b/archive/c/memory/vectorspace.c similarity index 100% rename from src/memory/vectorspace.c rename to archive/c/memory/vectorspace.c diff --git a/src/memory/vectorspace.h b/archive/c/memory/vectorspace.h similarity index 100% rename from src/memory/vectorspace.h rename to archive/c/memory/vectorspace.h diff --git a/src/ops/equal.c b/archive/c/ops/equal.c similarity index 98% rename from src/ops/equal.c rename to archive/c/ops/equal.c index 296aea6..9a7aded 100644 --- a/src/ops/equal.c +++ b/archive/c/ops/equal.c @@ -272,7 +272,7 @@ bool equal_map_map( struct cons_pointer a, struct cons_pointer b ) { for ( struct cons_pointer i = keys_a; !nilp( i ); i = c_cdr( i ) ) { struct cons_pointer key = c_car( i ); - if ( !equal + if ( !c_equal ( hashmap_get( a, key, false ), hashmap_get( b, key, false ) ) ) { result = false; @@ -331,7 +331,7 @@ bool equal_vector_vector( struct cons_pointer a, struct cons_pointer b ) { * Deep, and thus expensive, equality: true if these two objects have * identical structure, else false. */ -bool equal( struct cons_pointer a, struct cons_pointer b ) { +bool c_equal( struct cons_pointer a, struct cons_pointer b ) { debug_print( L"\nequal: ", DEBUG_EQUAL ); debug_print_object( a, DEBUG_EQUAL ); debug_print( L" = ", DEBUG_EQUAL ); @@ -353,8 +353,8 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) { * structures can be of indefinite extent. It *must* be done by * iteration (and even that is problematic) */ result = - equal( cell_a->payload.cons.car, cell_b->payload.cons.car ) - && equal( cell_a->payload.cons.cdr, + c_equal( cell_a->payload.cons.car, cell_b->payload.cons.car ) + && c_equal( cell_a->payload.cons.cdr, cell_b->payload.cons.cdr ); break; case KEYTV: @@ -401,7 +401,7 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) { * isn't sufficient. So we recurse at least once. */ result = ( wcsncmp( a_buff, b_buff, i ) == 0 ) - && equal( c_cdr( a ), c_cdr( b ) ); + && c_equal( c_cdr( a ), c_cdr( b ) ); } break; case VECTORPOINTTV: diff --git a/src/ops/equal.h b/archive/c/ops/equal.h similarity index 91% rename from src/ops/equal.h rename to archive/c/ops/equal.h index 061eb94..a3ae93a 100644 --- a/src/ops/equal.h +++ b/archive/c/ops/equal.h @@ -31,6 +31,6 @@ bool eq( struct cons_pointer a, struct cons_pointer b ); * Deep, and thus expensive, equality: true if these two objects have * identical structure, else false. */ -bool equal( struct cons_pointer a, struct cons_pointer b ); +bool c_equal( struct cons_pointer a, struct cons_pointer b ); #endif diff --git a/src/ops/intern.c b/archive/c/ops/intern.c similarity index 99% rename from src/ops/intern.c rename to archive/c/ops/intern.c index 989686b..f16733d 100644 --- a/src/ops/intern.c +++ b/archive/c/ops/intern.c @@ -334,7 +334,7 @@ struct cons_pointer search_store( struct cons_pointer key, switch ( get_tag_value( entry_ptr ) ) { case CONSTV: - if ( equal( key, c_car( entry_ptr ) ) ) { + if ( c_equal( key, c_car( entry_ptr ) ) ) { result = return_key ? c_car( entry_ptr ) : c_cdr( entry_ptr ); @@ -441,7 +441,7 @@ struct cons_pointer internedp( struct cons_pointer key, for ( struct cons_pointer pair = c_car( store ); eq( result, NIL ) && !nilp( pair ); pair = c_car( store ) ) { if ( consp( pair ) ) { - if ( equal( c_car( pair ), key ) ) { + if ( c_equal( c_car( pair ), key ) ) { // yes, this should be `eq`, but if symbols are correctly // interned this will work efficiently, and if not it will // still work. diff --git a/src/ops/intern.h b/archive/c/ops/intern.h similarity index 100% rename from src/ops/intern.h rename to archive/c/ops/intern.h diff --git a/src/ops/lispops.c b/archive/c/ops/lispops.c similarity index 99% rename from src/ops/lispops.c rename to archive/c/ops/lispops.c index a9dd7ea..2a8cc47 100644 --- a/src/ops/lispops.c +++ b/archive/c/ops/lispops.c @@ -987,7 +987,7 @@ lisp_equal( struct stack_frame *frame, struct cons_pointer frame_pointer, if ( frame->args > 1 ) { for ( int b = 1; ( truep( result ) ) && ( b < frame->args ); b++ ) { result = - equal( frame->arg[0], fetch_arg( frame, b ) ) ? TRUE : NIL; + c_equal( frame->arg[0], fetch_arg( frame, b ) ) ? TRUE : NIL; } } diff --git a/src/ops/lispops.h b/archive/c/ops/lispops.h similarity index 100% rename from src/ops/lispops.h rename to archive/c/ops/lispops.h diff --git a/src/ops/loop.c b/archive/c/ops/loop.c similarity index 100% rename from src/ops/loop.c rename to archive/c/ops/loop.c diff --git a/src/ops/loop.h b/archive/c/ops/loop.h similarity index 100% rename from src/ops/loop.h rename to archive/c/ops/loop.h diff --git a/src/ops/meta.c b/archive/c/ops/meta.c similarity index 100% rename from src/ops/meta.c rename to archive/c/ops/meta.c diff --git a/src/ops/meta.h b/archive/c/ops/meta.h similarity index 100% rename from src/ops/meta.h rename to archive/c/ops/meta.h diff --git a/src/repl.c b/archive/c/repl.c similarity index 100% rename from src/repl.c rename to archive/c/repl.c diff --git a/src/repl.h b/archive/c/repl.h similarity index 100% rename from src/repl.h rename to archive/c/repl.h diff --git a/src/time/psse_time.c b/archive/c/time/psse_time.c similarity index 100% rename from src/time/psse_time.c rename to archive/c/time/psse_time.c diff --git a/src/time/psse_time.h b/archive/c/time/psse_time.h similarity index 100% rename from src/time/psse_time.h rename to archive/c/time/psse_time.h diff --git a/src/utils.c b/archive/c/utils.c similarity index 100% rename from src/utils.c rename to archive/c/utils.c diff --git a/src/utils.h b/archive/c/utils.h similarity index 100% rename from src/utils.h rename to archive/c/utils.h diff --git a/src/version.h b/archive/c/version.h similarity index 87% rename from src/version.h rename to archive/c/version.h index 462f9be..6548d30 100644 --- a/src/version.h +++ b/archive/c/version.h @@ -8,4 +8,4 @@ * Licensed under GPL version 2.0, or, at your option, any later version. */ -#define VERSION "0.0.6-SNAPSHOT" +#define VERSION "0.0.7-SNAPSHOT" diff --git a/docs/0-1-0-design-decistions.md b/docs/0-1-0-design-decistions.md new file mode 100644 index 0000000..3ec5401 --- /dev/null +++ b/docs/0-1-0-design-decistions.md @@ -0,0 +1,89 @@ +# Design decisions for 0.1.0 + +This is a document that is likely to be revisited, probably frequently. + +## Retire the 0.0.X codebase + +Move the existing codebase out of the compile space altogether; it is to be +treated as a finished rapid prototype, not extended further, and code largely +not copied but learned from. + +## Remain open to new substrate languages, but continue in C for now + +I'm disappointed with [Zig](https://ziglang.org/). While the language +concepts are beautiful, and if it were stable it would be an excellent tool, it +isn't stable. I'm still open to build some of the 0.1.X prototype in Zig, but +it isn't the main tool. + +I haven't yet evaluated [Nim](https://nim-lang.org/). I'm prejudiced against +its syntax, but, again, I'm open to using it for some of this prototype. + +But for now, I will continue to work in C. + +## Substrate is shallow + +In the 0.0.X prototype, I tried to do too much in the substrate. I tried to +write bignums in C, and in this I failed; I would have done much better to +get a very small Lisp working well sooner, and build new features in that. + +In 0.1.X the substrate will be much less feature rich, but support the creation +of novel types of data object in Lisp. + +## Paged Space Objects + +Paged space objects will be implemented largely in line with [this document](Paged-space-objects.md). + +## Tags + +Tags will continue to be 32 bit objects, which can be considered as unsigned +integer values or as four bytes. However, only the first three bytes will be +mnemonic. The fourth byte will indicate the size class of the object; where +the size class represents the allocation size, *not* the payload size. The +encoding is as in this table: + +| Tag | | | Size of payload | | +| ---- | ----------- | --- | --------------- | --------------- | +| Bits | Field value | Hex | Number of words | Number of bytes | +| ---- | ----------- | --- | --------------- | --------------- | +| 0000 | 0 | 0 | 1 | 8 | +| 0001 | 1 | 1 | 2 | 16 | +| 0010 | 2 | 2 | 4 | 32 | +| 0011 | 3 | 3 | 8 | 64 | +| 0100 | 4 | 4 | 16 | 128 | +| 0101 | 5 | 5 | 32 | 256 | +| 0110 | 6 | 6 | 64 | 512 | +| 0111 | 7 | 7 | 128 | 1024 | +| 1000 | 8 | 8 | 256 | 2048 | +| 1001 | 9 | 9 | 512 | 4096 | +| 1010 | 10 | A | 1024 | 8192 | +| 1011 | 11 | B | 2048 | 16384 | +| 1100 | 12 | C | 4096 | 32768 | +| 1101 | 13 | D | 8192 | 65536 | +| 1110 | 14 | E | 16384 | 131072 | +| 1111 | 15 | F | 32768 | 262144 | + +Consequently, an object of size class F will have an allocation size of 32,768 words, but a payload size of 32,766 words. This obviously means that size classes 0 and 1 will not exist, since they would not have any payload. + +## Page size + +Every page will be 1,048,576 bytes. + +## Namespaces + +Namespaces will be implemented; in addition to the root namespace, there will be at least the following namespaces: + +### :bootstrap + +Functions written in the substrate language, intended to be replaced for all normal purposes by functions written in Lisp which may call these bootstrap functions. Not ever available to user code. + +### :substrate + +Functions written in the substrate language which *may* be available to user-written code. + +### :system + +Functions, written either in Lisp or in the substrate language, which modify system memory in ways that only trusted and privileged users are permitted to do. + +## Access control + +Obviously, for this to work, access control lists must be implemented and must work. \ No newline at end of file diff --git a/docs/Compiler.md b/docs/Compiler.md new file mode 100644 index 0000000..e6c8c3e --- /dev/null +++ b/docs/Compiler.md @@ -0,0 +1,108 @@ +# Towards a Compiler + +Abdulaziz Ghuloum's paper [An Incremental Approach to Compiler Construction](https://bernsteinbear.com/assets/img/11-ghuloum.pdf) starts with the observation: + +> Compilers are perceived to be magical artifacts, carefully crafted +> by the wizards, and unfathomable by the mere mortals. Books on +> compilers are better described as wizard-talk: written by and for +> a clique of all-knowing practitioners. Real-life compilers are too +> complex to serve as an educational tool. And the gap between +> real-life compilers and the educational toy compilers is too wide. +> The novice compiler writer stands puzzled facing an impenetrable +> barrier, “better write an interpreter instead.” + +Well, yes. That *is* what I feel. But the thing is, I've written two Lisp interpreters (and interpreters for a few other languages into one dialect of Lisp or another) now. I still feel [imposter syndrome](https://en.wikipedia.org/wiki/Impostor_syndrome) — that my interpreters are not as good as they should be, that I haven't understood the ideas clearly enough or implemented them cleanly enough, but [Beowulf](https://git.journeyman.cc/simon/beowulf) works (and evaluates Lisp) very well; the [`0.0.6` Post Scarcity](https://git.journeyman.cc/simon/post-scarcity) prototype works, after a fashion; and, after only a week of work, the `0.1.0` Post Scarcity prototype is close to working now. + +Further back in my history, the [MicroWorld rule language](https://git.journeyman.cc/simon/mw-parser) is still easily buildable and works well; and, long before that, my LemonADE adventure game writing language did work well; and KnacqTools suite of rule 'compilers,' which although not strictly speaking either interpreters or compilers in this sense were very similar technology, also worked extremely well. Interpreters — even reasonably good interpreters — are a done problem, but I have really no idea where to start building a compiler. + +So why bother? + +Beowulf is *mostly* written in Lisp — which is to say, it is mostly written in itself. If you check the [list of functions](https://git.journeyman.cc/simon/beowulf#functions-and-symbols-implemented), you'll see that the overwhelming majority of them are described as 'Lisp lambda functions'. This means, they're Beowulf functions written in Beowulf — and you can read the source code of them [here](https://git.journeyman.cc/simon/beowulf/src/branch/master/resources/lisp1.5.lsp). + +But Post Scarcity `0.0.6` is written almost entirely in C. It never got to the point, as Beowulf did, where you could start a Lisp session, hack up a few functions, and save out your system to persistent storage to start again later with the work you'd written already incorporated. And this is mainly because I tried to do too many of the hard parts, like the sophisticated reader and bignum arithmetic, in C. + +I'm not a confident C programmer. Post Scarcity `0.0.6`'s bignum arithmetic doesn't work, and I've failed to make it work. Post Scarcity `0.0.6`'s garbage collector works unacceptably poorly. My goal, in `0.1.0`, is to write far less in the substrate and far more in Lisp. + +Which means, the Lisp must be as performant as possible. Which means, I think, that I need a compiler. Which means I need to learn to be (more of a) wizard. + +So, where do I start? Where is my grimoire? + +## Online tutorials on Lisp compilers + +### Ghuloum + +I've mentioned Abdulaziz Ghuloum's [An Incremental Approach to Compiler Construction](https://bernsteinbear.com/assets/img/11-ghuloum.pdf) at the top. It's PDF, of course. Why do people publish things as PDF? It makes them *so hard* to read! + +However, I very much like his approach: small incremental steps. He writes mainly in Scheme, which is similar enough to Post Scarcity Lisp that it should be reasonably simple to carry over ideas; he targets what he describes as 'Intel-x86' assembler, but I don't yet know whether that means 16, 32 or 64 bit — since the paper dates from 2006 I'm guessing 32 bit. However, his method is to write a C fragment that implements a small step of his process, and then examine assembler output from GCC; that's an approach I could follow. + +He uses test driven development, which should make things easy to reproduce. + +He implements tail-call optimisation. + +The paper is quite brief, and does not include source code; I have not found source code relating to it. + +The paper contains a link to the author's home page at Indiana.edu, but that link is now dead. Archive.org has snapshots dated from [18th September 2006](https://web.archive.org/web/20060918162504/https://www.cs.indiana.edu/~aghuloum/) (the paper is dated from the 16th) to [March 10th 2011](https://web.archive.org/web/20110310092701/http://www.cs.indiana.edu/~aghuloum/). Although the lecture notes appear in both the listed snapshots, the paper itself is not in the first of them. + +Ghuloum appears to have recently been teaching at the American University of Kuwait; he has a [GitHub presence](https://github.com/azizghuloum), but his Scheme compiler is not listed there. He published [a number of technical papers on Scheme](https://scholar.google.com/citations?user=5rd6dWUAAAAJ&hl=en) between 2006 and 2009, but does not appear to have published anything since. + +### Healey + +This blog post by [Andrew Healey](https://github.com/healeycodes), [Compiling Lisp to Bytecode and Running It](https://healeycodes.com/compiling-lisp-to-bytecode-and-running-it) is essentially 'write your own virtual machine,' which, given that I've been thinking about the ideal instruction set for the Post Scarcity processor, isn't a bad idea. [This repository](https://github.com/healeycodes/lisp-to-js) appears to be his implementation. + +His code has virtually no internal documentation, and is in a language I don't even recognise (it might be Rust — it builds and tests with `cargo`); however, it's clearly written in nice small functions, and there is really surprisingly little of it. It does build, and all its tests pass. + +Healey is still active on GitHub, and currently works for Vercel, an 'AI Cloud' company, apparently as a software engineer. + +### Bernstein + +There's a [blog series](https://bernsteinbear.com/blog/lisp/) by [Max Bernstein](https://github.com/tekknolagi) which is nicely clear. He references Ghuloum's work (and indeed the link I found to Ghuloum's paper is on his site), but builds his compiler in C. His repository for the compiler posts appears to be [this one](https://github.com/tekknolagi/ghuloum). + +His code is mainly in C, with a test harness in Python. Again, his code is internally largely undocumented, but builds cleanly, and all his unit tests pass. The way he implements his unit tests is new to me, and worth studying; it's certainly better than the scrappy mess of shell scripts I used for the `0.0.X` series. + +### Others + +That's the list of things I've found so far that look useful to me. If I find others, I'll add them here. + +## Things which inevitably make the Post Scarcity compiler different + +### Tag location + +Objects in Lisp have to know that they are. This is what makes it possible to compute with an 'untyped' language: the type is not encoded in the program but in the data. In most conventional Lisp systems, things are typed by having a tag. Back in the day, when we had hardware specially built to run Lisp, Lisp specific hardware often had a word size — and thus registers, and a data bus — wider than the address bus, wider by the number of bits in the tag, and stored the tag on the pointer. + +Modern Lisps still, I think, mostly store the tag on the pointer, but they run on commodity hardware which doesn't have those extra bits in the word size. That means that the size of an integer, or the precision of a real, that you can store in one word of memory is much less. It also means either that they can address much less memory than other programming languages on the same hardware, because for every bit you steal out of the address bus you halve the amount of memory you can address; or else that they bit shift up every address before they fetch it. + +The bit shift works if all memory objects are powers of two words wide, which, in Post Scarcity `0.1.0` they are, see [Paged Space Objects](Paged-space-objects.md); but as I am already doing the upshifting trick so that I can address more than 64 (actually 104, on the current sketch of how memory works) 'bits wide' of memory, this doesn't help me. + +Consequently, in both the `0.0.X` series of prototypes and now in the `0.1.0` prototype, I have the tag in the object, not in the pointer. + +#### Is that a good decision? + +There's a really big inefficiency in that decision. In early versions of Java, numbers (and a few other things) were not objects, but 'primitives'. That is to say, the word of memory which, for objects, would be a pointer, is, for primitives, the actual data; and thus you can operate on it without doing an additional fetch. In modern Java, those primitives still exist, as [unboxed types](https://en.wikipedia.org/wiki/Boxing_(computer_programming)). Java can do this because it is a typed language. Every method knows the type of its arguments. + +In Lisp we don't. So we either have the tag on the pointer, reducing, as I pointed out above, the number of addresses that can be addressed and the amount of data that can be stored in each object, or we have the tag on the object, meaning that (the header of) every object has to be fetched before we even know what it is, and thus how to despatch it further. And, in the Post Scarcity architecture as I conceive it now, in the case of an object which is curated on a node somewhere far distant across the hypercube and not yet in local cache, that means it has to be fetched hoppity hop across the mesh, which is extremely costly. + +But, not only does Post Scarcity need a bigger tag than most Lisps in order to have user extensible types, it also needs to have an access control list on every object in order to have security between users; and, although I failed to make the reference counting garbage collector work in `0.0.X`, and although the thinking I've been doing about the 'mark but don't sweep' garbage collector may make it unnecessary, I still want to experiment with reference counting. So I need space in every header for a reference count. + +So I can't really have unboxed objects, I think[^1] — at least, allowing unboxed integers, reals, and characters would need a very thorough rethink of the security model. + +[^1]: except that, in compiled functions, local variables could potentially be the equivalent of unboxed. That's one of the main speed increases I hope to get from compiling. + +All decisions in engineering are compromises. At present, I am content to proceed with this compromise. + +### Reifying compiled functions + +I don't honestly know where most modern Lisps allocate space for compiled functions, but I suspect that it's on the heap. In the `0.1.0` prototype I'm really trying to limit the use of 'raw' heap allocation, to prevent heap fragmentation, to reduce garbage collection problems. So I want to put each compiled function into a paged space object. Which means they have to be relocatable in memory. + +And certainly, when a compiled function is copied from the node on which it is curated to another node where it will be cached, it will be at a different place in the memory of that node. + +*(Question: should we copy only source functions across the mesh, and compile them 'just in time' on the node where they will be used? Doing that would allow each compiled function to incorporate raw pointers to every other function it called, which would greatly speed execution. However, if any of those functions were subsequently redefined, it would not update to use the new definition without recompilation.)* + +I don't *think* relocatability is a problem. Lisps which use heap-allocated compiled functions and run mark and sweep garbage collectors on their heap, as I'm almost certain Portable Standard Lisp does and imagine most other conventional Lisps must, must have relocatable functions. + +However, it may be. I certainly need to think about relocatability in this design. + +## Conclusion + +Post Scarcity's compiler won't be — can't be — a straight lift of anyone else's Lisp compiler. Post Scarcity is just inevitably a very different beast. The whole idea of a multiple instruction, multiple data, massively parallel processor is one that has not been very much explored because it is hard; and I don't have the technical or mathematical understanding to demonstrate whether, even if a Post Scarcity machine really could use four billion processor nodes petabytes of memory, it could do so efficiently. + +But the compiler is doable; none of the peculiarities of the architecture is a blocker. And even if this won't be a conventional compiler, there is a great deal that can be learned from conventional compilers. \ No newline at end of file diff --git a/docs/Dont-know-dont-care.md b/docs/Dont-know-dont-care.md new file mode 100644 index 0000000..8c28fae --- /dev/null +++ b/docs/Dont-know-dont-care.md @@ -0,0 +1,71 @@ +# Don't know, don't care + +![The famous XKCD cartoon showing all modern digital infrastructure depending on a single person's spare-time project](https://imgs.xkcd.com/comics/dependency.png) + +One of the key design principles of the Post Scarcity computing project since my 2006 essay, [Post Scarcity Software](Post-scarcity-software.md), has been "don't know, don't care." + +The reason for this is simple. Modern computing systems are extremely complex. It is impossible for someone to be expert on every component of the system. To produce excellent work, it is necessary to specialise, to avoid being distracted by the necessary intricacies of the things on which your work depends, or of the (not yet conceived) intricacies of the work of other people which will ultimately depend on yours. It is necessary to trust. + +Randal Munroe's graphic which I've used to illustrate this essay looks like a joke, but it isn't. + +[Daniel Stenberg](https://en.wikipedia.org/wiki/Daniel_Stenberg) lives not in Nebraska, but in Sweden. He wrote what became [libcurl](https://curl.se/) in 1996, not 2003. He is still its primary maintainer. It pretty much is true to say that all modern digital infrastructure depends on it. It is a basic component which fetches data over a broad range of internet protocols, negotiating the appropriate security. There *are* alternatives to libcurl in (some) other software environments, but it is extremely widely used. Because it deals with security, it is critical; any vulnerability in it needs to be fixed quickly, because it has very major impact. + +The current [post-scarcity software environment](https://git.journeyman.cc/simon/post-scarcity) depends on libcurl, because of course it does. You certainly use libcurl yourself, even if you don't know it. You probably used it to fetch this document, in order to read it. + +I don't need to know the intricacies of URL schemae, or of Internet protocols, or of security, to the level of detail Daniel does. I've never even reviewed his code. I trust him to know what he's doing. + +Daniel's not alone, of course. Linus Torvalds wrote Linux in a university dorm room in Finland; now it powers the vast majority of servers on the Internet, and the vast majority of mobile phones in the world, and, quite incidentally, a cheap Chinese camera drone I bought to film bike rides. Linux is now an enormous project with thousands of contributors, but Linus is still the person who holds it together. [Rasmus Lerdorf](https://en.wikipedia.org/wiki/Rasmus_Lerdorf), from Greenland, wrote PHP to run his personal home page (the clue is in the name); Mark Zuckerberg used PHP to write Facebook; Michel Valdrighi used PHP to write something called b/cafelog, which Matt Mullenweg further developed into WordPress. + +There are thousands of others, of course; and, at the layer of hardware, on which all software depends, there are thousands of others whose names I do not even know. I'm vaguely aware of the architects of the ARM chip, but I had to look them up just now because I couldn't remember their names. I know that the ARM is at least a spiritual descendant of the 6502, but I don't know who designed that or anything of their story; and the antecedents behind that I don't know at all. The people behind all the many other chips which make up a working computer? I know nothing about them. + +(In any case, if one seriously wanted to build this thing, it would be better to have custom hardware — one would probably have to have custom hardware at least for the router — and if one were to have custom hardware it would be nice if it ran something very close to Lisp right down on the silicon, as the [Symbolics Ivory](https://gwern.net/doc/cs/hardware/1987-baker.pdf) chips did; so you probably wouldn't use ARM cores at all.) + +I have met and personally spoken with most of the people behind the Internet protocol stack, but I don't need to have done so in order to use it; and, indeed, the reason that [Jon Postel](https://en.wikipedia.org/wiki/Jon_Postel) bought me a beer was so that he could sit me down and very gently explain how badly I'd misunderstood something. + +----- + +But this is the point. We don't need to know, or have known, these people to build on their work. We don't have to, and cannot in detail, fully understand their work. There is simply too much of it, its complexity would overwhelm us. + +We don't know. We don't care. And that is a protective mechanism, a mechanism which is necessary in order to allow us to focus on our own task, if we are to produce excellent work. If we are to create a meaningful contribution on which the creators of the future can build. + +----- + +But there is a paradox, here, one of many conceptual paradoxes that I have encountered working on the Post Scarcity project. + +I am essentially a philosopher, or possibly a dilettante, rather than an engineer. When [Danny Hillis](https://longnow.org/people/board/danny0/) came up with the conception of the [Connection Machine](), a machine which is consciously one of the precursors of the post-scarcity project, he sought expert collaborators — and was so successful in doing so that [he persuaded Richard Feynman to join the project](https://longnow.org/ideas/richard-feynman-and-the-connection-machine/). I haven't recruited any collaborators. I don't have the social skills. And I don't have sufficient confidence that my idea is even good in itself. + +In building the first software prototype, I realised that I don't even properly understand what it means to [intern](http://www.ai.mit.edu/projects/iiip/doc/CommonLISP/HyperSpec/Body/fun_intern.html) something. I realised that I still don't understand how in many Common Lisp implementations, for any integer number `n`, `(eq n n)` can return true. I note that in practice it *does*, but I don't understand how it's done. + +In the current post scarcity prototype, it *is* true for very small values of `n`, because I cache an array of small positive integers as an optimisation hack to prevent memory churn, but that's very special case and I cannot believe that Common Lisp implementations are doing it for significantly larger numbers of integers. I note that in SBCL, two bignums of equal value are not `eq`, so presumably SBCL is doing some sort of hack similar to mine, but I do not know how it works and I *shouldn't* care. + +Platonically, two instances of the same number *should be* the same object; but we do not live in a Platonic world and I don't want to. I'm perfectly happy that `eq` (which should perhaps be renamed `identical?`) should not work for numbers. + +What the behaviour is of the functions that we use, at whatever layer in the stack we work, does matter. We do need to know that. But what happens under the surface in order to deliver that behaviour? We don't need to know. We don't need to care. And we shouldn't, because that way leads to runaway recursion: behind every component, there is another component, which makes other compromises with physical matter which make good engineering sense to the people who understand that component well enough to design and to maintain it. + +The stack is not of infinite depth, of course. At its base is silicon, and traces of metals on silicon, and the behaviour of electrons as they interact with individual atoms in those traces. That is knowable, in principle, by someone. But there are sufficiently many layers in the stack, and sufficient complexity in each layer, that to have a good, clear, understanding of every layer is beyond the mental capacity of anyone I know, and, I believe, is generally beyond the mental capacity of any single person. + +----- + +But this is the point. The point is I do need to know, and do need to care, if I am to complete this project on my own; and I don't have sufficient faith in the utility of the project (or my ability to communicate that utility) that I believe that anyone else will ever care enough to contribute to it. + +And I don't have the skills, or the energy, or, indeed, the remaining time, to build any of it excellently. If it is to be built, I need collaborators; but I don't have the social skills to attract collaborators, or probably to work with them; and, actually, if I did have expert collaborators there would probably be no place for me in the project, because I don't have excellence at anything. + +----- + +I realise that I don't even really understand what a hypercube is. I describe my architecture as a hypercube. It is a cube because it has three axes, even though each of those axes is conceptually circular. Because the axes are circular, the thing can only be approximated in three dimensional space by using links of flexible wire or glass fibres to join things which, in three dimensional topology, cannot otherwise be joined; it is therefore slightly more than three dimensional while being considerably less than four dimensional. + +I *think* this is also Hillis' understanding of a hypercube, but I could be wrong on that. + +Of course, my architecture could be generalised to have four, or five, or six, or more circular axes + +[^1]: Could it? I'm reasonably confident that it could have *six* circular axes, but I cannot picture in my head how the grid intersections of a four-and-a-bit dimensional grid would work. + +, and this would result in each node having more immediate neighbours, which would potentially speed up computation by shortening hop paths. But I cannot help feeling that with each additional axis there comes a very substantial increase in the complexity of physically routing the wires, so three-and-a-bit dimensions may be as good as you practically get. + +I don't have the mathematical skill to mentally model how a computation would scale through this structure. It's more an 'if I build it I will find out whether this is computationally efficient' than an 'I have a principled idea of why this should be computationally efficient.' Intuitively, it *should be* more efficient than a [von Neumann architecture](https://en.wikipedia.org/wiki/Von_Neumann_architecture), and it's easy to give an account of how it can address (much) more memory than obvious developments of our current architectures. But I don't have a good feel of the actual time cost of copying data hoppity-hop across the structure, or the heuristics of when it will be beneficial to shard a computation between neighbours. + +----- + +Which brings me back to why I'm doing this. I'm doing it, principally, to quiet the noises in my brain; as an exercise in preventing my propensity for psychiatric melt-down from overwhelming me. It isn't, essentially, well-directed engineering. It is, essentially, self-prescribed therapy. There is no reason why anyone else should be interested. + +Which is, actually, rather solipsistic. Not a thought I like! \ No newline at end of file diff --git a/docs/Home.md b/docs/Home.md index b4dfc0e..8937653 100644 --- a/docs/Home.md +++ b/docs/Home.md @@ -1,6 +1,6 @@ # Post Scarcity Software Environment: general documentation -Work towards the implementation of a software system like that described in [Post Scarcity Software](https://www.journeyman.cc/blog/posts-output/2006-02-20-postscarcity-software/). +Work towards the implementation of a software system for the hardware of the deep future. ## Note on canonicity @@ -12,7 +12,7 @@ You can read about the current [state of play](State-of-play.md). ## Roadmap -There is now a [roadmap](Roadmap.md) for the project. +There is now a [roadmap](https://www.journeyman.cc/post-scarcity/html/md_workspace_2post-scarcity_2docs_2_roadmap.html) for the project. ## AWFUL WARNING 1 diff --git a/docs/Nodes-threads-locks-links.md b/docs/Nodes-threads-locks-links.md new file mode 100644 index 0000000..8108168 --- /dev/null +++ b/docs/Nodes-threads-locks-links.md @@ -0,0 +1,141 @@ +# Nodes, threads, locks and links + +## The problem + +Up to now, I've been building a single threaded Lisp. I haven't had to worry about who is mutating memory while I'm trying to read it. The idea that this is a mostly immutable Lisp has encouraged me to be blasé about this. But actually, it isn't entirely immutable, and that matters. + +Whenever *any* new datum is created, the freelist pointers have to mutate; whenever any new value is written to any namespace, the namespace has to mutate. The freelist pointers also mutate when objects are allocated and when objects are freed. + +Earlier in the design, I had the idea that in the hypercube system, each node would have a two core processor, one core doing execution — actually evaluating Lisp functions — the other handling inter-node communication. I had at one stage the idea that the memory on the node would be partitioned into fixed areas: + +| Partition | Contents | Core written by | +| --------- | -------- | --------------- | +| Local cons space | Small objects curated locally | Execution | +| Local vector space | Large objects curated locally | Excecution | +| Cache cons space | Copies of small objects curated elsewhere | Communications | +| Cache vector space | Copies of large objects curated elsewhere | Communications | + +So, the execution thread is chuntering merrily along, and it encounters a data item it needs to get from another node. This is intended to happen all the time: every time a function of more than one argument is evaluated, the node will seek to farm out some of the arguments to idle neighbours for evaluation. So the results will often be curated by them. My original vague idea was that the execution node would choose the argument which seemed most costly to evaluate to evaluate locally, pass off the others to neighbours, evaluate the hard one, and by the time that was done probably all the farmed out results would already be back. + +The move from cons space objects to the more flexible [paged space objects](Paged-space-objects.md) doesn't really change this, in principle. There will still be a need for some objects which do not fit into pages, and will thus have to lurk in the outer darkness of vector space. Paged space should make the allocation of objects more efficient, but it doesn't change the fundamental issue + +But there's an inevitable overhead to copying objects over inter-node links. Even if we have 64 bit (plus housekeeping) wide links, copying a four word object still takes four clock ticks. Of course, in the best case, we could be receiving six four word objects over the six links in those four clock ticks, but + +1. The best case only applies to the node initiating a computation; +2. This ignores contention on the communication mesh consequent on hoppity-hop communications between more distant nodes. + +So, even if the execution core correctly chose the most expensive argument to evaluate locally, it's quite likely that when it returns to the stack frame, some results from other nodes have still not arrived. What does it do then? Twiddle its thumbs? + +It could start another thread, declare itself idle, accept a work request from a neighbour, execute that, and return to the frame to see whether its original task was ready to continue. One of the benefits of having the stack in managed space is that a single stack frame can have arbitrarily many 'next' frames, in arbitrarily many threads. This is exactly how [Interlisp](https://dl.acm.org/doi/10.1145/362375.362379) manages multitasking, after all. + +If we do it like that I think we're still safe, because it can't have left any data item in a half-modified state when it switched contexts. + +But nevertheless, we still have the issue of contention between the execution process and the communications process. They both need to be able to mutate freelist pointers; and they both need to be able to mutate explicitly mutable objects, which for the present is just namespaces but this will change. + +We can work around the freelist problem by assigning separate freelists for each size of paged-space objects to each processor, that's just sixteen more words. But if a foreign node wants to change a value in a local namespace, then the communications process needs to be able to make that change. + +Which means we have to be able to lock objects. Which is something I didn't want to have to do. + +## Mutexes + +It's part of the underlying philosophy of the post scarcity project that one person can't be expert in every part of the stack. I don't fully understand the subtleties of thread safe locking. In my initial draft of this essay, I was planning to reserve one bit in the tag of an object as a thread lock. + +There is a well respected standard thread locking library, [`pthreads`](https://www.cs.cmu.edu/afs/cs/academic/class/15492-f07/www/pthreads.html), part of the [POSIX](https://en.wikipedia.org/wiki/POSIX) standard, which implements thread locks. The lock object it implements is called a `mutex` ('mutual exclusion'), and the size of a `mutex` is... complicated. It is declared as a union: + +```c +typedef union +{ + struct __pthread_mutex_s __data; + char __size[__SIZEOF_PTHREAD_MUTEX_T]; + long int __align; +} pthread_mutex_t; + +``` + +I guessed that the `long int __align` member was intended as a contract that this would be *no bigger* than a `long int`, but `long int` may mean 32 or 64 bits depending on context. The payload is clearly `__pthread_mutex_s`; so how big is that? Answer: it varies, dependent on the hardware architecture. But `__SIZEOF_PTHREAD_MUTEX_T` also varies dependent on architecture, and is defined as 40 *bytes* on 64 bit Intel machines: + +```c +#ifdef __x86_64__ +# if __WORDSIZE == 64 +# define __SIZEOF_PTHREAD_MUTEX_T 40 +... +``` + +The header file I have access to declares that for 32 bit Intel machines it's 32 bytes and for all non-Intel machines the size is only 24 bytes, but + +1. the machines I'm working on are actually AMD, but x86 64 bit Intel architecture; and +2. I don't currently have a 64 bit ARM version of this library, and ARM is quite likely to be the architecture I would use for a hardware implementation; + +So let's be cautious. + +Let's also be realistic: what I'm building now is the 0.1.0 prototype, which is not planned to run on even a simulated hypercube, so it doesn't need to have locks at all. I am crossing a bridge I do not yet strictly need to cross. + +## Where to put the lock? + +Currently, we have namespaces implemented as hashtables (or hashmaps, if you prefer, but I appreciate that it's old fashioned). We have hashtables implemented as an array of buckets. We have buckets implemented, currently, as association lists (lists of dotted pairs), although they could later be implemented as further hashtables. We can always cons a new `(key . value)` pair onto the front of an association list; the fact that there may be a different binding of the same key further down the association list doesn't matter, except in so far as it slows further searches down that association list. + +Changing the pointer to the bucket happens in one clock tick: we're writing one 64 bit word to memory over a 64 bit wide address bus. The replacement bucket can — must! — be prepared in advance. So changing the bucket is pretty much an atomic operation. + +But the size of a mutex is uncertain, and **must** fit within the footprint of the namespace object. + +Forty bytes is (on a 64 bit machine) five words; but, more relevantly, our `pso_pointer` object is 64 bits irrespective of hardware architecture, so forty bytes is the size of five (pointers to) buckets. This means that namespaces are no longer 'the same' as hashtables; hashtables can accommodate (at least) five more buckets within a given [paged space object](Paged-space-objects.md) size. But obviously we can — the whole paged space objects architecture is predicated on ensuring that we can — accommodate any moderately sized fixed size datum into a paged space object, so we can accommodate a mutex into the footprint of a namespace object. + +Oh, but wait. + +Oh, but wait, here's a more beautiful idea. + +### First class mutexes + +We can make the mutex a first class object in paged space in its own right. + +This has a number of advantages: + +1. the space we need to reserve in the namespace object is just a pointer like any other pointer, and is not implementation dependent; +2. we can change the implementation of the mutex object, if we need to do so when changing architecture, without changing the implementation of anything which relies on a mutex; +3. mutexes then become available as ordinary objects in the Lisp system, to be used by any Lisp functions which need to do thread-safe locking. + +So we need a new Lisp function, + +```lisp +(with-lock mutex forms...) +``` + +which, when called + +1. waits until it can lock the specified mutex; +2. evaluates each of the forms sequentially in the context of that locked mutex; +3. if evaluation of any of the forms results in the throwing of an exception, catches the exception, unlocks the mutex, and then re-throws the exception; +4. on successful completion of the evaluation of the forms, unlocks the mutex and returns the value of the last form. + +This means that I *could* write the bootstrap layer namespace handling code non-thread-safe, and then reimplement it for the user layer in Lisp, thread-safe. But it also means that users could write thread safe handlers for any new types of mutable object they need to define. + +### Other types + +We don't currently have any other mutable objects, but in future at least lazy objects will be mutable; we may have other things that are mutable. It doesn't seem silly to have a single consistent way to store locks, even if it will only be used in the case of a small minority of objects. + +## Procedure for using the lock + +### Reading namespaces + +Secondly, reading from a namespace does not happen in a single clock tick, it takes quite a long time. So it's no good setting a lock bit on the namespace object itself and then immediately assuming that it's now mutable. A reading process could already have started, and be proceeding. + +So what I think is, that we have a single top level function, `(::substrate:search-store key store return-key?)` (which we already sort of have in the 0.0.6 prototype, [here](https://www.journeyman.cc/post-scarcity/doc/html/intern_8c.html#a2189c0ab60e57a70adeb32aca99dbc43)). This searches a store (hashmap, namespace, association list, or hybrid association list) to find a binding for a key, and, having found that binding, then, if there is a namespace on the search path, checks whether the lock on the any namespace on the search path is set, and if it is, aborts the search and tries again; but otherwise returns either the key found (if `return-key?` is non-`nil`), or the value found otherwise. + +This function implements the user-level Lisp functions `assoc`, `interned`, and `interned?`. It also implements *hashmap-in-function-position* and *keyword-in-function-position*, in so far as both of these are treated as calls to `assoc`. + +### Writing namespaces + +When writing to a namespace, top level function [`(::substrate:set key value store)`](https://www.journeyman.cc/post-scarcity/doc/html/intern_8c.html#af8e370c233928d41c268874a6aa5d9e2), we first try to acquire the lock on the namespace. If it is not available, we pause a short time, and try again. It it is clear, we lock it, then identify the right bucket, then cons the new `(key . value)` pair onto the front of the bucket[^1], then update the bucket pointer, and finally unlock the lock. + +This function implements the user-level Lisp functions `set` and `set!`. + +### Allocating/deallocating objects + +When allocating a new object from a freelist... Actually, a lock on the tag of the `car` of the freelist doesn't work here. The lock has to be somewhere else. We could have a single lock for all freelists; that feels like a bad idea because it means e.g. that you can't allocate stack frames while allocating cons cells, and you're bound to get in a mess there. But actually, allocating and deallocating objects of size class 2 — cons cells, integers, other numbers, links in strings, many other small things — is going to be happening all the time, so I'm not sure that it makes much difference. Most of the contention is going to be in size class 2. Nevertheless, one lock per size class is probably not a bad idea, and doesn't take up much space. + +So: one lock per freelist. + +When allocating *or deallocating* objects, we first try to obtain the lock for the freelist. If it is already locked, wait and try again. If it is clear, lock it, make the necessary change to the freelist, then unlock it. + +[^1]: We probably remove any older bindings of the same key from the bucket at this point, too, because it will speed later searches, but this is not critical. + diff --git a/docs/Paged-space-objects.md b/docs/Paged-space-objects.md new file mode 100644 index 0000000..8ecbd11 --- /dev/null +++ b/docs/Paged-space-objects.md @@ -0,0 +1,69 @@ +# Paged space objects + +*Antecedents for this essay: + +1. [Reference counting, and the garbage collection of equal sized objects](https://www.journeyman.cc/blog/posts-output/2013-08-25-reference-counting-and-the-garbage-collection-of-equal-sized-objects/); +2. [Vector space, Pages, Mark-but-don't-sweep, and the world's slowest ever rapid prototype](https://www.journeyman.cc/blog/posts-output/2026-03-13-The-worlds-slowest-ever-rapid-prototype/).* + +The post-scarcity software environment needs to store data in objects. Much of the data will be in objects which will fit in the memory footpring ot a cons cell, but some won't, and those that won't will be in a variety of sizes. + +Conventionally, operating systems allocate memory as a heap. If you allocate objects of differing sizes from a heap, the heap becoms fragmented, like a [Sierpiński carpet] or [Cantor dust](https://en.wikipedia.org/wiki/Cantor_set#Cantor_dust) — there are lots of holes in it, but it becomes increasingly difficult to find a hole which will fit anything large. + +If we store our objects in containers of standardised sizes, then, for each of those standardised sizes, we can maintain a freelisp of currently unused containers, from which new containers can be allocated. But we still don't want those relatively small objects floating around independently in memory, because we'll still get the fragmentation problem. + +This was the initial motivation behind [cons pages](https://www.journeyman.cc/post-scarcity/html/conspage_8h.html#structcons__page). However, quite early in the development of the prototype, it became obvious that we were allocating and deallocating very many stack frames, and many hash tables, neither of which fit in the memory footprint of a cons cell; and that, going forward, it was likely that we would generate many other sorts of larger objects. + +My first thought was to generalise the cons page idea, and generate pages of equal sized objects; that is, one set of pages for objects (like cons cells) with a two word payload, one for objects with a four word payload, one for objects with an eight word payload, and so on. The key idea was that each of these pages would be of equal size, so that if, say, we needed to allocate more eight word objects and there was a page for two word objects currently empty, the memory footprint could be reassigned: the hole in the carpet would be the right size. + +If we have to allocate an object which needs a five word payload, it will have to be allocated as an eight word object in an eight word object page, which wastes some memory, for the lifetime of that object; but that memory can be efficiently recovered at the end of life, and the heap doesn't fragment. Any page will, at any time, be partly empty, which wastes more memory, but again, that memory can later be efficiently reused. + +The potential problem is that you might end up, say, with many pages for two word objects each of which were partly empty, and have nowhere to allocate new eight word objects; and if this does prove in practice to be a problem, then a mark and sweep garbage collector — something I *really* don't want — will be needed. But that is not a problem for just now. + +## Efficiently allocating pages + +I cannot see how we can efficiently manage pages without each page having some housekeeping data, as every other data object in the system must have a header for housekeeping data. It may be that I am just stuck in my thinking and that the header for pages is not needed, but I *think* it is, and I am going to proceed for now as though it were. + +The problem here is that, on an essentially binary machine, it makes sense to allocate things in powers of two; and, as that makes sense at the level of allocating objects in pages, so it makes sense at the level of the basic heap allocator. I'm proposing to allocate objects in standardised containers of these payload sizes: + +| Tag | | | Size of payload | | +| ---- | ----------- | --- | --------------- | --------------- | +| Bits | Field value | Hex | Number of words | Number of bytes | +| ---- | ----------- | --- | --------------- | --------------- | +| 0000 | 0 | 0 | 1 | 8 | +| 0001 | 1 | 1 | 2 | 16 | +| 0010 | 2 | 2 | 4 | 32 | +| 0011 | 3 | 3 | 8 | 64 | +| 0100 | 4 | 4 | 16 | 128 | +| 0101 | 5 | 5 | 32 | 256 | +| 0110 | 6 | 6 | 64 | 512 | +| 0111 | 7 | 7 | 128 | 1024 | +| 1000 | 8 | 8 | 256 | 2048 | +| 1001 | 9 | 9 | 512 | 4096 | +| 1010 | 10 | A | 1024 | 8192 | +| 1011 | 11 | B | 2048 | 16384 | +| 1100 | 12 | C | 4096 | 32768 | +| 1101 | 13 | D | 8192 | 65536 | +| 1110 | 14 | E | 16384 | 131072 | +| 1111 | 15 | F | 32768 | 262144 | + +This scheme allows me to store the allocation payload size of an object, and consequently the type of a page intended to store objects of that size, in four bits, which is pretty economic. But it's not nothing, and there's a cost to this. The irreducable minimum size of header that objects in the system need to have — in my current design — is two words. So the allocation size of an object with a payload of two words, is four words; but the allocation size of an object with a payload size of thirty two thousand, seven hundred and sixty eight words, is thirty two thousand, seven hundred and seventy words. + +Why does that matter? + +Well, suppose we allocate pages of a megabyte, and we take out of that megabyte a two word page header. Then we can fit 262,143 objects with a payload size of two into that page, and waste only two words. But we can fit only three objects of size 262,144 into such a page, and we waste 262,138 words, which feels bad. + +When I first realised this, I thought, well, the idea was nice, but it doesn't work. There are three potential solutions, each of which feel inelegant to me: + +1. We simply ignore the wasted space; +2. Given that the overwhelming majority of objects used by the system, especially of transient objects, will be of payload size two (allocation size four), we fill all 'spare' space in pages with objects of payload size two, and push them all onto the freelist of objects of payload size two; + (this feels ugly to me because it breaks the idea that all objects on a given page should be of the same size) +3. We treat the size signature of the page — that four bit value — as being related not to the payload size of the ojects to be allocated into the page, but to the allocation size; so that cons cells, with a payload size of two and thus an allocation size of four, would be allocated into pages with a size tag of 0001 and not a size tag of 0010; and we store the housekeeping data for the page itself (waves hands vaguely) somewhere else; + (this feels ugly to me because, for me, the size of an object is its payload size, and I'm deeply bothered by things foating about randomly in memory without identifying information). + +There's a wee bit of autistic insistence on order in my design choices there, that I should not get hung up on. Some objects really do need allocation sizes in memory which are powers of two, but most in fact don't. Currently, the only objects which I commonly allocate and deallocate which are not cons-space objects — not objects with a payload size of two — are stack frames (current payload size 12) and hash tables (current payload size variable, but defaults to 34). + +If we're storing the (encoded) allocation size of each object in the tag of the object — which I think that in the 0.1.0 prototype we will, and if every object on any given page is of the same size, which seems to me a good plan, then I'm not sure that we actually need to store any other housekeeping data on the page, because the header of every object is the same size, and the header of every object in the page holds the critical bit of housekeeping information about the page, so we can always get that value from the header of the first object in the page. + +If we take these two pragmatic compromises together — that the size encoded in the tag of an object is its allocation saize not its payload size, and that the allocation size in the first object on a page is the allocation size for that page — then every page can fit an exact number of objects with no space wasted. + +That's not beautiful but I think it's sensible. diff --git a/docs/Roadmap.md b/docs/Roadmap.md index fb83875..fd227d2 100644 --- a/docs/Roadmap.md +++ b/docs/Roadmap.md @@ -1,17 +1,23 @@ # Roadmap -With the release of 0.0.6 close, it's time to look at a plan for the future development of the project. +With the release of 0.0.6 close, it's time to look at a plan for the future +development of the project. -I have an almost-working Lisp interpreter, which, as an interpreter, has many of the features of the language I want. It runs in one thread on one processor. +I have an almost-working Lisp interpreter, which, as an interpreter, has many +of the features of the language I want. It runs in one thread on one processor. -Given how experimental this all is, I don't think I need it to be a polished interpreter, and polished it isn't. Lots of things are broken. +Given how experimental this all is, I don't think I need it to be a polished +interpreter, and polished it isn't. Lots of things are broken. -* garbage collection is pretty broken, and I'n beginning to doubt my whole garbage collection strategy; +* garbage collection is pretty broken, and I'n beginning to doubt my whole + garbage collection strategy; * bignums are horribly broken; -* there's something very broken in shallow-bound symbols, and that matters and wil have to be fixed; +* there's something very broken in shallow-bound symbols, and that matters + and will have to be fixed; * there are undoubtedly many other bugs I don't know about. -However, while I will fix bugs where I can, it's good enough for other people to play with if they're mad enough, and it's time to move on. +However, while I will fix bugs where I can, it's good enough for other people +to play with if they're mad enough, and it's time to move on. ## Next major milestones @@ -50,44 +56,77 @@ So release 0.1.0, which I'll target for 1st January 2027, will essentially be a Lisp interpreter running on the new substrate and memory architecture, without any significant new features. +See [0.1.0 design decisions](0-1-0-design-decisions.md) for more detail. + ### Simulated hypercube -There is really no point to this whole project while it remains a single thread running on a single processor. Until I can pass off computation to peer neighbours, I can't begin to understand what the right strategies are for when to do so. +There is really no point to this whole project while it remains a single thread +running on a single processor. Until I can pass off computation to peer +neighbours, I can't begin to understand what the right strategies are for when +to do so. -`cond` is explicitly sequential, since later clauses should not be executed at all if earlier ones succeed. `progn` is sort of implicitly sequential, since it's the value of the last form in the sequence which will be returned. +`cond` is explicitly sequential, since later clauses should not be executed at +all if earlier ones succeed. `progn` is sort of implicitly sequential, since +it's the value of the last form in the sequence which will be returned. -For `mapcar`, the right strategy might be to partition the list argument between each of the idle neighbours, and then reassemble the results that come bask. +For `mapcar`, the right strategy might be to partition the list argument +between each of the idle neighbours, and then reassemble the results that come +bask. -For most other things, my hunch is that you pass args which are not self-evaluating to idle neighbours, keeping (at least) one on the originating node to work on while they're busy. +For most other things, my hunch is that you pass args which are not +self-evaluating to idle neighbours, keeping (at least) one on the originating +node to work on while they're busy. -But before that can happen, we need a router on each node which can monitor concurrent traffic on six bidirectional links. I think at least initially what gets written across those links is just S-expressions. +But before that can happen, we need a router on each node which can monitor +concurrent traffic on six bidirectional links. I think at least initially what +gets written across those links is just S-expressions. -I think a working simulated hypercube is the key milestone for version 0.1.1. +I think a working simulated hypercube is the key milestone for version 0.2.0. ### Sysout, sysin, and system persistance -Doctrine is that the post scarcity computing environment doesn't have a file system, but nevertheless we need some way of making an image of a working system so that, after a catastrophic crash or a power outage, it can be brought back up to a known good state. This also really needs to be in 0.1.1. +Doctrine is that the post scarcity computing environment doesn't have a file +system, but nevertheless we need some way of making an image of a working +system so that, after a catastrophic crash or a power outage, it can be brought +back up to a known good state. This really needs to be in 0.1.1. ### Better command line experience -The current command line experience is embarrassingly poor. Recallable input history, input line editing, and a proper structure editor are all things that I will need for my comfort. +The current command line experience is embarrassingly poor. Recallable input +history, input line editing, and a proper structure editor are all things that +I will need for my comfort. ### Users, groups and ACLs -Allowing multiple users to work together within the same post scarcity computing environment while retaining security and privacy is a major goal. So working out ways for users to sign on and be authenticated, and to configure their own environment, and to set up their own access control lists on objects they create, needs to be another nearish term goal. Probably 0.1.2. +Allowing multiple users to work together within the same post scarcity +computing environment while retaining security and privacy is a major goal. So +working out ways for users to sign on and be authenticated, and to configure +their own environment, and to set up their own access control lists on objects +they create, needs to be another nearish term goal. Probably 0.1.2. ### Homogeneities, regularities, slots, migration, permeability -There are a lot of good ideas about the categorisation and organisation of data which are sketched in my original [Post scarcity software](Post-scarcity-software.md) essay which I've never really developed further because I didn't have the right software environment for them, which now I shall have. It would be good to build them. +There are a lot of good ideas about the categorisation and organisation of data +which are sketched in my original +[Post scarcity software](Post-scarcity-software.md) essay which I've never +really developed further because I didn't have the right software environment +for them, which now I shall have. It would be good to build them. ### Compiler -I do want this system to have a compiler. I do want compiled functions to be the default. And I do want to understand how to write my own compiler for a system like this. But until I know what the processor architecture of the system I'm targetting is, worrying too much about a compiler seems premature. +I do want this system to have a compiler. I do want compiled functions to be +the default. And I do want to understand how to write my own compiler for a +system like this. But until I know what the processor architecture of the +system I'm targetting is, worrying too much about a compiler seems premature. ### Graphical User Interface -Ultimately I want a graphical user interface at least as fluid and flexible as what we had on Interlisp machines 40 years ago. It's not a near term goal there. +Ultimately I want a graphical user interface at least as fluid and flexible as +what we had on Interlisp machines 40 years ago. It's not a near term goal yet. ### Real hardware -This machine would be **very** expensive to build, and there's no way I'm ever going to afford more than a sixty-four node machine. But it would be nice to have software which would run effectively on a four billion node machine, if one could ever be built. I think that has to be the target for version 1.0.0. \ No newline at end of file +This machine would be **very** expensive to build, and there's no way I'm ever +going to afford more than a sixty-four node machine. But it would be nice to +have software which would run effectively on a four billion node machine, if +one could ever be built. I think that has to be the target for version 1.0.0. \ No newline at end of file diff --git a/docs/State-of-play.md b/docs/State-of-play.md index 55d9bab..45d553d 100644 --- a/docs/State-of-play.md +++ b/docs/State-of-play.md @@ -1,5 +1,47 @@ # State of Play +## 20260331 + +Substrate layer `print` is written; all the building blocks for substrate +layer `read` is in place. This will read far less than the 0.0.6, but it +will be extensible with read macros *written in Lisp*, so much more flexible, +and will gradually grow to read more than the non-extensible 0.0.6 reader +was. Pleased with myself. + +The new print may grow to be extensible in Lisp, as well. In fact, it will +have to! + +## 20260326 + +Most of the memory architecture of the new prototype is now roughed out, but +in C, not in a more modern language. It doesn't compile yet. + +My C is getting better... but it needed to! + +## 20260323 + +I started an investigastion of the [Zig language](https://ziglang.org/) and +come away frustrated. It's definitely an interesting language, and *I think* +one capable of doing what I want. But in trying to learn, I checked out +someone else's [Lisp interpreter in Zig](https://github.com/cryptocode/bio). +The last commit to this project is six months ago, so fairly current; project +documentation is polished, implying the project is well advanced and by someone +competent. + +It won't build. + +It won't build because there are breaking changes to the build system in the +current version of Zig, and, according to helpful people on the Zig language +Discord, breaking changes in Zig versions are quite frequent. + +Post-scarcity is a project which procedes slowly, and is very large indeed. I +will certainly not complete it before I die. + +I don't feel unstable tools are a good choice. + +I have, however, done more thinking about [Paged space objects], and think I +now have a buildable specification. + ## 20260319 Right, the `member?` bug [is fixed](https://git.journeyman.cc/simon/post-scarcity/issues/11). diff --git a/munit b/munit new file mode 160000 index 0000000..fbbdf14 --- /dev/null +++ b/munit @@ -0,0 +1 @@ +Subproject commit fbbdf1467eb0d04a6ee465def2e529e4c87f2118 diff --git a/src/c/arith/READMDE.md b/src/c/arith/READMDE.md new file mode 100644 index 0000000..f59b772 --- /dev/null +++ b/src/c/arith/READMDE.md @@ -0,0 +1,24 @@ +# README: PSSE substrate arithmetic + +This folder/pseudo package is to implement enough of arithmetic for bootstrap: +that is, enough that all more sophisticated arithmetic can be built on top of +it. + +Ratio arithmetic will not be implemented in the substrate, but `make-ratio` +will. The signature for `make-ratio` will be: + +`(make-ratio dividend divisor) => ratio` + +Both divisor and dividend should be integers. If the divisor is `1` it will +return the dividend (as an integer). If the divisor is 0 it will return ∞. + +This implies we need a privileged data item representing infinity... + +Bignum arithmetic will not be implemented in the substrate, but `make-bignum` +will be. The signature for `make-bignum` will be + +`(make-bignum integer) => bignum` + +If the integer argument is less than 64 bits, the argument will be returned +unmodified. If it is more than 64 bits, a bignum of the same value will be +returned. \ No newline at end of file diff --git a/src/c/debug.c b/src/c/debug.c new file mode 100644 index 0000000..3c7b1bc --- /dev/null +++ b/src/c/debug.c @@ -0,0 +1,138 @@ +/** + * debug.c + * + * Post Scarcity Software Environment: debugging messages. + * + * Print debugging output. + * + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "debug.h" + +int verbosity = 0; + + +/** + * @brief print this debug `message` to stderr, if `verbosity` matches `level`. + * + * `verbosity` is a set of flags, see debug_print.h; so you can + * turn debugging on for only one part of the system. + * + * NOTE THAT: contrary to behaviour in the 0.0.X prototypes, a line feed is + * always printed before a debug_print message. Hopefully this will result + * in clearer formatting. + * + * @param message The message to be printed, in *wide* (32 bit) characters. + * @param level a mask for `verbosity`. If a bitwise and of `verbosity` and + * `level` is non-zero, print this `message`, else don't. + * @param indent print `indent` spaces before the message. + */ +void debug_print( wchar_t *message, int level, int indent ) { +#ifdef DEBUG + if ( level & verbosity ) { + fwide( stderr, 1 ); + fputws( L"\n", stderr ); + for ( int i = 0; i < indent; i++ ) { + fputws( L" ", stderr ); + } + fputws( message, stderr ); + } +#endif +} + +void debug_print_object( struct pso_pointer object, int level, int indent ) { + // TODO: not yet implemented +} + +void debug_dump_object( struct pso_pointer object, int level, int indent ) { + // TODO: not yet implemented +} + +/** + * @brief print a 128 bit integer value to stderr, if `verbosity` matches `level`. + * + * `verbosity` is a set of flags, see debug_print.h; so you can + * turn debugging on for only one part of the system. + * + * stolen from https://stackoverflow.com/questions/11656241/how-to-print-uint128-t-number-using-gcc + * + * @param n the large integer to print. + * @param level a mask for `verbosity`. If a bitwise and of `verbosity` and + * `level` is non-zero, print this `message`, else don't. + */ +void debug_print_128bit( __int128_t n, int level ) { +#ifdef DEBUG + if ( level & verbosity ) { + if ( n == 0 ) { + fwprintf( stderr, L"0" ); + } else { + char str[40] = { 0 }; // log10(1 << 128) + '\0' + char *s = str + sizeof( str ) - 1; // start at the end + while ( n != 0 ) { + if ( s == str ) + return; // never happens + + *--s = "0123456789"[n % 10]; // save last digit + n /= 10; // drop it + } + fwprintf( stderr, L"%s", s ); + } + } +#endif +} + +/** + * @brief print a line feed to stderr, if `verbosity` matches `level`. + * + * `verbosity` is a set of flags, see debug_print.h; so you can + * turn debugging on for only one part of the system. + * + * @param level a mask for `verbosity`. If a bitwise and of `verbosity` and + * `level` is non-zero, print this `message`, else don't. + */ +void debug_println( int level ) { +#ifdef DEBUG + if ( level & verbosity ) { + fwide( stderr, 1 ); + fputws( L"\n", stderr ); + } +#endif +} + + +/** + * @brief `wprintf` adapted for the debug logging system. + * + * Print to stderr only if `verbosity` matches `level`. All other arguments + * as for `wprintf`. + * + * @param level a mask for `verbosity`. If a bitwise and of `verbosity` and + * `level` is non-zero, print this `message`, else don't. + * @param indent print `indent` spaces before the message. + * @param format Format string in *wide characters*, but otherwise as used by + * `printf` and friends. + * + * Remaining arguments should match the slots in the format string. + */ +void debug_printf( int level, int indent, wchar_t *format, ... ) { +#ifdef DEBUG + if ( level & verbosity ) { + fwide( stderr, 1 ); + fputws( L"\n", stderr ); + for ( int i = 0; i < indent; i++ ) { + fputws( L" ", stderr ); + } + va_list( args ); + va_start( args, format ); + vfwprintf( stderr, format, args ); + } +#endif +} + +// debug_dump_object, debug_print_binding, debug_print_exception, debug_print_object, +// not yet implemented but probably will be. diff --git a/src/c/debug.h b/src/c/debug.h new file mode 100644 index 0000000..be9d166 --- /dev/null +++ b/src/c/debug.h @@ -0,0 +1,117 @@ +/** + * debug.h + * + * Post Scarcity Software Environment: entry point. + * + * Print debugging output. + * + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_debug_h +#define __psse_debug_h +#include +#include +#include +#include + +/* + * wide characters + */ +#include +#include + +#include "memory/pointer.h" + +/** + * @brief Print messages debugging memory allocation. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_ALLOC 1 + +/** + * @brief Print messages debugging arithmetic operations. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_ARITH 2 + +/** + * @brief Print messages debugging symbol binding. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_BIND 4 + +/** + * @brief Print messages debugging bootstrapping and teardown. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_BOOTSTRAP 8 + +/** + * @brief Print messages debugging evaluation. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_EVAL 16 + +/** + * @brief Print messages debugging input/output operations. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_IO 32 + +/** + * @brief Print messages debugging lambda functions (interpretation). + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_LAMBDA 64 + +/** + * @brief Print messages debugging the read eval print loop. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_REPL 128 + +/** + * @brief Print messages debugging stack operations. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_STACK 256 + +/** + * @brief Print messages about equality tests. + * + * Flag interpretation for the value of `verbosity`, defined in `debug.c`, q.v. + */ +#define DEBUG_EQUAL 512 + +/** + * @brief Verbosity (and content) of debugging output + * + * Interpreted as a sequence of topic-specific flags, see above. + */ +extern int verbosity; + +void debug_print( wchar_t *message, int level, int indent ); + +void debug_print_object( struct pso_pointer object, int level, int indent ); + +void debug_dump_object( struct pso_pointer object, int level, int indent ); + +void debug_print_128bit( __int128_t n, int level ); + +void debug_println( int level ); + +void debug_printf( int level, int indent, wchar_t *format, ... ); + +#endif diff --git a/src/c/environment/environment.c b/src/c/environment/environment.c new file mode 100644 index 0000000..cf512c4 --- /dev/null +++ b/src/c/environment/environment.c @@ -0,0 +1,36 @@ +/** + * environment/environment.c + * + * Initialise a MINIMAL environment. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" + +/** + * @brief Flag to prevent re-initialisation. + */ +bool environment_initialised = false; + +/** + * @brief Initialise a minimal environment, so that Lisp can be bootstrapped. + * + * @param node theindex of the node we are initialising. + * @return struct pso_pointer t on success, else an exception. + */ + +struct pso_pointer initialise_environment( uint32_t node ) { + struct pso_pointer result = t; + if ( environment_initialised ) { + // TODO: throw an exception "Attempt to reinitialise environment" + } else { + // TODO: actually initialise it. + } + + return result; +} diff --git a/src/c/environment/environment.h b/src/c/environment/environment.h new file mode 100644 index 0000000..4ec736a --- /dev/null +++ b/src/c/environment/environment.h @@ -0,0 +1,15 @@ +/** + * environment/environment.h + * + * Initialise a MINIMAL environment. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_environment_environment_h +#define __psse_environment_environment_h + +struct pso_pointer initialise_environment( uint32_t node ); + +#endif diff --git a/src/c/io/fopen.c b/src/c/io/fopen.c new file mode 100644 index 0000000..983fcd1 --- /dev/null +++ b/src/c/io/fopen.c @@ -0,0 +1,526 @@ +/* + * fopen.c + * + * adapted from https://curl.haxx.se/libcurl/c/fopen.html. + * + * Modifications to read/write wide character streams by + * Simon Brooke. + * + * NOTE THAT: for my purposes, I'm only interested in wide characters, + * and I always read them one character at a time. + * + * Copyright (c) 2003, 2017 Simtec Electronics + * Some portions (c) 2019 Simon Brooke + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This example requires libcurl 7.9.7 or later. + */ + +#include +#include +#ifndef WIN32 +#include +#endif +#include +#include + +#include + +#include "io/fopen.h" +#ifdef FOPEN_STANDALONE +CURLSH *io_share; +#else +#include "memory/pso2.h" +#include "io/io.h" +#include "utils.h" +#endif + + +/* exported functions */ +URL_FILE *url_fopen( const char *url, const char *operation ); +int url_fclose( URL_FILE * file ); +int url_feof( URL_FILE * file ); +size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ); +char *url_fgets( char *ptr, size_t size, URL_FILE * file ); +void url_rewind( URL_FILE * file ); + +/* we use a global one for convenience */ +static CURLM *multi_handle; + +/* curl calls this routine to get more data */ +static size_t write_callback( char *buffer, + size_t size, size_t nitems, void *userp ) { + char *newbuff; + size_t rembuff; + + URL_FILE *url = ( URL_FILE * ) userp; + size *= nitems; + + rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */ + + if ( size > rembuff ) { + /* not enough space in buffer */ + newbuff = realloc( url->buffer, url->buffer_len + ( size - rembuff ) ); + if ( newbuff == NULL ) { + fprintf( stderr, "callback buffer grow failed\n" ); + size = rembuff; + } else { + /* realloc succeeded increase buffer size */ + url->buffer_len += size - rembuff; + url->buffer = newbuff; + } + } + + memcpy( &url->buffer[url->buffer_pos], buffer, size ); + url->buffer_pos += size; + + return size; +} + +/* use to attempt to fill the read buffer up to requested number of bytes */ +static int fill_buffer( URL_FILE *file, size_t want ) { + fd_set fdread; + fd_set fdwrite; + fd_set fdexcep; + struct timeval timeout; + int rc; + CURLMcode mc; /* curl_multi_fdset() return code */ + + /* only attempt to fill buffer if transactions still running and buffer + * doesn't exceed required size already + */ + if ( ( !file->still_running ) || ( file->buffer_pos > want ) ) + return 0; + + /* attempt to fill buffer */ + do { + int maxfd = -1; + long curl_timeo = -1; + + FD_ZERO( &fdread ); + FD_ZERO( &fdwrite ); + FD_ZERO( &fdexcep ); + + /* set a suitable timeout to fail on */ + timeout.tv_sec = 60; /* 1 minute */ + timeout.tv_usec = 0; + + curl_multi_timeout( multi_handle, &curl_timeo ); + if ( curl_timeo >= 0 ) { + timeout.tv_sec = curl_timeo / 1000; + if ( timeout.tv_sec > 1 ) + timeout.tv_sec = 1; + else + timeout.tv_usec = ( curl_timeo % 1000 ) * 1000; + } + + /* get file descriptors from the transfers */ + mc = curl_multi_fdset( multi_handle, &fdread, &fdwrite, &fdexcep, + &maxfd ); + + if ( mc != CURLM_OK ) { + fprintf( stderr, "curl_multi_fdset() failed, code %d.\n", mc ); + break; + } + + /* On success the value of maxfd is guaranteed to be >= -1. We call + select(maxfd + 1, ...); specially in case of (maxfd == -1) there are + no fds ready yet so we call select(0, ...) --or Sleep() on Windows-- + to sleep 100ms, which is the minimum suggested value in the + curl_multi_fdset() doc. */ + + if ( maxfd == -1 ) { +#ifdef _WIN32 + Sleep( 100 ); + rc = 0; +#else + /* Portable sleep for platforms other than Windows. */ + struct timeval wait = { 0, 100 * 1000 }; /* 100ms */ + rc = select( 0, NULL, NULL, NULL, &wait ); +#endif + } else { + /* Note that on some platforms 'timeout' may be modified by select(). + If you need access to the original value save a copy beforehand. */ + rc = select( maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout ); + } + + switch ( rc ) { + case -1: + /* select error */ + break; + + case 0: + default: + /* timeout or readable/writable sockets */ + curl_multi_perform( multi_handle, &file->still_running ); + break; + } + } while ( file->still_running && ( file->buffer_pos < want ) ); + return 1; +} + +/* use to remove want bytes from the front of a files buffer */ +static int use_buffer( URL_FILE *file, size_t want ) { + /* sort out buffer */ + if ( ( file->buffer_pos - want ) <= 0 ) { + /* ditch buffer - write will recreate */ + free( file->buffer ); + file->buffer = NULL; + file->buffer_pos = 0; + file->buffer_len = 0; + } else { + /* move rest down make it available for later */ + memmove( file->buffer, + &file->buffer[want], ( file->buffer_pos - want ) ); + + file->buffer_pos -= want; + } + return 0; +} + +URL_FILE *url_fopen( const char *url, const char *operation ) { + /* this code could check for URLs or types in the 'url' and + basically use the real fopen() for standard files */ + + URL_FILE *file; + ( void ) operation; + + file = calloc( 1, sizeof( URL_FILE ) ); + if ( !file ) + return NULL; + + file->handle.file = fopen( url, operation ); + if ( file->handle.file ) { + file->type = CFTYPE_FILE; /* marked as file */ + } else if ( index_of( ':', url ) > -1 ) { + file->type = CFTYPE_CURL; /* marked as URL */ + file->handle.curl = curl_easy_init( ); + + curl_easy_setopt( file->handle.curl, CURLOPT_URL, url ); + curl_easy_setopt( file->handle.curl, CURLOPT_WRITEDATA, file ); + curl_easy_setopt( file->handle.curl, CURLOPT_VERBOSE, 0L ); + curl_easy_setopt( file->handle.curl, CURLOPT_WRITEFUNCTION, + write_callback ); + /* use the share object */ + curl_easy_setopt( file->handle.curl, CURLOPT_SHARE, io_share ); + + + if ( !multi_handle ) + multi_handle = curl_multi_init( ); + + curl_multi_add_handle( multi_handle, file->handle.curl ); + + /* lets start the fetch */ + curl_multi_perform( multi_handle, &file->still_running ); + + if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) { + /* if still_running is 0 now, we should return NULL */ + + /* make sure the easy handle is not in the multi handle anymore */ + curl_multi_remove_handle( multi_handle, file->handle.curl ); + + /* cleanup */ + curl_easy_cleanup( file->handle.curl ); + + free( file ); + + file = NULL; + } + } else { + file->type = CFTYPE_NONE; + /* not a file, and doesn't look like a URL. */ + } + + return file; +} + +int url_fclose( URL_FILE *file ) { + int ret = 0; /* default is good return */ + + switch ( file->type ) { + case CFTYPE_FILE: + ret = fclose( file->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL: + /* make sure the easy handle is not in the multi handle anymore */ + curl_multi_remove_handle( multi_handle, file->handle.curl ); + + /* cleanup */ + curl_easy_cleanup( file->handle.curl ); + break; + + default: /* unknown or supported type - oh dear */ + ret = EOF; + errno = EBADF; + break; + } + + free( file->buffer ); /* free any allocated buffer space */ + free( file ); + + return ret; +} + +int url_feof( URL_FILE *file ) { + int ret = 0; + + switch ( file->type ) { + case CFTYPE_FILE: + ret = feof( file->handle.file ); + break; + + case CFTYPE_CURL: + if ( ( file->buffer_pos == 0 ) && ( !file->still_running ) ) + ret = 1; + break; + + default: /* unknown or supported type - oh dear */ + ret = -1; + errno = EBADF; + break; + } + return ret; +} + +size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE *file ) { + size_t want; + + switch ( file->type ) { + case CFTYPE_FILE: + want = fread( ptr, size, nmemb, file->handle.file ); + break; + + case CFTYPE_CURL: + want = nmemb * size; + + fill_buffer( file, want ); + + /* check if there's data in the buffer - if not fill_buffer() + * either errored or EOF */ + if ( !file->buffer_pos ) + return 0; + + /* ensure only available data is considered */ + if ( file->buffer_pos < want ) + want = file->buffer_pos; + + /* xfer data to caller */ + memcpy( ptr, file->buffer, want ); + + use_buffer( file, want ); + + want = want / size; /* number of items */ + break; + + default: /* unknown or supported type - oh dear */ + want = 0; + errno = EBADF; + break; + + } + return want; +} + +char *url_fgets( char *ptr, size_t size, URL_FILE *file ) { + size_t want = size - 1; /* always need to leave room for zero termination */ + size_t loop; + + switch ( file->type ) { + case CFTYPE_FILE: + ptr = fgets( ptr, ( int ) size, file->handle.file ); + break; + + case CFTYPE_CURL: + fill_buffer( file, want ); + + /* check if there's data in the buffer - if not fill either errored or + * EOF */ + if ( !file->buffer_pos ) + return NULL; + + /* ensure only available data is considered */ + if ( file->buffer_pos < want ) + want = file->buffer_pos; + + /*buffer contains data */ + /* look for newline or eof */ + for ( loop = 0; loop < want; loop++ ) { + if ( file->buffer[loop] == '\n' ) { + want = loop + 1; /* include newline */ + break; + } + } + + /* xfer data to caller */ + memcpy( ptr, file->buffer, want ); + ptr[want] = 0; /* always null terminate */ + + use_buffer( file, want ); + + break; + + default: /* unknown or supported type - oh dear */ + ptr = NULL; + errno = EBADF; + break; + } + + return ptr; /*success */ +} + +void url_rewind( URL_FILE *file ) { + switch ( file->type ) { + case CFTYPE_FILE: + rewind( file->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL: + /* halt transaction */ + curl_multi_remove_handle( multi_handle, file->handle.curl ); + + /* restart */ + curl_multi_add_handle( multi_handle, file->handle.curl ); + + /* ditch buffer - write will recreate - resets stream pos */ + free( file->buffer ); + file->buffer = NULL; + file->buffer_pos = 0; + file->buffer_len = 0; + + break; + + default: /* unknown or supported type - oh dear */ + break; + } +} + +#ifdef FOPEN_STANDALONE +#define FGETSFILE "fgets.test" +#define FREADFILE "fread.test" +#define REWINDFILE "rewind.test" + +/* Small main program to retrieve from a url using fgets and fread saving the + * output to two test files (note the fgets method will corrupt binary files if + * they contain 0 chars */ +int main( int argc, char *argv[] ) { + URL_FILE *handle; + FILE *outf; + + size_t nread; + char buffer[256]; + const char *url; + + CURL *curl; + CURLcode res; + + curl_global_init( CURL_GLOBAL_DEFAULT ); + + curl = curl_easy_init( ); + + + if ( argc < 2 ) + url = "http://192.168.7.3/testfile"; /* default to testurl */ + else + url = argv[1]; /* use passed url */ + + /* copy from url line by line with fgets */ + outf = fopen( FGETSFILE, "wb+" ); + if ( !outf ) { + perror( "couldn't open fgets output file\n" ); + return 1; + } + + handle = url_fopen( url, "r" ); + if ( !handle ) { + printf( "couldn't url_fopen() %s\n", url ); + fclose( outf ); + return 2; + } + + while ( !url_feof( handle ) ) { + url_fgets( buffer, sizeof( buffer ), handle ); + fwrite( buffer, 1, strlen( buffer ), outf ); + } + + url_fclose( handle ); + + fclose( outf ); + + + /* Copy from url with fread */ + outf = fopen( FREADFILE, "wb+" ); + if ( !outf ) { + perror( "couldn't open fread output file\n" ); + return 1; + } + + handle = url_fopen( "testfile", "r" ); + if ( !handle ) { + printf( "couldn't url_fopen() testfile\n" ); + fclose( outf ); + return 2; + } + + do { + nread = url_fread( buffer, 1, sizeof( buffer ), handle ); + fwrite( buffer, 1, nread, outf ); + } while ( nread ); + + url_fclose( handle ); + + fclose( outf ); + + + /* Test rewind */ + outf = fopen( REWINDFILE, "wb+" ); + if ( !outf ) { + perror( "couldn't open fread output file\n" ); + return 1; + } + + handle = url_fopen( "testfile", "r" ); + if ( !handle ) { + printf( "couldn't url_fopen() testfile\n" ); + fclose( outf ); + return 2; + } + + nread = url_fread( buffer, 1, sizeof( buffer ), handle ); + fwrite( buffer, 1, nread, outf ); + url_rewind( handle ); + + buffer[0] = '\n'; + fwrite( buffer, 1, 1, outf ); + + nread = url_fread( buffer, 1, sizeof( buffer ), handle ); + fwrite( buffer, 1, nread, outf ); + + url_fclose( handle ); + + fclose( outf ); + + return 0; /* all done */ +} +#endif diff --git a/src/c/io/fopen.h b/src/c/io/fopen.h new file mode 100644 index 0000000..5bffe92 --- /dev/null +++ b/src/c/io/fopen.h @@ -0,0 +1,83 @@ +/* + * io/fopen.h + * + * adapted from https://curl.haxx.se/libcurl/c/fopen.html. + * + * + * Modifications to read/write wide character streams by + * Simon Brooke. + * + * NOTE THAT: for my purposes, I'm only interested in wide characters, + * and I always read them one character at a time. + * + * Copyright (c) 2003, 2017 Simtec Electronics + * Some portions (c) 2019 Simon Brooke + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * This example requires libcurl 7.9.7 or later. + */ + +#ifndef __fopen_h +#define __fopen_h +#include +/* + * wide characters + */ +#include +#include + +#define url_fwprintf(f, ...) ((f->type = CFTYPE_FILE) ? fwprintf( f->handle.file, __VA_ARGS__) : -1) +#define url_fputws(ws, f) ((f->type = CFTYPE_FILE) ? fputws(ws, f->handle.file) : 0) +#define url_fputwc(wc, f) ((f->type = CFTYPE_FILE) ? fputwc(wc, f->handle.file) : 0) + +enum fcurl_type_e { + CFTYPE_NONE = 0, + CFTYPE_FILE = 1, + CFTYPE_CURL = 2 +}; + +struct fcurl_data { + enum fcurl_type_e type; /* type of handle */ + union { + CURL *curl; + FILE *file; + } handle; /* handle */ + + char *buffer; /* buffer to store cached data */ + size_t buffer_len; /* currently allocated buffer's length */ + size_t buffer_pos; /* cursor into in buffer */ + int still_running; /* Is background url fetch still in progress */ +}; + +typedef struct fcurl_data URL_FILE; + +/* exported functions */ +URL_FILE *url_fopen( const char *url, const char *operation ); +int url_fclose( URL_FILE * file ); +int url_feof( URL_FILE * file ); +size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file ); +char *url_fgets( char *ptr, size_t size, URL_FILE * file ); +void url_rewind( URL_FILE * file ); + +#endif diff --git a/src/c/io/io.c b/src/c/io/io.c new file mode 100644 index 0000000..e9b40e1 --- /dev/null +++ b/src/c/io/io.c @@ -0,0 +1,620 @@ +/* + * io.c + * + * Communication between PSSE and the outside world, via libcurl. NOTE + * that this file destructively changes metadata on URL connections, + * because the metadata is not available until the stream has been read + * from. It would be better to find a workaround! + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* + * wide characters + */ +#include +#include + +#include + +//#include "arith/integer.h" +#include "debug.h" +#include "io/fopen.h" +#include "io/io.h" + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso2.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +// #include "ops/intern.h" +// #include "ops/lispops.h" + +#include "ops/stack_ops.h" +#include "ops/string_ops.h" +#include "ops/truth.h" + +#include "payloads/character.h" +#include "payloads/cons.h" +#include "payloads/exception.h" +#include "payloads/integer.h" +#include "payloads/stack.h" + +#include "utils.h" + +/** + * The sharing hub for all connections. TODO: Ultimately this probably doesn't + * work for a multi-user environment and we will need one sharing hub for each + * user, or else we will need to not share at least cookies and ssl sessions. + */ +CURLSH *io_share; + +/** + * @brief bound to the Lisp string representing C_IO_IN in initialisation. + */ +struct pso_pointer lisp_io_in; +/** + * @brief bound to the Lisp string representing C_IO_OUT in initialisation. + */ +struct pso_pointer lisp_io_out; + + +/** + * Allow a one-character unget facility. This may not be enough - we may need + * to allocate a buffer. + */ +wint_t ungotten = 0; + +/** + * Initialise the I/O subsystem. + * + * @return 0 on success; any other value means failure. + */ +int initialise_io( ) { + int result = curl_global_init( CURL_GLOBAL_SSL ); + + io_share = curl_share_init( ); + + if ( result == 0 ) { + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, + CURL_LOCK_DATA_SSL_SESSION ); + curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL ); + } + + return result; +} + +/** + * Convert this lisp string-like-thing (also works for symbols, and, later + * keywords) into a UTF-8 string. NOTE that the returned value has been + * malloced and must be freed. TODO: candidate to moving into a utilities + * file. + * + * @param s the lisp string or symbol; + * @return the c string. + */ +char *lisp_string_to_c_string( struct pso_pointer s ) { + char *result = NULL; + + if ( stringp( s ) || symbolp( s ) ) { + int len = 0; + + for ( struct pso_pointer c = s; !nilp( c ); c = cdr( c ) ) { + len++; + } + + wchar_t *buffer = calloc( len + 1, sizeof( wchar_t ) ); + /* worst case, one wide char = four utf bytes */ + result = calloc( ( len * 4 ) + 1, sizeof( char ) ); + + int i = 0; + for ( struct pso_pointer c = s; !nilp( c ); c = cdr( c ) ) { + buffer[i++] = pointer_to_object( c )->payload.string.character; + } + + wcstombs( result, buffer, len ); + free( buffer ); + } + + debug_print( L"lisp_string_to_c_string( ", DEBUG_IO, 0 ); + debug_print_object( s, DEBUG_IO, 0 ); + debug_printf( DEBUG_IO, 0, L") => '%s'\n", result ); + + return result; +} + + +/** + * given this file handle f, return a new url_file handle wrapping it. + * + * @param f the file to be wrapped; + * @return the new handle, or null if no such handle could be allocated. + */ +URL_FILE *file_to_url_file( FILE *f ) { + URL_FILE *result = ( URL_FILE * ) malloc( sizeof( URL_FILE ) ); + + if ( result != NULL ) { + result->type = CFTYPE_FILE, result->handle.file = f; + } + + return result; +} + + +/** + * get one wide character from the buffer. + * + * @param file the stream to read from; + * @return the next wide character on the stream, or zero if no more. + */ +wint_t url_fgetwc( URL_FILE *input ) { + wint_t result = -1; + + if ( ungotten != 0 ) { + /* TODO: not thread safe */ + result = ungotten; + ungotten = 0; + } else { + switch ( input->type ) { + case CFTYPE_FILE: + fwide( input->handle.file, 1 ); /* wide characters */ + result = fgetwc( input->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL:{ + char *cbuff = + calloc( sizeof( wchar_t ) + 2, sizeof( char ) ); + wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) ); + + size_t count = 0; + + debug_print( L"url_fgetwc: about to call url_fgets\n", + DEBUG_IO, 0 ); + url_fgets( cbuff, 2, input ); + debug_print( L"url_fgetwc: back from url_fgets\n", + DEBUG_IO, 0 ); + int c = ( int ) cbuff[0]; + // TODO: risk of reading off cbuff? + debug_printf( DEBUG_IO, 0, + L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n", + cbuff, c, c & 0xf7 ); + /* The value of each individual byte indicates its UTF-8 function, as follows: + * + * 00 to 7F hex (0 to 127): first and only byte of a sequence. + * 80 to BF hex (128 to 191): continuing byte in a multi-byte sequence. + * C2 to DF hex (194 to 223): first byte of a two-byte sequence. + * E0 to EF hex (224 to 239): first byte of a three-byte sequence. + * F0 to FF hex (240 to 255): first byte of a four-byte sequence. + */ + if ( c <= 0xf7 ) { + count = 1; + } else if ( c >= 0xc2 && c <= 0xdf ) { + count = 2; + } else if ( c >= 0xe0 && c <= 0xef ) { + count = 3; + } else if ( c >= 0xf0 && c <= 0xff ) { + count = 4; + } + + if ( count > 1 ) { + url_fgets( ( char * ) &cbuff[1], count, input ); + } + mbstowcs( wbuff, cbuff, 2 ); //(char *)(&input->buffer[input->buffer_pos]), 1 ); + result = wbuff[0]; + + free( wbuff ); + free( cbuff ); + } + break; + case CFTYPE_NONE: + break; + } + } + + debug_printf( DEBUG_IO, 0, L"url_fgetwc returning %d (%C)\n", result, + result ); + return result; +} + +wint_t url_ungetwc( wint_t wc, URL_FILE *input ) { + wint_t result = -1; + + switch ( input->type ) { + case CFTYPE_FILE: + fwide( input->handle.file, 1 ); /* wide characters */ + result = ungetwc( wc, input->handle.file ); /* passthrough */ + break; + + case CFTYPE_CURL:{ + ungotten = wc; + break; + case CFTYPE_NONE: + break; + } + } + + return result; +} + +/** + * @brief Read one character object from this `read_stream`. + * + * @param read_stream a pointer to an object which should be a read stream + * object, + * + * @return a pointer to a character object on success, or `nil` on failure. + */ +struct pso_pointer get_character( struct pso_pointer read_stream ) { + struct pso_pointer result = nil; + + if ( readp( read_stream ) ) { + result = + make_character( url_fgetwc + ( pointer_to_object_of_size_class + ( read_stream, 2 )->payload.stream.stream ) ); + } + + return result; +} + +/** + * @brief Push back this character `c` onto this read stream `r`. + * + * @param c a pointer to an object which should be a character object; + * @param r a pointer to an object which should be a read stream object, + * + * @return `t` on success, else `nil`. + */ +struct pso_pointer push_back_character( struct pso_pointer c, + struct pso_pointer r ) { + struct pso_pointer result = nil; + + if ( characterp( c ) && readp( r ) ) { + if ( url_ungetwc( ( wint_t ) + ( pointer_to_object( c )->payload.character. + character ), + pointer_to_object( r )->payload.stream.stream ) >= + 0 ) { + result = t; + } + } + return result; +} + +/** + * Function, sort-of: close the file indicated by my first arg, and return + * nil. If the first arg is not a stream, does nothing. All other args are + * ignored. + * + * * (close stream) + * + * @param frame my stack frame. + * @param frame_pointer a pointer to my stack frame. + * @param env my environment. + * @return T if the stream was successfully closed, else nil. + */ +struct pso_pointer +lisp_close( struct pso_pointer frame_pointer, struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer result = nil; + + if ( readp( fetch_arg( frame, 0 ) ) || writep( fetch_arg( frame, 0 ) ) ) { + if ( url_fclose + ( pointer_to_object( fetch_arg( frame, 0 ) )->payload. + stream.stream ) + == 0 ) { + result = t; + } + } + + return result; +} + +struct pso_pointer add_meta_integer( struct pso_pointer meta, wchar_t *key, + long int value ) { + return + cons( cons + ( c_string_to_lisp_keyword( key ), + make_integer( value ) ), meta ); +} + +struct pso_pointer add_meta_string( struct pso_pointer meta, wchar_t *key, + char *value ) { + value = trim( value ); + wchar_t buffer[strlen( value ) + 1]; + mbstowcs( buffer, value, strlen( value ) + 1 ); + + return cons( cons( c_string_to_lisp_keyword( key ), + c_string_to_lisp_string( buffer ) ), meta ); +} + +struct pso_pointer add_meta_time( struct pso_pointer meta, wchar_t *key, + time_t *value ) { + /* I don't yet have a concept of a date-time object, which is a + * bit of an oversight! */ + char datestring[256]; + + strftime( datestring, + sizeof( datestring ), + nl_langinfo( D_T_FMT ), localtime( value ) ); + + return add_meta_string( meta, key, datestring ); +} + +/** + * Callback to assemble metadata for a URL stream. This is naughty because + * it modifies data, but it's really the only way to create metadata. + */ +static size_t write_meta_callback( char *string, size_t size, size_t nmemb, + struct pso_pointer stream ) { + struct pso2 *cell = pointer_to_object( stream ); + + // TODO: reimplement + + /* make a copy of the string that we can destructively change */ + // char *s = calloc( strlen( string ), sizeof( char ) ); + + // strcpy( s, string ); + + // if ( check_tag( cell, READTV) || + // check_tag( cell, WRITETV) ) { + // int offset = index_of( ':', s ); + + // if ( offset != -1 ) { + // s[offset] = ( char ) 0; + // char *name = trim( s ); + // char *value = trim( &s[++offset] ); + // wchar_t wname[strlen( name )]; + + // mbstowcs( wname, name, strlen( name ) + 1 ); + + // cell->payload.stream.meta = + // add_meta_string( cell->payload.stream.meta, wname, value ); + + // debug_printf( DEBUG_IO, + // L"write_meta_callback: added header '%s': value '%s'\n", + // name, value ); + // } else if ( strncmp( "HTTP", s, 4 ) == 0 ) { + // int offset = index_of( ' ', s ); + // char *value = trim( &s[offset] ); + + // cell->payload.stream.meta = + // add_meta_integer( add_meta_string + // ( cell->payload.stream.meta, L"status", + // value ), L"status-code", strtol( value, + // NULL, + // 10 ) ); + + // debug_printf( DEBUG_IO, + // L"write_meta_callback: added header 'status': value '%s'\n", + // value ); + // } else { + // debug_printf( DEBUG_IO, + // L"write_meta_callback: header passed with no colon: '%s'\n", + // s ); + // } + // } else { + // debug_print + // ( L"Pointer passed to write_meta_callback did not point to a stream: ", + // DEBUG_IO ); + // debug_dump_object( stream, DEBUG_IO ); + // } + + // free( s ); + return 0; // strlen( string ); +} + +void collect_meta( struct pso_pointer stream, char *url ) { + struct pso2 *cell = pointer_to_object( stream ); + URL_FILE *s = pointer_to_object( stream )->payload.stream.stream; + struct pso_pointer meta = + add_meta_string( cell->payload.stream.meta, L"url", url ); + struct stat statbuf; + int result = stat( url, &statbuf ); + struct passwd *pwd; + struct group *grp; + + switch ( s->type ) { + case CFTYPE_NONE: + break; + case CFTYPE_FILE: + if ( result == 0 ) { + if ( ( pwd = getpwuid( statbuf.st_uid ) ) != NULL ) { + meta = add_meta_string( meta, L"owner", pwd->pw_name ); + } else { + meta = add_meta_integer( meta, L"owner", statbuf.st_uid ); + } + + if ( ( grp = getgrgid( statbuf.st_gid ) ) != NULL ) { + meta = add_meta_string( meta, L"group", grp->gr_name ); + } else { + meta = add_meta_integer( meta, L"group", statbuf.st_gid ); + } + + meta = + add_meta_integer( meta, L"size", + ( intmax_t ) statbuf.st_size ); + + meta = add_meta_time( meta, L"modified", &statbuf.st_mtime ); + } + break; + case CFTYPE_CURL: + curl_easy_setopt( s->handle.curl, CURLOPT_VERBOSE, 1L ); + curl_easy_setopt( s->handle.curl, CURLOPT_HEADERFUNCTION, + write_meta_callback ); + curl_easy_setopt( s->handle.curl, CURLOPT_HEADERDATA, stream ); + break; + } + + /* this is destructive change before the cell is released into the + * wild, and consequently permissible, just. */ + cell->payload.stream.meta = meta; +} + +/** + * Resutn the current default input, or of `inputp` is false, output stream from + * this `env`ironment. + */ +struct pso_pointer get_default_stream( bool inputp, struct pso_pointer env ) { + struct pso_pointer result = nil; + // struct pso_pointer stream_name = inputp ? lisp_io_in : lisp_io_out; + + // result = c_assoc( stream_name, env ); + + return result; +} + + +/** + * Function: return a stream open on the URL indicated by the first argument; + * if a second argument is present and is non-nil, open it for writing. At + * present, further arguments are ignored and there is no mechanism to open + * to append, or error if the URL is faulty or indicates an unavailable + * resource. + * + * * (open url) + * + * @param frame my stack frame. + * @param frame_pointer a pointer to my stack frame. + * @param env my environment. + * @return a string of one character, namely the next available character + * on my stream, if any, else nil. + */ +struct pso_pointer +lisp_open( struct pso_pointer frame_pointer, struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer result = nil; + + // if ( stringp( fetch_arg( frame, 0) ) ) { + // char *url = lisp_string_to_c_string( fetch_arg( frame, 0) ); + + // if ( nilp( fetch_arg( frame, 1) ) ) { + // URL_FILE *stream = url_fopen( url, "r" ); + + // debug_printf( DEBUG_IO, 0, + // L"lisp_open: stream @ %ld, stream type = %d, stream handle = %ld\n", + // ( long int ) &stream, ( int ) stream->type, + // ( long int ) stream->handle.file ); + + // switch ( stream->type ) { + // case CFTYPE_NONE: + // return + // make_exception( c_string_to_lisp_string + // ( L"Could not open stream" ), + // frame_pointer , nil ); + // break; + // case CFTYPE_FILE: + // if ( stream->handle.file == NULL ) { + // return + // make_exception( c_string_to_lisp_string + // ( L"Could not open file" ), + // frame_pointer , nil); + // } + // break; + // case CFTYPE_CURL: + // /* can't tell whether a URL is bad without reading it */ + // break; + // } + + // result = make_read_stream( stream, nil ); + // } else { + // // TODO: anything more complex is a problem for another day. + // URL_FILE *stream = url_fopen( url, "w" ); + // result = make_write_stream( stream, nil ); + // } + + // if ( pointer_to_object( result )->payload.stream.stream == NULL ) { + // result = nil; + // } else { + // collect_meta( result, url ); + // } + + // free( url ); + // } + + return result; +} + +/** + * Function: return the next character from the stream indicated by arg 0; + * further arguments are ignored. + * + * * (read-char stream) + * + * @param frame my stack frame. + * @param frame_pointer a pointer to my stack frame. + * @param env my environment. + * @return a string of one character, namely the next available character + * on my stream, if any, else nil. + */ +struct pso_pointer +lisp_read_char( struct pso_pointer frame_pointer, struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer result = nil; + + if ( readp( fetch_arg( frame, 0 ) ) ) { + result = + make_string( url_fgetwc + ( pointer_to_object( fetch_arg( frame, 0 ) )->payload. + stream.stream ), nil ); + } + + return result; +} + +/** + * Function: return a string representing all characters from the stream + * indicated by arg 0; further arguments are ignored. + * + * TODO: it should be possible to optionally pass a string URL to this function, + * + * * (slurp stream) + * + * @param frame my stack frame. + * @param frame_pointer a pointer to my stack frame. + * @param env my environment. + * @return a string of one character, namely the next available character + * on my stream, if any, else nil. + */ +struct pso_pointer +lisp_slurp( struct pso_pointer frame_pointer, struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer result = nil; + + if ( readp( fetch_arg( frame, 0 ) ) ) { + URL_FILE *stream = + pointer_to_object( fetch_arg( frame, 0 ) )->payload.stream.stream; + struct pso_pointer cursor = make_string( url_fgetwc( stream ), nil ); + result = cursor; + + for ( wint_t c = url_fgetwc( stream ); !url_feof( stream ) && c != 0; + c = url_fgetwc( stream ) ) { + debug_print( L"slurp: cursor is: ", DEBUG_IO, 0 ); + debug_dump_object( cursor, DEBUG_IO, 0 ); + debug_print( L"; result is: ", DEBUG_IO, 0 ); + debug_dump_object( result, DEBUG_IO, 0 ); + debug_println( DEBUG_IO ); + + struct pso2 *cell = pointer_to_object( cursor ); + cursor = make_string( ( wchar_t ) c, nil ); + cell->payload.string.cdr = cursor; + } + } + + return result; +} diff --git a/src/c/io/io.h b/src/c/io/io.h new file mode 100644 index 0000000..703ed2e --- /dev/null +++ b/src/c/io/io.h @@ -0,0 +1,50 @@ + +/* + * io.h + * + * Communication between PSSE and the outside world, via libcurl. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_io_io_h +#define __psse_io_io_h +#include + +#include "memory/pointer.h" +#include "memory/pso2.h" +#include "memory/pso4.h" + +extern CURLSH *io_share; + +int initialise_io( ); + +#define C_IO_IN L"*in*" +#define C_IO_OUT L"*out*" + +extern struct pso_pointer lisp_io_in; +extern struct pso_pointer lisp_io_out; + +URL_FILE *file_to_url_file( FILE * f ); +wint_t url_fgetwc( URL_FILE * input ); +wint_t url_ungetwc( wint_t wc, URL_FILE * input ); + +struct pso_pointer get_character( struct pso_pointer read_stream ); + +struct pso_pointer push_back_character( struct pso_pointer c, + struct pso_pointer r ); + +struct pso_pointer get_default_stream( bool inputp, struct pso_pointer env ); + +struct pso_pointer +lisp_close( struct pso_pointer frame_pointer, struct pso_pointer env ); +struct pso_pointer +lisp_open( struct pso_pointer frame_pointer, struct pso_pointer env ); +struct pso_pointer +lisp_read_char( struct pso_pointer frame_pointer, struct pso_pointer env ); +struct pso_pointer +lisp_slurp( struct pso_pointer frame_pointer, struct pso_pointer env ); + +char *lisp_string_to_c_string( struct pso_pointer s ); +#endif diff --git a/src/c/io/print.c b/src/c/io/print.c new file mode 100644 index 0000000..e56babf --- /dev/null +++ b/src/c/io/print.c @@ -0,0 +1,128 @@ +/** + * io/print.c + * + * Post Scarcity Software Environment: print. + * + * Print basic Lisp objects..This is :bootstrap layer print; it needs to be + * able to print characters, symbols, integers, lists and dotted pairs. I + * don't think it needs to be able to print anything else. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include +#include +/* + * wide characters + */ +#include +#include +/* libcurl, used for io */ +#include + +#include "io/fopen.h" +#include "io/io.h" + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "payloads/character.h" +#include "payloads/cons.h" +#include "payloads/integer.h" + +struct pso_pointer in_print( struct pso_pointer p, URL_FILE * output ); + +struct pso_pointer print_list_content( struct pso_pointer p, URL_FILE *output ) { + struct pso_pointer result = nil; + + if ( consp( p ) ) { + for ( ; consp( p ); p = cdr( p ) ) { + struct pso2 *object = pointer_to_object( p ); + + result = in_print( object->payload.cons.car, output ); + + if ( exceptionp( result ) ) + break; + + switch ( get_tag_value( object->payload.cons.cdr ) ) { + case NILTV: + break; + case CONSTV: + url_fputwc( L' ', output ); + break; + default: + url_fputws( L" . ", output ); + result = in_print( object->payload.cons.cdr, output ); + } + + } + } else { + // TODO: return exception + } + + return result; +} + +struct pso_pointer in_print( struct pso_pointer p, URL_FILE *output ) { + struct pso2 *object = pointer_to_object( p ); + struct pso_pointer result = nil; + + if ( object != NULL ) { + switch ( get_tag_value( p ) ) { + case CHARACTERTV: + url_fputwc( object->payload.character.character, output ); + break; + case CONSTV: + url_fputwc( L'(', output ); + result = print_list_content( p, output ); + url_fputwc( L')', output ); + break; + case INTEGERTV: + url_fwprintf( output, L"%d", + ( int64_t ) ( object->payload.integer.value ) ); + break; + case TRUETV: + url_fputwc( L't', output ); + break; + case NILTV: + url_fputws( L"nil", output ); + default: + // TODO: return exception + } + } else { + // TODO: return exception + } + + return result; +} + +/** + * @brief Simple print for bootstrap layer. + * + * @param p pointer to the object to print. + * @param stream if a pointer to an open write stream, print to there. + * @return struct pso_pointer `nil`, or an exception if some erroe occurred. + */ +struct pso_pointer print( struct pso_pointer p, struct pso_pointer stream ) { + URL_FILE *output = writep( stream ) ? + pointer_to_object( stream )->payload.stream.stream : + file_to_url_file( stdout ); + + if ( writep( stream ) ) { + inc_ref( stream ); + } + + struct pso_pointer result = in_print( p, output ); + + if ( writep( stream ) ) { + dec_ref( stream ); + } + + return result; +} diff --git a/src/c/io/print.h b/src/c/io/print.h new file mode 100644 index 0000000..eb728c3 --- /dev/null +++ b/src/c/io/print.h @@ -0,0 +1,19 @@ +/** + * io/print.c + * + * Post Scarcity Software Environment: print. + * + * Print basic Lisp objects..This is :bootstrap layer print; it needs to be + * able to print characters, symbols, integers, lists and dotted pairs. I + * don't think it needs to be able to print anything else. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_io_print_h +#define __psse_io_print_h + +struct pso_pointer print( struct pso_pointer p, struct pso_pointer stream ); + +#endif diff --git a/src/c/io/read.c b/src/c/io/read.c new file mode 100644 index 0000000..71c96f8 --- /dev/null +++ b/src/c/io/read.c @@ -0,0 +1,232 @@ +/** + * read.c + * + * Read basic Lisp objects..This is :bootstrap layer print; it needs to be + * able to read characters, symbols, integers, lists and dotted pairs. I + * don't think it needs to be able to read anything else. It must, however, + * take a readtable as argument and expand reader macros. + * + * + * (c) 2017 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include +#include + +/* + * wide characters + */ +#include +#include + +#include "debug.h" + +#include "io/io.h" +#include "io/read.h" +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "payloads/integer.h" +#include "payloads/read_stream.h" + +#include "ops/assoc.h" +#include "ops/reverse.h" +#include "ops/stack_ops.h" +#include "ops/string_ops.h" +#include "ops/truth.h" + +// TODO: what I've copied from 0.0.6 is *wierdly* over-complex for just now. +// I think I'm going to essentially delete all this and start again. We need +// to be able to despatch on readttables, and the initial readtable functions +// don't need to be written in Lisp. +// +// In the long run a readtable ought to be a hashtable, but for now an assoc +// list will do. +// +// A readtable function is a Lisp function so needs the stackframe and the +// environment. Other arguments (including the output stream) should be passed +// in the argument, so I think the first arg in the frame is the character read; +// the next is the input stream; the next is the readtable, if any. + +/* + * for the time being things which may be read are: + * * integers + * * lists + * * atoms + * * dotted pairs + */ + +/** + * An example wrapper function while I work out how I'm going to do this. + * + * For this and all other `read` functions unless documented otherwise, the + * arguments in the frame are expected to be: + * + * 0. The input stream to read from; + * 1. The read table currently in use; + * 2. The character most recently read from that stream. + */ +struct pso_pointer read_example( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer stream = fetch_arg( frame, 0 ); + struct pso_pointer readtable = fetch_arg( frame, 1 ); + struct pso_pointer character = fetch_arg( frame, 2 ); + struct pso_pointer result = nil; + + return result; +} + +/** + * @brief Read one integer from the stream and return it. + * + * For this and all other `read` functions unless documented otherwise, the + * arguments in the frame are expected to be: + * + * 0. The input stream to read from; + * 1. The read table currently in use; + * 2. The character most recently read from that stream. + */ +struct pso_pointer read_number( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer stream = fetch_arg( frame, 0 ); + struct pso_pointer readtable = fetch_arg( frame, 1 ); + struct pso_pointer character = fetch_arg( frame, 2 ); + struct pso_pointer result = nil; + + int base = 10; + // TODO: should check for *read-base* in the environment + int64_t value = 0; + + if ( readp( stream ) ) { + if ( nilp( character ) ) { + character = get_character( stream ); + } + wchar_t c = nilp( character ) ? 0 : + pointer_to_object( character )->payload.character.character; + + URL_FILE *input = pointer_to_object( stream )->payload.stream.stream; + for ( ; iswdigit( c ); c = url_fgetwc( input ) ) { + value = ( value * base ) + ( ( int ) c - ( int ) L'0' ); + } + + url_ungetwc( c, input ); + result = make_integer( value ); + } // else exception? + + return result; +} + +struct pso_pointer read_symbol( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer stream = fetch_arg( frame, 0 ); + struct pso_pointer readtable = fetch_arg( frame, 1 ); + struct pso_pointer character = fetch_arg( frame, 2 ); + struct pso_pointer result = nil; + + if ( readp( stream ) ) { + if ( nilp( character ) ) { + character = get_character( stream ); + } + + wchar_t c = nilp( character ) ? 0 : + pointer_to_object( character )->payload.character.character; + + URL_FILE *input = pointer_to_object( stream )->payload.stream.stream; + for ( ; iswalnum( c ); c = url_fgetwc( input ) ) { + result = make_string_like_thing( c, result, SYMBOLTAG ); + } + + url_ungetwc( c, input ); + result = reverse( result ); + } + + return result; +} + +/** + * @brief Read the next object on the input stream indicated by this stack + * frame, and return a pso_pointer to the object read. + * + * For this and all other `read` functions unless documented otherwise, the + * arguments in the frame are expected to be: + * + * 0. The input stream to read from; + * 1. The read table currently in use; + * 2. The character most recently read from that stream. + */ +struct pso_pointer read( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer stream = fetch_arg( frame, 0 ); + struct pso_pointer readtable = fetch_arg( frame, 1 ); + struct pso_pointer character = fetch_arg( frame, 2 ); + + struct pso_pointer result = nil; + + if ( nilp( stream ) ) { + stream = make_read_stream( file_to_url_file( stdin ), nil ); + } + + if ( nilp( readtable ) ) { + // TODO: check for the value of `*read-table*` in the environment and + // use that. + } + + if ( nilp( character ) ) { + character = get_character( stream ); + } + + struct pso_pointer readmacro = assoc( character, readtable ); + + if ( !nilp( readmacro ) ) { + // invoke the read macro on the stream + } else if ( readp( stream ) && characterp( character ) ) { + wchar_t c = + pointer_to_object( character )->payload.character.character; + URL_FILE *input = pointer_to_object( stream )->payload.stream.stream; + + switch ( c ) { + case ';': + for ( c = url_fgetwc( input ); c != '\n'; + c = url_fgetwc( input ) ); + /* skip all characters from semi-colon to the end of the line */ + break; + case EOF: +// result = throw_exception( c_string_to_lisp_symbol( L"read" ), +// c_string_to_lisp_string +// ( L"End of input while reading" ), +// frame_pointer ); + break; + default: + struct pso_pointer next = + make_frame( frame_pointer, stream, readtable, + make_character( c ) ); + inc_ref( next ); + if ( iswdigit( c ) ) { + result = read_number( next, env ); + } else if ( iswalpha( c ) ) { + result = read_symbol( next, env ); + } else { +// result = +// throw_exception( c_string_to_lisp_symbol( L"read" ), +// make_cons( c_string_to_lisp_string +// ( L"Unrecognised start of input character" ), +// make_string( c, NIL ) ), +// frame_pointer ); + } + dec_ref( next ); + break; + } + } + + return result; +} diff --git a/src/c/io/read.h b/src/c/io/read.h new file mode 100644 index 0000000..a3e0ffc --- /dev/null +++ b/src/c/io/read.h @@ -0,0 +1,25 @@ +/** + * read.h + * + * Read basic Lisp objects..This is :bootstrap layer print; it needs to be + * able to read characters, symbols, integers, lists and dotted pairs. I + * don't think it needs to be able to read anything else. It must, however, + * take a readtable as argument and expand reader macros. + * + * + * (c) 2017 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_io_read_h +#define __psse_io_read_h +struct pso_pointer read_number( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +struct pso_pointer read_symbol( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +struct pso_pointer read( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +#endif diff --git a/src/c/memory/destroy.c b/src/c/memory/destroy.c new file mode 100644 index 0000000..01e493d --- /dev/null +++ b/src/c/memory/destroy.c @@ -0,0 +1,65 @@ +/** + * memory/free.c + * + * Centralised point for despatching free methods to types. + * + * TODO: In the long run, we need a type for tags, which defines a constructor + * and a free method, along with the minimum and maximum size classes + * allowable for that tag; and we need a namespace in which tags are + * canonically stored, probably ::system:tags, in which the tag is bound to + * the type record describing it. And this all needs to work in Lisp, not + * in the substrate. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/tags.h" + +#include "payloads/cons.h" +#include "payloads/exception.h" +#include "payloads/stack.h" +#include "payloads/psse_string.h" + +/** + * @brief Despatch destroy message to the handler for the type of the + * object indicated by `p`, if there is one. What the destroy handler + * needs to do is dec_ref all the objects pointed to by it. + * + * The handler has 0.1.0 lisp calling convention, since + * 1. we should be able to write destroy handlers in Lisp; and + * 2. in the long run this whole system should be rewritten in Lisp. + * + * The handler returns `nil` on success, an exception pointer on + * failure. This function returns that exception pointer. How we + * handle that exception pointer I simply don't know yet. + */ +struct pso_pointer destroy( struct pso_pointer p ) { + struct pso_pointer result = nil; + struct pso_pointer f = make_frame( nil, p ); + inc_ref( f ); + + switch ( get_tag_value( p ) ) { + case CONSTV: + destroy_cons( f, nil ); + break; + case EXCEPTIONTV: + destroy_exception( f, nil ); + break; + case KEYTV: + case STRINGTV: + case SYMBOLTV: + destroy_string( f, nil ); + break; + case STACKTV: + destroy_stack_frame( f, nil ); + break; + // TODO: others. + } + + dec_ref( f ); + return result; +} diff --git a/src/c/memory/destroy.h b/src/c/memory/destroy.h new file mode 100644 index 0000000..d85013e --- /dev/null +++ b/src/c/memory/destroy.h @@ -0,0 +1,17 @@ +/** + * memory/destroy.h + * + * Despatcher for destructor functions when objects are freed. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_destroy_h +#define __psse_memory_destroy_h + +#include "memory/pointer.h" + +struct pso_pointer destroy( struct pso_pointer p ); + +#endif diff --git a/src/c/memory/header.h b/src/c/memory/header.h new file mode 100644 index 0000000..c470074 --- /dev/null +++ b/src/c/memory/header.h @@ -0,0 +1,44 @@ +/** + * memory/header.h + * + * Header for all page space objects + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_header_h +#define __psse_memory_header_h + +#include + +#include "memory/pointer.h" + +#define TAGLENGTH 3 + +#define MAXREFERENCE 4294967295 + +/** + * @brief Header for all paged space objects. + * + */ +struct pso_header { + union { + /** the tag (type) of this object, + * considered as bytes */ + struct { + /** mnemonic for this type; */ + char mnemonic[TAGLENGTH]; + /** size class for this object */ + uint8_t size_class; + } bytes; + /** the tag considered as a number */ + uint32_t value; + } tag; + /** the count of the number of references to this object */ + uint32_t count; + /** pointer to the access control list of this object */ + struct pso_pointer access; +}; + +#endif diff --git a/src/c/memory/memory.c b/src/c/memory/memory.c new file mode 100644 index 0000000..ca41d67 --- /dev/null +++ b/src/c/memory/memory.c @@ -0,0 +1,47 @@ +/** + * memory/memory.c + * + * The memory management subsystem. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include + +#include "memory/memory.h" +#include "memory/node.h" +#include "memory/pointer.h" + +/** + * @brief Freelists for each size class. + */ +struct pso_pointer freelists[MAX_SIZE_CLASS]; + +/** + * @brief Flag to prevent re-initialisation. + */ +bool memory_initialised = false; + +/** + * @brief Initialise the memory allocation system. + * + * Essentially, just set up the freelists; allocating pages will then happen + * automatically as objects are requested. + * + * @param node the index number of the node we are initialising. + * @return int + */ +struct pso_pointer initialise_memory( uint32_t node ) { + if ( memory_initialised ) { + // TODO: throw an exception + } else { + for ( uint8_t i = 0; i <= MAX_SIZE_CLASS; i++ ) { + freelists[i] = nil; + } + memory_initialised = true; + } + + return t; +} diff --git a/src/c/memory/memory.h b/src/c/memory/memory.h new file mode 100644 index 0000000..5911f2f --- /dev/null +++ b/src/c/memory/memory.h @@ -0,0 +1,30 @@ +/** + * memory/memory.h + * + * The memory management subsystem. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_memory_h +#define __psse_memory_memory_h + +#include "memory/pointer.h" + +/** + * @brief Maximum size class + * + * Size classes are poweres of 2, in words; so an object of size class 2 + * has an allocation size of four words; of size class 3, of eight words, + * and so on. Size classes of 0 and 1 do not work for managed objects, + * since managed objects require a two word header; it's unlikely that + * these undersized size classes will be used at all. + */ +#define MAX_SIZE_CLASS 0xf + +struct pso_pointer initialise_memory( ); + +extern struct pso_pointer out_of_memory_exception; +extern struct pso_pointer freelists[]; +#endif diff --git a/src/c/memory/node.c b/src/c/memory/node.c new file mode 100644 index 0000000..2a650a0 --- /dev/null +++ b/src/c/memory/node.c @@ -0,0 +1,65 @@ +/** + * memory/node.c + * + * Top level data about the actual node on which this memory system sits. + * May not belong in `memory`. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "node.h" + +#include + +#include "environment/environment.h" +#include "memory/memory.h" +#include "memory/pointer.h" +#include "ops/eq.h" + +/** + * @brief Flag to prevent the node being initialised more than once. + * + */ +bool node_initialised = false; + +/** + * @brief The index of this node in the hypercube. + * + * TODO: once we have a hypercube, this must be set to the correct value + * IMMEDIATELY on startup, before starting to initalise any other part of + * the Lisp system. + */ +uint32_t node_index = 0; + + +/** + * @brief The canonical `nil` pointer + * + */ +struct pso_pointer nil = ( struct pso_pointer ) { 0, 0, 0 }; + +/** + * @brief the canonical `t` (true) pointer. + * + */ +struct pso_pointer t = ( struct pso_pointer ) { 0, 0, 1 }; + + +/** + * @brief Set up the basic informetion about this node. + * + * @param index + * @return struct pso_pointer + */ +struct pso_pointer initialise_node( uint32_t index ) { + node_index = index; + + struct pso_pointer result = initialise_memory( index ); + + if ( c_eq( result, t ) ) { + result = initialise_environment( index ); + } + + return result; +} diff --git a/src/c/memory/node.h b/src/c/memory/node.h new file mode 100644 index 0000000..fbc177a --- /dev/null +++ b/src/c/memory/node.h @@ -0,0 +1,36 @@ +/** + * memory/node.h + * + * Top level data about the actual node on which this memory system sits. + * May not belong in `memory`. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_node_h +#define __psse_memory_node_h + +#include + +/** + * @brief The index of this node in the hypercube. + * + */ +extern uint32_t node_index; + +/** + * @brief The canonical `nil` pointer + * + */ +extern struct pso_pointer nil; + +/** + * @brief the canonical `t` (true) pointer. + * + */ +extern struct pso_pointer t; + +struct pso_pointer initialise_node( uint32_t index ); + +#endif diff --git a/src/c/memory/page.c b/src/c/memory/page.c new file mode 100644 index 0000000..60771b4 --- /dev/null +++ b/src/c/memory/page.c @@ -0,0 +1,169 @@ +/** + * memory/page.c + * + * Page for paged space psoects. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include +#include + +#include "debug.h" + +#include "memory/memory.h" +#include "memory/node.h" +#include "memory/page.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/pso3.h" +#include "memory/pso4.h" +#include "memory/pso5.h" +#include "memory/pso6.h" +#include "memory/pso7.h" +#include "memory/pso8.h" +#include "memory/pso9.h" +#include "memory/psoa.h" +#include "memory/psob.h" +#include "memory/psoc.h" +#include "memory/psod.h" +#include "memory/psoe.h" +#include "memory/psof.h" +#include "memory/tags.h" + +#include "payloads/free.h" + +/** + * @brief The pages which have so far been initialised. + * + * TODO: This is temporary. We cannot afford to allocate an array big enough + * to hold the number of pages we *might* create at start up time. We need a + * way to grow the number of pages, while keeping access to them cheap. + */ +union page *pages[NPAGES]; + +/** + * @brief the number of pages which have thus far been allocated. + * + */ +uint32_t npages_allocated = 0; + +/** + * @brief private to allocate_page; do not use. + * + * @param page_addr address of the newly allocated page to be initialised; + * @param page_index its location in the pages[] array; + * @param size_class the size class of objects in this page; + * @param freelist the freelist for objects of this size class. + * @return struct pso_pointer the new head for the freelist for this size_class, + */ +struct pso_pointer initialise_page( union page *page_addr, uint16_t page_index, + uint8_t size_class, + struct pso_pointer freelist ) { + struct pso_pointer result = freelist; + int obj_size = pow( 2, size_class ); + int obj_bytes = obj_size * sizeof( uint64_t ); + int objs_in_page = PAGE_BYTES / obj_bytes; + + // we do this backwards (i--) so that object {0, 0, 0} will be first on the + // freelist when the first page is initiated, so we can grab that one for + // `nil` and the next on for `t`. + for ( int i = objs_in_page - 1; i >= 0; i-- ) { + // it should be safe to cast any pso object to a pso2 + struct pso2 *object = + ( struct pso2 * ) ( page_addr + ( i * obj_bytes ) ); + + object->header.tag.bytes.size_class = size_class; + strncpy( &( object->header.tag.bytes.mnemonic[0] ), FREETAG, + TAGLENGTH ); + object->payload.free.next = result; + + result = + make_pointer( node_index, page_index, + ( uint16_t ) ( i * obj_size ) ); + } + + return result; +} + +/** + * @brief Allocate a page for objects of this size class, initialise it, and + * link the objects in it into the freelist for this size class. + * + * @param size_class an integer in the range 0...MAX_SIZE_CLASS. + * @return t on success, an exception if an error occurred. + */ +struct pso_pointer allocate_page( uint8_t size_class ) { + struct pso_pointer result = t; + + if ( npages_allocated == 0 ) { + for ( int i = 0; i < NPAGES; i++ ) { + pages[i] = NULL; + } + debug_print( L"Pages array zeroed.\n", DEBUG_ALLOC, 0 ); + } + + if ( npages_allocated < NPAGES ) { + if ( size_class >= 2 && size_class <= MAX_SIZE_CLASS ) { + void *pg = malloc( sizeof( union page ) ); + + if ( pg != NULL ) { + memset( pg, 0, sizeof( union page ) ); + pages[npages_allocated] = pg; + debug_printf( DEBUG_ALLOC, 0, + L"Allocated page %d for objects of size class %x.\n", + npages_allocated, size_class ); + + freelists[size_class] = + initialise_page( ( union page * ) pg, npages_allocated, + size_class, freelists[size_class] ); + + debug_printf( DEBUG_ALLOC, 0, + L"Initialised page %d; freelist for size class %x updated.\n", + npages_allocated, size_class ); + + if ( npages_allocated == 0 ) { + // first page allocated; initialise nil and t + nil = lock_object( allocate( NILTAG, 2 ) ); + t = lock_object( allocate( TRUETAG, 2 ) ); + } + + npages_allocated++; + } else { + // TODO: exception when we have one. + result = nil; + fwide( stderr, 1 ); + fwprintf( stderr, + L"\nCannot allocate page: heap exhausted,\n", + size_class, MAX_SIZE_CLASS ); + } + } else { + // TODO: exception when we have one. + result = nil; + fwide( stderr, 1 ); + fwprintf( stderr, + L"\nCannot allocate page for size class %x, min is 2 max is %x.\n", + size_class, MAX_SIZE_CLASS ); + } + } else { + // TODO: exception when we have one. + result = nil; + fwide( stderr, 1 ); + fwprintf( stderr, + L"\nCannot allocate page: page space exhausted.\n", + size_class, MAX_SIZE_CLASS ); + } + + return result; +} + +/** + * @brief allow other files to see the current value of npages_allocated, but not + * change it. + */ +uint32_t get_pages_allocated( ) { + return npages_allocated; +} diff --git a/src/c/memory/page.h b/src/c/memory/page.h new file mode 100644 index 0000000..d30befb --- /dev/null +++ b/src/c/memory/page.h @@ -0,0 +1,79 @@ +/** + * memory/page.h + * + * Page for paged space psoects. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_page_h +#define __psse_memory_page_h + +#include "memory/pointer.h" +#include "memory/pso2.h" +#include "memory/pso3.h" +#include "memory/pso4.h" +#include "memory/pso5.h" +#include "memory/pso6.h" +#include "memory/pso7.h" +#include "memory/pso8.h" +#include "memory/pso9.h" +#include "memory/psoa.h" +#include "memory/psob.h" +#include "memory/psoc.h" +#include "memory/psod.h" +#include "memory/psoe.h" +#include "memory/psof.h" + +/** + * the size of a page, **in bytes**. + */ +#define PAGE_BYTES 1048576 + +/** + * the number of pages we will initially allow for. For + * convenience we'll set up an array of cons pages this big; however, + * TODO: later we will want a mechanism for this to be able to grow + * dynamically to the maximum we can allow. + */ +#define NPAGES 64 + +extern union page *pages[NPAGES]; + +/** + * @brief A page is a megabyte of memory which contains objects all of which + * are of the same size class. + * + * No page will contain both pso2s and pso4s, for example. We know what size + * objects are in a page by looking at the size tag of the first object, which + * will always be the fourth byte in the page (i.e page.bytes[3]). However, we + * will not normally have to worry about what size class the objects on a page + * are, since on creation all objects will be linked onto the freelist for + * their size class, they will be allocated from that free list, and on garbage + * collection they will be returned to that freelist. + */ +union page { + uint8_t bytes[PAGE_BYTES]; + uint64_t words[PAGE_BYTES / 8]; + struct pso2 pso2s[PAGE_BYTES / 32]; + struct pso3 pso3s[PAGE_BYTES / 64]; + struct pso4 pso4s[PAGE_BYTES / 128]; + struct pso5 pso5s[PAGE_BYTES / 256]; + struct pso6 pso6s[PAGE_BYTES / 512]; + struct pso7 pso7s[PAGE_BYTES / 1024]; + struct pso8 pso8s[PAGE_BYTES / 2048]; + struct pso9 pso9s[PAGE_BYTES / 4096]; + struct psoa psoas[PAGE_BYTES / 8192]; + struct psob psobs[PAGE_BYTES / 16384]; + struct psoc psocs[PAGE_BYTES / 32768]; + struct psod psods[PAGE_BYTES / 65536]; + struct psoe psoes[PAGE_BYTES / 131072]; + struct psof psofs[PAGE_BYTES / 262144]; +}; + +struct pso_pointer allocate_page( uint8_t size_class ); + +uint32_t get_pages_allocated( ); + +#endif diff --git a/src/c/memory/pointer.c b/src/c/memory/pointer.c new file mode 100644 index 0000000..b76f92c --- /dev/null +++ b/src/c/memory/pointer.c @@ -0,0 +1,117 @@ +/** + * memory/node.h + * + * The node on which this instance resides. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/page.h" +#include "memory/pointer.h" +#include "memory/pso.h" + +/** + * @brief Make a pointer to a paged-space object. + * + * @param node The index of the node on which the object is curated; + * @param page The memory page in which the object resides; + * @param offset The offset, in words, within that page, of the object. + * @return struct pso_pointer a pointer referencing the specified object. + */ +struct pso_pointer make_pointer( uint32_t node, uint16_t page, + uint16_t offset ) { + return ( struct pso_pointer ) { node, page, offset }; +} + +/** + * @brief returns the in-memory address of the object indicated by this + * pointer `p`. + * + * NOTE THAT: It's impossible, with our calling conventions, to pass an + * exception back from this function. Consequently, if anything goes wrong + * we return NULL. The caller *should* check for that and throw an exception. + * + * NOTE THAT: The return signature of these functions is pso2, because it is + * safe to cast any paged space object to a pso2, but safe to cast an object + * of a smaller size class to a larger one. If you know what size class you + * want, you should prefer `pointer_to_object_of_size_class()`, q.v. + * + * TODO: The reason I'm doing it this way is because I'm not + * certain reference counter updates work right it we work with 'the object' + * rather than 'the address of the object'. I really ought to have a + * conversation with someone who understands this bloody language. + * + * @param p a pso_pointer which references an object. + * + * @return the actual address in memory of that object, or NULL if `p` is + * invalid. + */ +struct pso2 *pointer_to_object( struct pso_pointer p ) { + struct pso2 *result = NULL; + + if ( p.node == node_index ) { + if ( p.page < get_pages_allocated( ) + && p.offset < ( PAGE_BYTES / 8 ) ) { + // TODO: that's not really a safe test of whether this is a valid pointer. + union page *pg = pages[p.page]; + result = ( struct pso2 * ) &pg->words[p.offset]; + } + } + // TODO: else if we have a copy of the object in cache, return that; + // else request a copy of the object from the node which curates it. + + return result; +} + +/** + * @brief returns the memory address of the object indicated by this pointer + * `p`, if it is of this `size_class`. + * + * NOTE THAT: It's impossible, with our calling conventions, to pass an + * exception back from this function. Consequently, if anything goes wrong + * we return NULL. The caller *should* check for that and throw an exception. + * + * NOTE THAT: The return signature of these functions is pso2, because it is + * safe to cast any paged space object to a pso2, but safe to cast an object + * of a smaller size class to a larger one. You should check that the object + * returned has the size class you expect. + * + * @param p a pointer to an object; + * @param size_class a size class. + * + * @return the memory address of the object, provided it is a valid object and + * of the specified size class, else NULL. + */ +struct pso2 *pointer_to_object_of_size_class( struct pso_pointer p, + uint8_t size_class ) { + struct pso2 *result = pointer_to_object( p ); + + if ( result->header.tag.bytes.size_class != size_class ) { + result = NULL; + } + + return result; +} + +/** + * @brief returns the memory address of the object indicated by this pointer + * `p`, if it has this `tag_value`. + * + * NOTE THAT: It's impossible, with our calling conventions, to pass an + * exception back from this function. Consequently, if anything goes wrong + * we return NULL. The caller *should* check for that and throw an exception. + */ +struct pso2 *pointer_to_object_with_tag_value( struct pso_pointer p, + uint32_t tag_value ) { + struct pso2 *result = pointer_to_object( p ); + + if ( ( result->header.tag.value & 0xffffff ) != tag_value ) { + result = NULL; + } + + return result; +} diff --git a/src/c/memory/pointer.h b/src/c/memory/pointer.h new file mode 100644 index 0000000..827bb95 --- /dev/null +++ b/src/c/memory/pointer.h @@ -0,0 +1,53 @@ +/** + * memory/pointer.h + * + * A pointer to a paged space object. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pointer_h +#define __psse_memory_pointer_h + +#include + +/** + * @brief A pointer to an object in page space. + * + */ +struct pso_pointer { + /** + * @brief The index of the node on which this object is curated. + * + * NOTE: This will always be NULL until we have the hypercube router + * working. + */ + uint32_t node; + /** + * @brief The index of the allocated page in which this object is stored. + */ + uint16_t page; + /** + * @brief The offset of the object within the page **in words**. + * + * NOTE THAT: This value is always **in words**, regardless of the size + * class of the objects stored in the page, because until we've got hold + * of the page we don't know its size class. + */ + uint16_t offset; +}; + + +struct pso_pointer make_pointer( uint32_t node, uint16_t page, + uint16_t offset ); + +struct pso2 *pointer_to_object( struct pso_pointer pointer ); + +struct pso2 *pointer_to_object_of_size_class( struct pso_pointer p, + uint8_t size_class ); + +struct pso2 *pointer_to_object_with_tag_value( struct pso_pointer p, + uint32_t tag_value ); + +#endif diff --git a/src/c/memory/pso.c b/src/c/memory/pso.c new file mode 100644 index 0000000..7409e51 --- /dev/null +++ b/src/c/memory/pso.c @@ -0,0 +1,194 @@ +/** + * memory/pso.c + * + * Paged space objects. + * + * Broadly, it should be save to cast any paged space object to a pso2, since + * that is the smallest actually used size class. This should work to extract + * the tag and size class fields from the header, for example. I'm not + * confident enough of my understanding of C to know whether it is similarly + * safe to cast something passed to you as a pso2 up to something larger, even + * if you know from the size class field that it actually is something larger. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include + +#include "debug.h" +#include "memory/destroy.h" +#include "memory/header.h" +#include "memory/memory.h" +#include "memory/node.h" +#include "memory/page.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/tags.h" + +#include "ops/truth.h" + + /** + * @brief Allocate an object of this size_class with this tag. + * + * @param tag The tag. Only the first three bytes will be used; + * @param size_class The size class for the object to be allocated; + * @return struct pso_pointer a pointer to the newly allocated object + */ +struct pso_pointer allocate( char *tag, uint8_t size_class ) { + struct pso_pointer result = nil; + + if ( size_class <= MAX_SIZE_CLASS ) { + if ( nilp( freelists[size_class] ) ) { + result = allocate_page( size_class ); + } + + if ( !exceptionp( result ) && not( freelists[size_class] ) ) { + result = freelists[size_class]; + struct pso2 *object = pointer_to_object( result ); + freelists[size_class] = object->payload.free.next; + + strncpy( ( char * ) ( object->header.tag.bytes.mnemonic ), tag, + TAGLENGTH ); + + /* the object ought already to have the right size class in its tag + * because it was popped off the freelist for that size class. */ + if ( object->header.tag.bytes.size_class != size_class ) { + // TODO: return an exception instead? Or warn, set it, and continue? + } + /* the objext ought to have a reference count ot zero, because it's + * on the freelist, but again we should sanity check. */ + if ( object->header.count != 0 ) { + // TODO: return an exception instead? Or warn, set it, and continue? + } + + } + } // TODO: else throw exception + + return result; +} + +uint32_t payload_size( struct pso2 *object ) { + // TODO: Unit tests DEFINITELY needed! + return ( ( 1 << object->header.tag.bytes.size_class ) - + sizeof( struct pso_header ) ); +} + +/** + * increment the reference count of the object at this cons pointer. + * + * You can't roll over the reference count. Once it hits the maximum + * value you cannot increment further. + * + * Returns the `pointer`. + */ +struct pso_pointer inc_ref( struct pso_pointer pointer ) { + struct pso2 *object = pointer_to_object( pointer ); + + if ( object->header.count < MAXREFERENCE ) { + object->header.count++; +#ifdef DEBUG + debug_printf( DEBUG_ALLOC, 0, + L"\nIncremented object of type %3.3s at page %u, offset %u to count %u", + ( ( char * ) &object->header.tag.bytes.mnemonic[0] ), + pointer.page, pointer.offset, object->header.count ); + if ( vectorpointp( pointer ) ) { + debug_printf( DEBUG_ALLOC, 0, + L"; pointer to vector object of type %3.3s.\n", + ( ( char * ) + &( object->payload.vectorp.tag.bytes[0] ) ) ); + } else { + debug_println( DEBUG_ALLOC ); + } +#endif + } + + return pointer; +} + +/** + * Decrement the reference count of the object at this cons pointer. + * + * If a count has reached MAXREFERENCE it cannot be decremented. + * If a count is decremented to zero the object should be freed. + * + * Returns the `pointer`, or, if the object has been freed, a pointer to `nil`. + */ +struct pso_pointer dec_ref( struct pso_pointer pointer ) { + struct pso2 *object = pointer_to_object( pointer ); + + if ( object->header.count > 0 && object->header.count != MAXREFERENCE ) { + object->header.count--; +#ifdef DEBUG + debug_printf( DEBUG_ALLOC, 0, + L"\nDecremented object of type %4.4s at page %d, offset %d to count %d", + ( ( char * ) ( object->header.tag.bytes.mnemonic ) ), + pointer.page, pointer.offset, object->header.count ); + if ( vectorpointp( pointer ) ) { + debug_printf( DEBUG_ALLOC, 0, + L"; pointer to vector object of type %3.3s.\n", + ( ( char * ) + &( object->payload.vectorp.tag.bytes ) ) ); + } else { + debug_println( DEBUG_ALLOC ); + } +#endif + + if ( object->header.count == 0 ) { + free_object( pointer ); + pointer = nil; + } + } + + return pointer; +} + +/** + * @brief Prevent an object ever being dereferenced. + * + * @param pointer pointer to an object to lock. + * + * @return the `pointer` + */ +struct pso_pointer lock_object( struct pso_pointer pointer ) { + struct pso2 *object = pointer_to_object( pointer ); + + object->header.count = MAXREFERENCE; + + return pointer; +} + +/** + * @brief decrement all pointers pointed to by the object at this pointer; + * clear its memory, and return it to the freelist. + */ +struct pso_pointer free_object( struct pso_pointer p ) { + struct pso_pointer result = nil; + struct pso2 *obj = pointer_to_object( p ); + uint32_t array_size = payload_size( obj ); + uint8_t size_class = obj->header.tag.bytes.size_class; + + result = destroy( p ); + + /* will C just let me cheerfully walk off the end of the array I've declared? */ + for ( int i = 0; i < array_size; i++ ) { + obj->payload.words[i] = 0; + } + + + + strncpy( ( char * ) ( obj->header.tag.bytes.mnemonic ), FREETAG, + TAGLENGTH ); +#ifdef DEBUG + debug_printf( DEBUG_ALLOC, 0, + L"Freeing object of size class %d at {%d, %d, %d}", + size_class, p.node, p.page, p.offset ); +#endif + + /* TODO: obtain mutex on freelist */ + obj->payload.free.next = freelists[size_class]; + freelists[size_class] = p; + + return result; +} diff --git a/src/c/memory/pso.h b/src/c/memory/pso.h new file mode 100644 index 0000000..928a6aa --- /dev/null +++ b/src/c/memory/pso.h @@ -0,0 +1,28 @@ +/** + * memory/pso.h + * + * Paged space objects. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso_h +#define __psse_memory_pso_h + +#include + +#include "memory/header.h" +#include "memory/pointer.h" + +struct pso_pointer allocate( char *tag, uint8_t size_class ); + +struct pso_pointer dec_ref( struct pso_pointer pointer ); + +struct pso_pointer inc_ref( struct pso_pointer pointer ); + +struct pso_pointer lock_object( struct pso_pointer pointer ); + +struct pso_pointer free_object( struct pso_pointer p ); + +#endif diff --git a/src/c/memory/pso2.h b/src/c/memory/pso2.h new file mode 100644 index 0000000..812d582 --- /dev/null +++ b/src/c/memory/pso2.h @@ -0,0 +1,56 @@ +/** + * memory/pso2.h + * + * Paged space object of size class 2, four words total, two words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso2_h +#define __psse_memory_pso2_h + +#include + +#include "../payloads/psse_string.h" +#include "memory/header.h" +#include "payloads/character.h" +#include "payloads/cons.h" +#include "payloads/free.h" +#include "payloads/function.h" +#include "payloads/integer.h" +#include "payloads/keyword.h" +#include "payloads/lambda.h" +#include "payloads/nlambda.h" +#include "payloads/read_stream.h" +#include "payloads/symbol.h" +#include "payloads/time.h" +#include "payloads/vector_pointer.h" +#include "payloads/write_stream.h" + +/** + * @brief A paged space object of size class 2, four words total, two words + * payload. + * + */ +struct pso2 { + struct pso_header header; + union { + char bytes[16]; + uint64_t words[2]; + struct character_payload character; + struct cons_payload cons; + struct free_payload free; + struct function_payload function; + struct integer_payload integer; + struct lambda_payload lambda; + struct function_payload special; + struct stream_payload stream; + struct string_payload string; +// TODO: this isn't working and I don't know why (error: field ‘time’ has incomplete type) +// struct time_payload time; + struct vectorp_payload vectorp; + } payload; +}; + +#endif diff --git a/src/c/memory/pso3.h b/src/c/memory/pso3.h new file mode 100644 index 0000000..c4975b1 --- /dev/null +++ b/src/c/memory/pso3.h @@ -0,0 +1,37 @@ +/** + * memory/pso3.h + * + * Paged space object of size class 3, 8 words total, 6 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso3_h +#define __psse_memory_pso3_h + +#include + +#include "memory/header.h" +#include "payloads/exception.h" +#include "payloads/free.h" +#include "payloads/mutex.h" + + +/** + * @brief A paged space object of size class 3, 8 words total, 6 words + * payload. + * + */ +struct pso3 { + struct pso_header header; + union { + char bytes[48]; + uint64_t words[6]; + struct exception_payload exception; + struct free_payload free; + struct mutex_payload mutex; + } payload; +}; + +#endif diff --git a/src/c/memory/pso4.c b/src/c/memory/pso4.c new file mode 100644 index 0000000..cfe6722 --- /dev/null +++ b/src/c/memory/pso4.c @@ -0,0 +1,18 @@ +/** + * memory/pso4.h + * + * Paged space object of size class 4, 16 words total, 14 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/pso4.h" + +struct pso4 *pointer_to_pso4( struct pso_pointer p ) { + struct pso4 *result = + ( struct pso4 * ) pointer_to_object_of_size_class( p, 4 ); +} diff --git a/src/c/memory/pso4.h b/src/c/memory/pso4.h new file mode 100644 index 0000000..bafda3f --- /dev/null +++ b/src/c/memory/pso4.h @@ -0,0 +1,36 @@ +/** + * memory/pso4.h + * + * Paged space object of size class 4, 16 words total, 14 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso4_h +#define __psse_memory_pso4_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" +#include "payloads/stack.h" + +/** + * @brief A paged space object of size class 4, 16 words total, 14 words + * payload. + * + */ +struct pso4 { + struct pso_header header; + union { + char bytes[112]; + uint64_t words[14]; + struct free_payload free; + struct stack_frame_payload stack_frame; + } payload; +}; + +struct pso4 *pointer_to_pso4( struct pso_pointer p ); + +#endif diff --git a/src/c/memory/pso5.h b/src/c/memory/pso5.h new file mode 100644 index 0000000..585332c --- /dev/null +++ b/src/c/memory/pso5.h @@ -0,0 +1,32 @@ +/** + * memory/pso5.h + * + * Paged space object of size class 5, 32 words total, 30 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso5_h +#define __psse_memory_pso5_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class 5, 32 words total, 30 words + * payload. + * + */ +struct pso5 { + struct pso_header header; + union { + char bytes[240]; + uint64_t words[30]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/pso6.h b/src/c/memory/pso6.h new file mode 100644 index 0000000..3bd9290 --- /dev/null +++ b/src/c/memory/pso6.h @@ -0,0 +1,32 @@ +/** + * memory/pso6.h + * + * Paged space object of size class 6, 64 words total, 62 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso6_h +#define __psse_memory_pso6_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class 6, 64 words total, 62 words + * payload. + * + */ +struct pso6 { + struct pso_header header; + union { + char bytes[496]; + uint64_t words[62]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/pso7.h b/src/c/memory/pso7.h new file mode 100644 index 0000000..04ee61b --- /dev/null +++ b/src/c/memory/pso7.h @@ -0,0 +1,32 @@ +/** + * memory/pso7.h + * + * Paged space object of size class 7, 128 words total, 126 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso7_h +#define __psse_memory_pso7_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class 7, 128 words total, 126 words + * payload. + * + */ +struct pso7 { + struct pso_header header; + union { + char bytes[1008]; + uint64_t words[126]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/pso8.h b/src/c/memory/pso8.h new file mode 100644 index 0000000..b3a00bc --- /dev/null +++ b/src/c/memory/pso8.h @@ -0,0 +1,32 @@ +/** + * memory/pso8.h + * + * Paged space object of size class 8, 256 words total, 254 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso8_h +#define __psse_memory_pso8_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class 8, 256 words total, 254 words + * payload. + * + */ +struct pso8 { + struct pso_header header; + union { + char bytes[2032]; + uint64_t words[254]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/pso9.h b/src/c/memory/pso9.h new file mode 100644 index 0000000..3fa5eab --- /dev/null +++ b/src/c/memory/pso9.h @@ -0,0 +1,32 @@ +/** + * memory/pso9.h + * + * Paged space object of size class 9, 512 words total, 510 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_pso9_h +#define __psse_memory_pso9_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class 9, 512 words total, 510 words + * payload. + * + */ +struct pso9 { + struct pso_header header; + union { + char bytes[4080]; + uint64_t words[510]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/psoa.h b/src/c/memory/psoa.h new file mode 100644 index 0000000..1c8e9c7 --- /dev/null +++ b/src/c/memory/psoa.h @@ -0,0 +1,32 @@ +/** + * memory/psoa.h + * + * Paged space object of size class a, 1024 words total, 1022 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_psoa_h +#define __psse_memory_psoa_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class a, 1024 words total, 1022 words + * payload. + * + */ +struct psoa { + struct pso_header header; + union { + char bytes[8176]; + uint64_t words[1022]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/psob.h b/src/c/memory/psob.h new file mode 100644 index 0000000..d6b235a --- /dev/null +++ b/src/c/memory/psob.h @@ -0,0 +1,32 @@ +/** + * memory/psob.h + * + * Paged space object of size class b, 2048 words total, 2046 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_psob_h +#define __psse_memory_psob_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class b, 2048 words total, 2046 words + * payload. + * + */ +struct psob { + struct pso_header header; + union { + char bytes[16368]; + uint64_t words[2046]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/psoc.h b/src/c/memory/psoc.h new file mode 100644 index 0000000..934c8b3 --- /dev/null +++ b/src/c/memory/psoc.h @@ -0,0 +1,32 @@ +/** + * memory/psoc.h + * + * Paged space object of size class c, 4096 words total, 4094 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_psoc_h +#define __psse_memory_psoc_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class c, 4096 words total, 4094 words + * payload. + * + */ +struct psoc { + struct pso_header header; + union { + char bytes[32752]; + uint64_t words[4094]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/psod.h b/src/c/memory/psod.h new file mode 100644 index 0000000..5ed7711 --- /dev/null +++ b/src/c/memory/psod.h @@ -0,0 +1,32 @@ +/** + * memory/psod.h + * + * Paged space object of size class d, 8192 words total, 8190 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_psod_h +#define __psse_memory_psod_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class d, 8192 words total, 8190 words + * payload. + * + */ +struct psod { + struct pso_header header; + union { + char bytes[65520]; + uint64_t words[8190]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/psoe.h b/src/c/memory/psoe.h new file mode 100644 index 0000000..5f2b619 --- /dev/null +++ b/src/c/memory/psoe.h @@ -0,0 +1,32 @@ +/** + * memory/psoe.h + * + * Paged space object of size class e, 16384 words total, 16382 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_psoe_h +#define __psse_memory_psoe_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class e, 16384 words total, 16382 words + * payload. + * + */ +struct psoe { + struct pso_header header; + union { + char bytes[131056]; + uint64_t words[16382]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/psof.h b/src/c/memory/psof.h new file mode 100644 index 0000000..58615de --- /dev/null +++ b/src/c/memory/psof.h @@ -0,0 +1,32 @@ +/** + * memory/psof.h + * + * Paged space object of size class f, 32768 words total, 32766 words payload. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_psof_h +#define __psse_memory_psof_h + +#include + +#include "memory/header.h" +#include "payloads/free.h" + +/** + * @brief A paged space object of size class f, 32768 words total, 32766 words + * payload. + * + */ +struct psof { + struct pso_header header; + union { + char bytes[262128]; + uint64_t words[32766]; + struct free_payload free; + } payload; +}; + +#endif diff --git a/src/c/memory/tags.c b/src/c/memory/tags.c new file mode 100644 index 0000000..6e4a7c5 --- /dev/null +++ b/src/c/memory/tags.c @@ -0,0 +1,54 @@ +/** + * memory/tags.h + * + * It would be nice if I could get the macros for tsg operations to work, + * but at present they don't and they're costing me time. So I'm going to + * redo them as functions. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include + +#include "memory/pointer.h" +#include "memory/pso2.h" + +uint32_t get_tag_value( struct pso_pointer p ) { + struct pso2 *object = pointer_to_object( p ); + + return object->header.tag.value & 0xffffff; +} + +/** + * @brief check that the tag of the object indicated by this poiner has this + * value. + * + * @param p must be a struct pso_pointer, indicating the appropriate object. + * @param v should be an integer, ideally uint32_t, the expected value of a tag. + * + * @return true if the tag at p matches v, else false. + */ +bool check_tag( struct pso_pointer p, uint32_t v ) { + return get_tag_value( p ) == v; +} + +/** + * @brief Like check_tag, q.v., but comparing with the string value of the tag + * rather than the integer value. Only the first TAGLENGTH characters of `s` + * are considered. + * + * @param p a pointer to an object; + * @param s a string, in C conventions; + * @return true if the first TAGLENGTH characters of `s` are equal to the tag + * of the object. + * @return false otherwise. + */ +bool check_type( struct pso_pointer p, char *s ) { + return ( strncmp + ( &( pointer_to_object( p )->header.tag.bytes.mnemonic[0] ), s, + TAGLENGTH ) + == 0 ); +} diff --git a/src/c/memory/tags.h b/src/c/memory/tags.h new file mode 100644 index 0000000..524e805 --- /dev/null +++ b/src/c/memory/tags.h @@ -0,0 +1,133 @@ +/** + * memory/tags.h + * + * Tags for all page space and vector objects known to the bootstrap layer. + * + * All macros! + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_memory_tags_h +#define __psse_memory_tags_h + +#include + +#define TAGLENGTH 3 + +#define CHARACTERTAG "CHR" +#define CONSTAG "CNS" +#define EXCEPTIONTAG "EXP" +#define FREETAG "FRE" +#define FUNCTIONTAG "FUN" +#define HASHTAG "HTB" +#define INTEGERTAG "INT" +#define KEYTAG "KEY" +#define LAMBDATAG "LMD" +#define LOOPTAG "LOP" +#define LAZYCONSTAG "LZY" +#define LAZYSTRTAG "LZS" +#define LAZYWRKRTAG "WRK" +#define MUTEXTAG "MTX" +#define NAMESPACETAG "NSP" +#define NILTAG "NIL" +#define NLAMBDATAG "NLM" +#define RATIOTAG "RAT" +#define READTAG "RED" +#define REALTAG "REA" +#define SPECIALTAG "SFM" +#define STACKTAG "STK" +#define STRINGTAG "STR" +#define SYMBOLTAG "SYM" +#define TIMETAG "TIM" +#define TRUETAG "TRU" +#define VECTORTAG "VEC" +#define VECTORPOINTTAG "VSP" +#define WRITETAG "WRT" + +#define CHARACTERTV 5392451 +#define CONSTV 5459523 +#define EXCEPTIONTV 5265477 +#define FREETV 4543046 +#define FUNCTIONTV 5133638 +#define HASHTV 4346952 +#define INTEGERTV 5525065 +#define KEYTV 5850443 +#define LAMBDATV 4345164 +#define LOOPTV 5263180 +#define MUTEXTV 5788749 +#define NAMESPACETV 5264206 +#define NILTV 4999502 +#define NLAMBDATV 5065806 +#define RATIOTV 5521746 +#define READTV 4474194 +#define REALTV 4277586 +#define SPECIALTV 5064275 +#define STACKTV 4936787 +#define STRINGTV 5395539 +#define SYMBOLTV 5069139 +#define TIMETV 5065044 +#define TRUETV 5591636 +#define VECTORTV 4408662 +#define VECTORPOINTTV 5264214 +#define WRITETV 5264214 + +/** + * @brief return the numerical value of the tag of the object indicated by + * pointer `p`. + * + * @param p must be a struct pso_pointer, indicating the appropriate object. + * + * @return the numerical value of the tag, as a uint32_t. + */ +// #define get_tag_value(p)((pointer_to_object(p)->header.tag.value) & 0xffffff) +uint32_t get_tag_value( struct pso_pointer p ); + +/** + * @brief check that the tag of the object indicated by this poiner has this + * value. + * + * @param p must be a struct pso_pointer, indicating the appropriate object. + * @param v should be an integer, ideally uint32_t, the expected value of a tag. + * + * @return true if the tag at p matches v, else false. + */ +// #define check_tag(p,v) (get_tag_value(p) == v) +bool check_tag( struct pso_pointer p, uint32_t v ); + +bool check_type( struct pso_pointer p, char *s ); + +#define characterp(p) (check_tag(p, CHARACTERTV)) +#define consp(p) (check_tag(p, CONSTV)) +#define exceptionp(p) (check_tag(p, EXCEPTIONTV)) +#define freep(p) (check_tag(p, FREETV)) +#define functionp(p) (check_tag(p, FUNCTIONTV)) +#define integerp(p) (check_tag(p, INTEGERTV)) +#define keywordp(p) (check_tag(p, KEYTV)) +#define lambdap(p) (check_tag(p,LAMBDATV)) +#define loopp(p) (check_tag(p,LOOPTV)) +#define namespacep(p)(check_tag(p,NAMESPACETV)) +// the version of nilp in ops/truth.c is better than this, because it does not +// require a fetch, and will see nils curated by other nodes as nil. +// #define nilp(p) (check_tag(p,NILTV)) +#define numberp(p) (check_tag(p,INTEGERTV)||check_tag(p,RATIOTV)||check_tag(p,REALTV)) +#define ratiop(p) (check_tag(p,RATIOTV)) +#define readp(p) (check_tag(p,READTV)) +#define realp(p) (check_tag(p,REALTV)) +#define sequencep(p) (check_tag(p,CONSTV)||check_tag(p,STRINGTV)||check_tag(p,SYMBOLTV)) +#define specialp(p) (check_tag(p,SPECIALTV)) +#define stackp(p) (check_tag(p, STACKTV)) +#define streamp(p) (check_tag(p,READTV)||check_tag(p,WRITETV)) +#define stringp(p) (check_tag(p,STRINGTV)) +#define symbolp(p) (check_tag(p,SYMBOLTV)) +#define timep(p) (check_tag(p,TIMETV)) +// the version of truep in ops/truth.c is better than this, because it does not +// require a fetch, and will see ntsils curated by other nodes as t. +// #define tp(p) (check_tag(p,TRUETV)) +// #define truep(p) ( !check_tag(p,NILTV)) +#define vectorpointp(p) (check_tag(p,VECTORPOINTTV)) +#define vectorp(p) (check_tag(p,VECTORTV)) +#define writep(p) (check_tag(p,WRITETV)) + +#endif diff --git a/src/c/ops/README.md b/src/c/ops/README.md new file mode 100644 index 0000000..80f3ccd --- /dev/null +++ b/src/c/ops/README.md @@ -0,0 +1,16 @@ +# README: PSSE substrate operations + +This folder/pseudo-package is for things which implement basic Lisp functions. +These will be the functions which make up the `:bootstrap` and `:substrate` +packages in Lisp. + +For each basic function the intention is that there should be one `.c` file +(and normally one `.h` file as well). This file will provide one version of the +function with Lisp calling conventions, called `lisp_xxxx`, and one with C +calling conventions, called `xxxx`. It does not matter whether the lisp version +calls the C version or vice versa, but one should call the other so there are +not two different versions of the logic. + +Substrate I/O functions will not be provided in this pseudo-package but in `io`. +Substrate arithmetic functions will not be provided in this pseudo-package but +in `arith`. \ No newline at end of file diff --git a/src/c/ops/assoc.c b/src/c/ops/assoc.c new file mode 100644 index 0000000..b1d6acb --- /dev/null +++ b/src/c/ops/assoc.c @@ -0,0 +1,90 @@ +/** + * ops/assoc.c + * + * Post Scarcity Software Environment: assoc. + * + * Search a store for the value associated with a key. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "payloads/cons.h" + +#include "ops/eq.h" +#include "ops/truth.h" + +/** + * @brief: fundamental search function; only knows about association lists + * + * @param key a pointer indicating the key to search for; + * @param store a pointer indicating the store to search; + * @param return_key if a binding is found for `key` in `store`, if true + * return the key found in the store, else return the value + * + * @return nil if no binding for `key` is found in `store`; otherwise, if + * `return_key` is true, return the key from the store; else + * return the binding. + */ +struct pso_pointer search( struct pso_pointer key, + struct pso_pointer store, bool return_key ) { + struct pso_pointer result = nil; + bool found = false; + + if ( consp( store ) ) { + for ( struct pso_pointer cursor = store; + consp( store ) && found == false; cursor = cdr( cursor ) ) { + struct pso_pointer pair = car( cursor ); + + if ( consp( pair ) && c_equal( car( pair ), key ) ) { + found = true; + result = return_key ? car( pair ) : cdr( pair ); + } + } + } + + return result; +} + +/** + * @prief: bootstap layer assoc; only knows about association lists. + * + * @param key a pointer indicating the key to search for; + * @param store a pointer indicating the store to search; + * + * @return a pointer to the value of the key in the store, or nil if not found + */ +struct pso_pointer assoc( struct pso_pointer key, struct pso_pointer store ) { + return search( key, store, false ); +} + +/** + * @prief: bootstap layer interned; only knows about association lists. + * + * @param key a pointer indicating the key to search for; + * @param store a pointer indicating the store to search; + * + * @return a pointer to the copy of the key in the store, or nil if not found. + */ +struct pso_pointer interned( struct pso_pointer key, struct pso_pointer store ) { + return search( key, store, true ); +} + +/** + * @prief: bootstap layer interned; only knows about association lists. + * + * @param key a pointer indicating the key to search for; + * @param store a pointer indicating the store to search; + * + * @return `true` if a pointer the key was found in the store.. + */ +bool internedp( struct pso_pointer key, struct pso_pointer store ) { + return !nilp( search( key, store, true ) ); +} diff --git a/src/c/ops/assoc.h b/src/c/ops/assoc.h new file mode 100644 index 0000000..52d8d08 --- /dev/null +++ b/src/c/ops/assoc.h @@ -0,0 +1,28 @@ +/** + * ops/assoc.h + * + * Post Scarcity Software Environment: assoc. + * + * Search a store for the value associated with a key. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_assoc_h +#define __psse_ops_assoc_h + +#include + +#include "memory/pointer.h" + +struct cons_pointer search( struct pso_pointer key, + struct pso_pointer store, bool return_key ); + +struct pso_pointer assoc( struct pso_pointer key, struct pso_pointer store ); + +struct pso_pointer interned( struct pso_pointer key, + struct pso_pointer store ); + +bool internedp( struct pso_pointer key, struct pso_pointer store ); +#endif diff --git a/src/c/ops/bind.c b/src/c/ops/bind.c new file mode 100644 index 0000000..f812c43 --- /dev/null +++ b/src/c/ops/bind.c @@ -0,0 +1,44 @@ +/** + * ops/bind.c + * + * Post Scarcity Software Environment: bind. + * + * Add a binding for a key/value pair to a store -- at this stage, just an + * association list. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +#include "ops/stack_ops.h" + +#include "payloads/cons.h" +#include "payloads/stack.h" + +struct pso_pointer bind( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer key = fetch_arg( frame, 0 ); + struct pso_pointer value = fetch_arg( frame, 1 ); + struct pso_pointer store = fetch_arg( frame, 2 ); + + return cons( cons( key, value ), store ); +} + +struct pso_pointer c_bind( struct pso_pointer key, + struct pso_pointer value, + struct pso_pointer store ) { + struct pso_pointer result = nil; + struct pso_pointer next = make_frame( nil, key, value, store ); + inc_ref( next ); + result = bind( next, nil ); + dec_ref( next ); + + return result; +} diff --git a/src/c/ops/bind.h b/src/c/ops/bind.h new file mode 100644 index 0000000..d7cdf42 --- /dev/null +++ b/src/c/ops/bind.h @@ -0,0 +1,25 @@ +/** + * ops/bind.h + * + * Post Scarcity Software Environment: bind. + * + * Test for pointer binduality. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_bind_h +#define __psse_ops_bind_h +#include + +#include "memory/pointer.h" + +struct pso_pointer bind( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +struct pso_pointer c_bind( struct pso_pointer key, + struct pso_pointer value, + struct pso_pointer store ); + +#endif diff --git a/src/c/ops/eq.c b/src/c/ops/eq.c new file mode 100644 index 0000000..b3789e5 --- /dev/null +++ b/src/c/ops/eq.c @@ -0,0 +1,110 @@ +/** + * ops/eq.c + * + * Post Scarcity Software Environment: eq. + * + * Test for pointer equality; bootstrap level tests for object equality. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/memory.h" +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "payloads/cons.h" +#include "payloads/integer.h" +#include "payloads/stack.h" +#include "ops/stack_ops.h" +#include "ops/truth.h" + +/** + * @brief Function; do these two pointers point to the same object? + * + * Shallow, cheap equality. + * + * TODO: if either of these pointers points to a cache cell, then what + * we need to check is the cached value, which is not so cheap. Ouch! + * + * @param a a pointer; + * @param b another pointer; + * @return `true` if they are the same, else `false` + */ +bool c_eq( struct pso_pointer a, struct pso_pointer b ) { + return ( a.node == b.node && a.page == b.page && a.offset == b.offset ); +} + +bool c_equal( struct pso_pointer a, struct pso_pointer b ) { + bool result = false; + + if ( c_eq( a, b ) ) { + result = true; + } else if ( get_tag_value( a ) == get_tag_value( b ) ) { + struct pso2 *oa = pointer_to_object( a ); + struct pso2 *ob = pointer_to_object( b ); + + switch ( get_tag_value( a ) ) { + case CHARACTERTV: + result = + ( oa->payload.character.character == + ob->payload.character.character ); + break; + case CONSTV: + result = ( c_equal( car( a ), car( b ) ) + && c_equal( cdr( a ), cdr( b ) ) ); + break; + case INTEGERTV: + result = ( oa->payload.integer.value + == ob->payload.integer.value ); + break; + case KEYTV: + case STRINGTV: + case SYMBOLTV: + while ( result == false && !nilp( a ) && !nilp( b ) ) { + if ( pointer_to_object( a )->payload.string.character == + pointer_to_object( b )->payload.string.character ) { + a = cdr( a ); + b = cdr( b ); + } + } + result = nilp( a ) && nilp( b ); + break; + } + } + + return result; +} + + +/** + * Function; do all arguments to this finction point to the same object? + * + * Shallow, cheap equality. + * + * * (eq? args...) + * + * @param frame my stack_frame. + * @param frame_pointer a pointer to my stack_frame. + * @param env my environment (ignored). + * @return `t` if all args are pointers to the same object, else `nil`; + */ +struct pso_pointer eq( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + + struct pso_pointer result = t; + + if ( frame->payload.stack_frame.args > 1 ) { + for ( int b = 1; + ( truep( result ) ) && ( b < frame->payload.stack_frame.args ); + b++ ) { + result = + c_eq( fetch_arg( frame, 0 ), fetch_arg( frame, b ) ) ? t : nil; + } + } + + return result; +} diff --git a/src/c/ops/eq.h b/src/c/ops/eq.h new file mode 100644 index 0000000..854e40c --- /dev/null +++ b/src/c/ops/eq.h @@ -0,0 +1,25 @@ +/** + * ops/eq.h + * + * Post Scarcity Software Environment: eq. + * + * Test for pointer equality. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_eq_h +#define __psse_ops_eq_h +#include + +#include "memory/pointer.h" +#include "memory/pso4.h" + +bool c_eq( struct pso_pointer a, struct pso_pointer b ); + +struct pso_pointer eq( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +bool c_equal( struct pso_pointer a, struct pso_pointer b ); +#endif diff --git a/src/c/ops/eval.c b/src/c/ops/eval.c new file mode 100644 index 0000000..830cceb --- /dev/null +++ b/src/c/ops/eval.c @@ -0,0 +1,76 @@ +/** + * ops/eval.c + * + * Post Scarcity Software Environment: eval. + * + * Evaluate an arbitrary Lisp expression. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/pso3.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +#include "payloads/cons.h" +#include "payloads/exception.h" +#include "payloads/function.h" +#include "payloads/keyword.h" +#include "payloads/lambda.h" +#include "payloads/nlambda.h" +#include "payloads/special.h" +#include "payloads/stack.h" + +#include "ops/truth.h" + +/** + * @brief Despatch eval based on tag of the form in the first position. + * + * @param frame The current stack frame; + * @param frame_pointer A pointer to the current stack frame; + * @param env the evaluation environment. + * @return struct pso_pointer + */ +struct pso_pointer lisp_eval( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + struct pso_pointer result = frame->payload.stack_frame.arg[0]; + + // switch ( get_tag_value( result)) { + // case CONSTV: + // result = eval_cons( frame, frame_pointer, env); + // break; + // case KEYTV: + // case SYMBOLTV: + // result = eval_symbol( frame, frame_pointer, env); + // break; + // case LAMBDATV: + // result = eval_lambda( frame, frame_pointer, env); + // break; + // case NLAMBDATV: + // result = eval_nlambda( frame, frame_pointer, env); + // break; + // case SPECIALTV: + // result = eval_special( frame, frame_pointer, env); + // break; + // } + + if ( exceptionp( result ) ) { + struct pso3 *x = + ( struct pso3 * ) pointer_to_object_with_tag_value( result, + EXCEPTIONTV ); + + if ( nilp( x->payload.exception.stack ) ) { + inc_ref( result ); + result = + make_exception( x->payload.exception.message, frame_pointer, + result ); + } + } + + return result; +} diff --git a/src/c/ops/repl.h b/src/c/ops/repl.h new file mode 100644 index 0000000..e5b1a9a --- /dev/null +++ b/src/c/ops/repl.h @@ -0,0 +1,15 @@ +/** + * ops/repl.h + * + * The read/eval/print loop. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_repl_h +#define __psse_ops_repl_h + +// struct pso_pointer repl( struct pso_pointer prompt, struct pso_pointer readtable); + +#endif diff --git a/src/c/ops/reverse.c b/src/c/ops/reverse.c new file mode 100644 index 0000000..5f59004 --- /dev/null +++ b/src/c/ops/reverse.c @@ -0,0 +1,75 @@ +/** + * ops/reverse.c + * + * Post Scarcity Software Environment: reverse. + * + * Reverse a sequence. Didn'e want to do this in the substrate, but I need + * if for reading atoms!. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "payloads/cons.h" +#include "payloads/exception.h" +#include "payloads/psse_string.h" + +#include "ops/string_ops.h" +#include "ops/truth.h" + +/** + * @brief reverse a sequence. + * + * A sequence is a list or a string-like-thing. A dotted pair is not a + * sequence. + * + * @param sequence a pointer to a sequence. + * @return a sequence like the `sequence` passed, but reversed; or `nil` if + * the argument was not a sequence. + */ +struct pso_pointer reverse( struct pso_pointer sequence ) { + struct pso_pointer result = nil; + + for ( struct pso_pointer cursor = sequence; !nilp( sequence ); + cursor = cdr( cursor ) ) { + struct pso2 *object = pointer_to_object( cursor ); + switch ( get_tag_value( cursor ) ) { + case CONSTV: + result = cons( car( cursor ), result ); + break; + case KEYTV: + result = + make_string_like_thing( object->payload.string.character, + result, KEYTAG ); + break; + case STRINGTV: + result = + make_string_like_thing( object->payload.string.character, + result, STRINGTAG ); + break; + case SYMBOLTV: + result = + make_string_like_thing( object->payload.string.character, + result, SYMBOLTAG ); + break; + default: + result = + make_exception( c_string_to_lisp_string + ( L"Invalid object in sequence" ), nil, + nil ); + goto exit; + break; + } + } + exit: + + return result; +} diff --git a/src/c/ops/reverse.h b/src/c/ops/reverse.h new file mode 100644 index 0000000..96f6b2f --- /dev/null +++ b/src/c/ops/reverse.h @@ -0,0 +1,21 @@ +/** + * ops/reverse.h + * + * Post Scarcity Software Environment: reverse. + * + * Reverse a sequence. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_reverse_h +#define __psse_ops_reverse_h + +#include + +#include "memory/pointer.h" + +struct pso_pointer reverse( struct pso_pointer sequence ); + +#endif diff --git a/src/c/ops/stack_ops.c b/src/c/ops/stack_ops.c new file mode 100644 index 0000000..0fd28c5 --- /dev/null +++ b/src/c/ops/stack_ops.c @@ -0,0 +1,42 @@ +/** + * payloads/stack.c + * + * The execution stack. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "memory/node.h" +#include "memory/pso2.h" +#include "memory/pso4.h" +#include "payloads/stack.h" + +/** + * @brief The maximum depth of stack before we throw an exception. + * + * `0` is interpeted as `unlimited`. + */ +uint32_t stack_limit = 0; + +/** + * Fetch a pointer to the value of the local variable at this index. + */ +struct pso_pointer fetch_arg( struct pso4 *frame, unsigned int index ) { + struct pso_pointer result = nil; + + // TODO check that the frame is indeed a frame! + if ( index < args_in_frame ) { + result = frame->payload.stack_frame.arg[index]; + } else { + struct pso_pointer p = frame->payload.stack_frame.more; + + for ( int i = args_in_frame; i < index; i++ ) { + p = pointer_to_object( p )->payload.cons.cdr; + } + + result = pointer_to_object( p )->payload.cons.car; + } + + return result; +} diff --git a/src/c/ops/stack_ops.h b/src/c/ops/stack_ops.h new file mode 100644 index 0000000..837d49a --- /dev/null +++ b/src/c/ops/stack_ops.h @@ -0,0 +1,30 @@ +/** + * ops/stack_ops.h + * + * Operations on a Lisp stack frame. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_stack_ops_h +#define __psse_ops_stack_ops_h + +#include "memory/pointer.h" +#include "memory/pso4.h" + +/* + * number of arguments stored in a stack frame + */ +#define args_in_frame 8 + +/** + * @brief The maximum depth of stack before we throw an exception. + * + * `0` is interpeted as `unlimited`. + */ +extern uint32_t stack_limit; + +struct pso_pointer fetch_arg( struct pso4 *frame, unsigned int index ); + +#endif diff --git a/src/c/ops/string_ops.c b/src/c/ops/string_ops.c new file mode 100644 index 0000000..14d12a3 --- /dev/null +++ b/src/c/ops/string_ops.c @@ -0,0 +1,164 @@ +/** + * ops/string_ops.h + * + * Operations on a Lisp string frame. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +/* + * wide characters + */ +#include +#include + +#include "debug.h" +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "ops/truth.h" + +#include "payloads/exception.h" + + +/** + * Return a hash value for this string like thing. + * + * What's important here is that two strings with the same characters in the + * same order should have the same hash value, even if one was created using + * `"foobar"` and the other by `(append "foo" "bar")`. I *think* this function + * has that property. I doubt that it's the most efficient hash function to + * have that property. + * + * returns 0 for things which are not string like. + */ +uint32_t calculate_hash( wint_t c, struct pso_pointer ptr ) { + struct pso2 *cell = pointer_to_object( ptr ); + uint32_t result = 0; + + switch ( get_tag_value( ptr ) ) { + case KEYTV: + case STRINGTV: + case SYMBOLTV: + if ( nilp( cell->payload.string.cdr ) ) { + result = ( uint32_t ) c; + } else { + result = + ( ( uint32_t ) c * + cell->payload.string.hash ) & 0xffffffff; + } + break; + } + + return result; +} + + /** + * Construct a string from this character (which later will be UTF) and + * this tail. A string is implemented as a flat list of cells each of which + * has one character and a pointer to the next; in the last cell the + * pointer to next is nil. + * + * NOTE THAT: in 0.1.X, we may allocate symbols and keywords as arrays of + * wchar_t in larger pso classes, so this function may be only for strings + * (and thus simpler). + */ +struct pso_pointer make_string_like_thing( wint_t c, struct pso_pointer tail, + char *tag ) { + struct pso_pointer pointer = nil; + + if ( check_type( tail, tag ) || check_tag( tail, NILTV ) ) { + pointer = allocate( tag, CONS_SIZE_CLASS ); + struct pso2 *cell = pointer_to_object( pointer ); + + cell->payload.string.character = c; + cell->payload.string.cdr = tail; + + cell->payload.string.hash = calculate_hash( c, tail ); + debug_dump_object( pointer, DEBUG_ALLOC, 0 ); + debug_println( DEBUG_ALLOC ); + } else { + // \todo should throw an exception! + debug_printf( DEBUG_ALLOC, 0, + L"Warning: only %4.4s can be prepended to %4.4s\n", + tag, tag ); + } + + return pointer; +} + +/** + * Construct a string from the character `c` and this `tail`. A string is + * implemented as a flat list of cells each of which has one character and a + * pointer to the next; in the last cell the pointer to next is NIL. + * + * @param c the character to add (prepend); + * @param tail the string which is being built. + */ +struct pso_pointer make_string( wint_t c, struct pso_pointer tail ) { + return make_string_like_thing( c, tail, STRINGTAG ); +} + +/** + * Construct a keyword from the character `c` and this `tail`. A string is + * implemented as a flat list of cells each of which has one character and a + * pointer to the next; in the last cell the pointer to next is NIL. + * + * @param c the character to add (prepend); + * @param tail the keyword which is being built. + */ +struct pso_pointer make_keyword( wint_t c, struct pso_pointer tail ) { + return make_string_like_thing( c, tail, KEYTAG ); +} + +/** + * Construct a symbol from the character `c` and this `tail`. A string is + * implemented as a flat list of cells each of which has one character and a + * pointer to the next; in the last cell the pointer to next is NIL. + * + * @param c the character to add (prepend); + * @param tail the symbol which is being built. + */ +struct pso_pointer make_symbol( wint_t c, struct pso_pointer tail ) { + return make_string_like_thing( c, tail, STRINGTAG ); +} + + +/** + * Return a lisp string representation of this wide character string. + */ +struct pso_pointer c_string_to_lisp_string( wchar_t *string ) { + struct pso_pointer result = nil; + + for ( int i = wcslen( string ) - 1; i >= 0; i-- ) { + if ( iswprint( string[i] ) && string[i] != '"' ) { + result = make_string( string[i], result ); + } + } + + return result; +} + +/** + * Return a lisp keyword representation of this wide character string. In + * keywords, I am accepting only lower case characters and numbers. + */ +struct pso_pointer c_string_to_lisp_keyword( wchar_t *symbol ) { + struct pso_pointer result = nil; + + for ( int i = wcslen( symbol ) - 1; i >= 0; i-- ) { + wchar_t c = towlower( symbol[i] ); + + if ( iswalnum( c ) || c == L'-' ) { + result = make_keyword( c, result ); + } + } + + return result; +} diff --git a/src/c/ops/string_ops.h b/src/c/ops/string_ops.h new file mode 100644 index 0000000..59ce837 --- /dev/null +++ b/src/c/ops/string_ops.h @@ -0,0 +1,32 @@ +/** + * ops/string_ops.h + * + * Operations on a Lisp string. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_string_ops_h +#define __psse_ops_string_ops_h + +/* + * wide characters + */ +#include +#include + +struct pso_pointer make_string_like_thing( wint_t c, struct pso_pointer tail, + char *tag ); + +struct pso_pointer make_string( wint_t c, struct pso_pointer tail ); + +struct pso_pointer make_keyword( wint_t c, struct pso_pointer tail ); + +struct pso_pointer make_symbol( wint_t c, struct pso_pointer tail ); + +struct pso_pointer c_string_to_lisp_string( wchar_t *string ); + +struct pso_pointer c_string_to_lisp_keyword( wchar_t *symbol ); + +#endif diff --git a/src/c/ops/truth.c b/src/c/ops/truth.c new file mode 100644 index 0000000..8ffb2f5 --- /dev/null +++ b/src/c/ops/truth.c @@ -0,0 +1,104 @@ +/** + * ops/truth.c + * + * Post Scarcity Software Environment: nil? true? not. + * + * Functions associated with truthiness. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso4.h" +#include "ops/stack_ops.h" + +/** + * @brief true if `p` points to `nil`, else false. + * + * Note that every node has its own copy of `t` and `nil`, and each instance of + * each is considered equivalent. So we don't check the node when considering + * whether `nil` really is `nil`, or `t` really is `t`. + * + * @param p a pointer + * @return true if `p` points to `nil`. + * @return false otherwise. + */ +bool nilp( struct pso_pointer p ) { + return ( p.page == 0 && p.offset == 0 ); +} + +/** + * @brief Return `true` if `p` points to `nil`, else `false`. + * + * @param p a pointer + * @return true if `p` points to `nil`; + * @return false otherwise. + */ +bool not( struct pso_pointer p ) { + return !nilp( p ); +} + +/** + * @brief `true` if `p` points to `t`, else `false`. + * + * Note that every node has its own copy of `t` and `nil`, and each instance of + * each is considered equivalent. So we don't check the node when considering + * whether `nil` really is `nil`, or `t` really is `t`. + * + * @param p a pointer + * @return true if `p` points to `t`. + * @return false otherwise. + */ +bool truep( struct pso_pointer p ) { + return ( p.page == 0 && p.offset == 1 ); +} + +/** + * @brief return `t` if the first argument in this frame is `nil`, else `t`. + * + * @param frame The current stack frame; + * @param frame_pointer A pointer to the current stack frame; + * @param env the evaluation environment. + * @return `t` if the first argument in this frame is `nil`, else `t` + */ +struct pso_pointer lisp_nilp( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + + return ( nilp( fetch_arg( frame, 0 ) ) ? t : nil ); +} + +/** + * @brief return `t` if the first argument in this frame is `t`, else `nil`. + * + * @param frame The current stack frame; + * @param frame_pointer A pointer to the current stack frame; + * @param env the evaluation environment. + * @return `t` if the first argument in this frame is `t`, else `nil`. + */ +struct pso_pointer lisp_truep( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + + return ( truep( fetch_arg( frame, 0 ) ) ? t : nil ); +} + +/** + * @brief return `t` if the first argument in this frame is not `nil`, else + * `t`. + * + * @param frame The current stack frame; + * @param frame_pointer A pointer to the current stack frame; + * @param env the evaluation environment. + * @return `t` if the first argument in this frame is not `nil`, else `t`. + */ +struct pso_pointer lisp_not( struct pso_pointer frame_pointer, + struct pso_pointer env ) { + struct pso4 *frame = pointer_to_pso4( frame_pointer ); + + return ( not( fetch_arg( frame, 0 ) ) ? t : nil ); +} diff --git a/src/c/ops/truth.h b/src/c/ops/truth.h new file mode 100644 index 0000000..0fa0574 --- /dev/null +++ b/src/c/ops/truth.h @@ -0,0 +1,34 @@ +/** + * ops/truth.h + * + * Post Scarcity Software Environment: truth functions. + * + * Tests for truth. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_ops_truth_h +#define __psse_ops_truth_h +#include + +#include "memory/pointer.h" +#include "memory/pso4.h" + +bool nilp( struct pso_pointer p ); + +struct pso_pointer lisp_nilp( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +bool not( struct pso_pointer p ); + +struct pso_pointer lisp_not( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +bool truep( struct pso_pointer p ); + +struct pso_pointer lisp_truep( struct pso_pointer frame_pointer, + struct pso_pointer env ); + +#endif diff --git a/src/c/payloads/character.c b/src/c/payloads/character.c new file mode 100644 index 0000000..aa370e4 --- /dev/null +++ b/src/c/payloads/character.c @@ -0,0 +1,35 @@ +/** + * payloads/character.c + * + * A character object. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +/* + * wide characters + */ +#include +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "ops/truth.h" + +#include "payloads/character.h" + +struct pso_pointer make_character( wint_t c ) { + struct pso_pointer result = allocate( CHARACTERTAG, 2 ); + + if ( !nilp( result ) ) { + pointer_to_object( result )->payload.character.character = + ( wchar_t ) c; + } + + return result; +} diff --git a/src/c/payloads/character.h b/src/c/payloads/character.h new file mode 100644 index 0000000..355b79a --- /dev/null +++ b/src/c/payloads/character.h @@ -0,0 +1,40 @@ +/** + * payloads/character.h + * + * A character object. + * + * Note that, instead of instantiating actual character objects, it would be + * possible to reserve one special page index, outside the normal page range, + * possibly even page 0, such that a character would be represented by a + * pso_pointer {node, special_page, character_code}. The special page wouldn't + * actually have to exist. This wouldn't prevent `nil` being 'the object at + * {n, 0, 0}, since the UTF character with the index 0 is NULL, which feels + * entirely compatible. UTF 1 is 'Start of heading', which is not used by + * anything I'm aware of these days, and is canonically not NULL, so I don't + * see why we should not continue to treat {n, 0, 1} as `t`. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_character_h +#define __psse_payloads_character_h +/* + * wide characters + */ +#include +#include + + +#define CHARTAG "CHR" +#define CHARTV 5392451 + +/** + * @brief a single character, as returned by the reader. + */ +struct character_payload { + wchar_t character; +}; + +struct pso_pointer make_character( wint_t c ); +#endif diff --git a/src/c/payloads/cons.c b/src/c/payloads/cons.c new file mode 100644 index 0000000..607cca4 --- /dev/null +++ b/src/c/payloads/cons.c @@ -0,0 +1,110 @@ +/** + * payloads/cons.h + * + * A cons cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +#include "payloads/cons.h" +#include "payloads/exception.h" + +#include "ops/string_ops.h" + +/** + * @brief allocate a cons cell with this car and this cdr, and return a pointer + * to it. + * + * @param car the pointer which should form the car of this cons cell; + * @param cdr the pointer which should form the cdr of this cons cell. + * @return struct pso_pointer a pointer to the newly allocated cons cell. + */ +struct pso_pointer cons( struct pso_pointer car, struct pso_pointer cdr ) { + struct pso_pointer result = allocate( CONSTAG, 2 ); + + struct pso2 *object = pointer_to_object( result ); + object->payload.cons.car = inc_ref( car ); + object->payload.cons.cdr = inc_ref( cdr ); + + return result; +} + + +/** + * @brief return the car of this cons cell. + * + * @param cons a pointer to the cell. + * @return the car of the indicated cell. + * @exception if the pointer does not indicate a cons cell. + */ +struct pso_pointer car( struct pso_pointer cons ) { + struct pso_pointer result = nil; + struct pso2 *object = pointer_to_object( result ); + + if ( consp( cons ) ) { + result = object->payload.cons.car; + } + // TODO: else throw an exception + + return result; +} + +/** + * @brief return the cdr of this cons (or other sequence) cell. + * + * @param cons a pointer to the cell. + * @return the cdr of the indicated cell. + * @exception if the pointer does not indicate a cons cell. + */ +struct pso_pointer cdr( struct pso_pointer p ) { + struct pso_pointer result = nil; + struct pso2 *object = pointer_to_object( result ); + + switch ( get_tag_value( p ) ) { + case CONSTV: + result = object->payload.cons.cdr; + break; + case KEYTV: + case STRINGTV: + case SYMBOLTV: + result = object->payload.string.cdr; + break; + default: + result = + make_exception( cons + ( c_string_to_lisp_string + ( L"Invalid type for cdr" ), p ), nil, nil ); + break; + } + + // TODO: else throw an exception + + return result; +} + +/** + * @brief When a cons cell is freed, its car and cdr pointers must be + * decremented. + * + * Lisp calling conventions; one expected arg, the pointer to the cell to + * be destroyed. + */ +struct pso_pointer destroy_cons( struct pso_pointer fp, + struct pso_pointer env ) { + if ( stackp( fp ) ) { + struct pso4 *frame = pointer_to_pso4( fp ); + struct pso_pointer p = frame->payload.stack_frame.arg[0]; + dec_ref( car( p ) ); + dec_ref( cdr( p ) ); + } +} diff --git a/src/c/payloads/cons.h b/src/c/payloads/cons.h new file mode 100644 index 0000000..9ba768f --- /dev/null +++ b/src/c/payloads/cons.h @@ -0,0 +1,38 @@ +/** + * payloads/cons.h + * + * A cons cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_cons_h +#define __psse_payloads_cons_h +#include + +#include "memory/pointer.h" + +#define CONS_SIZE_CLASS 2 + +/** + * @brief A cons cell. + * + */ +struct cons_payload { + /** Contents of the Address Register, naturally. */ + struct pso_pointer car; + /** Contents of the Decrement Register, naturally. */ + struct pso_pointer cdr; +}; + +struct pso_pointer car( struct pso_pointer cons ); + +struct pso_pointer cdr( struct pso_pointer cons ); + +struct pso_pointer cons( struct pso_pointer car, struct pso_pointer cdr ); + +struct pso_pointer destroy_cons( struct pso_pointer fp, + struct pso_pointer env ); + +#endif diff --git a/src/c/payloads/exception.c b/src/c/payloads/exception.c new file mode 100644 index 0000000..1b38a76 --- /dev/null +++ b/src/c/payloads/exception.c @@ -0,0 +1,42 @@ +/** + * payloads/exception.c + * + * An exception; required three pointers, so use object of size class 3. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +#include "payloads/exception.h" + +struct pso_pointer make_exception( struct pso_pointer message, + struct pso_pointer frame_pointer, + struct pso_pointer cause ) { + // TODO: not yet implemented + return nil; +} + +/** + * @brief When an exception is freed, all its pointers must be decremented. + * + * Lisp calling conventions; one expected arg, the pointer to the object to + * be destroyed. + */ +struct pso_pointer destroy_exception( struct pso_pointer fp, + struct pso_pointer env ) { + if ( stackp( fp ) ) { + struct pso4 *frame = pointer_to_pso4( fp ); + struct pso_pointer p = frame->payload.stack_frame.arg[0]; + + // TODO: decrement every pointer indicated by an exception. + } + + return nil; +} diff --git a/src/c/payloads/exception.h b/src/c/payloads/exception.h new file mode 100644 index 0000000..5b865e2 --- /dev/null +++ b/src/c/payloads/exception.h @@ -0,0 +1,34 @@ +/** + * payloads/exception.h + * + * An exception; required three pointers, so use object of size class 3. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_exception_h +#define __psse_payloads_exception_h + +#include "memory/pointer.h" + +/** + * @brief An exception; required three pointers, so use object of size class 3. + */ +struct exception_payload { + /** @brief the exception message. Expected to be a string, but may be anything printable. */ + struct pso_pointer message; + /** @brief the stack frame at which the exception was thrown. */ + struct pso_pointer stack; + /** @brief the cause; expected to be another exception, or (usually) `nil`. */ + struct pso_pointer cause; +}; + +struct pso_pointer make_exception( struct pso_pointer message, + struct pso_pointer frame_pointer, + struct pso_pointer cause ); + +struct pso_pointer destroy_exception( struct pso_pointer fp, + struct pso_pointer env ); + +#endif diff --git a/src/c/payloads/free.h b/src/c/payloads/free.h new file mode 100644 index 0000000..cf4706f --- /dev/null +++ b/src/c/payloads/free.h @@ -0,0 +1,25 @@ +/** + * payloads/free.h + * + * An unassigned object, on a freelist; may be of any size class. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_free_h +#define __psse_payloads_free_h + +#include "memory/pointer.h" + +/** + * @brief An unassigned object, on a freelist; may be of any size class. + * + */ +struct free_payload { + /** the next object on the free list for my size class */ + struct pso_pointer next; +}; + + +#endif diff --git a/src/c/payloads/function.h b/src/c/payloads/function.h new file mode 100644 index 0000000..ea54051 --- /dev/null +++ b/src/c/payloads/function.h @@ -0,0 +1,41 @@ +/** + * payloads/function.h + * + * an ordinary Lisp function - one whose arguments are pre-evaluated. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_function_h +#define __psse_payloads_function_h + +#include "memory/pointer.h" +#include "memory/pso4.h" + +/** + * @brief Payload of a function cell. + * `source` points to the source from which the function was compiled, or NIL + * if it is a primitive. + * `executable` points to a function which takes a pointer to a stack frame + * (representing its stack frame) and a cons pointer (representing its + * environment) as arguments and returns a cons pointer (representing its + * result). + */ +struct function_payload { + /** + * pointer to metadata (e.g. the source from which the function was compiled). + */ + struct pso_pointer meta; + /** pointer to a function which takes a cons pointer (representing + * its argument list) and a cons pointer (representing its environment) and a + * stack frame (representing the previous stack frame) as arguments and returns + * a cons pointer (representing its result). + * \todo check this documentation is current! + */ + struct pso_pointer ( *executable ) ( struct pso4 *, + struct pso_pointer, + struct pso_pointer ); +}; + +#endif diff --git a/src/c/payloads/hashtable.h b/src/c/payloads/hashtable.h new file mode 100644 index 0000000..6cf8144 --- /dev/null +++ b/src/c/payloads/hashtable.h @@ -0,0 +1,48 @@ +/** + * payloads/hashtable.h + * + * an ordinary Lisp hashtable - one whose contents are immutable. + * + * Can sensibly sit in any pso from size class 6 upwards. However, it's often + * considered a good thing to have a prime number of buckets in a hash table. + * Our total overhead on the full object size is two words header, and, for + * hashtables, one word for the pointer to the (optional) hash function, and + * one for the number of buckets, total four. + * + * | size class | words | less overhead | nearest prime | wasted | + * | ---------- | ----- | ------------- | ------------- | ------ | + * | 5 | 32 | 28 | 23 | 5 | + * | 6 | 64 | 60 | 59 | 1 | + * | 7 | 128 | 124 | 113 | 11 | + * | 8 | 256 | 252 | 251 | 1 | + * | 9 | 512 | 508 | 503 | 5 | + * | 10 | 1024 | 1020 | 1019 | 1 | + * + * So we can fit 59 buckets into a 64 word class 6 pso, wasting one word; + * 251 buckets in a 256 word class 8 again wasting one word; 1019 in a size + * class 10, also wasting only one word. In a 32 word class 5, the best prime + * we can do is 23 buckets, wasting five words. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_hashtable_h +#define __psse_payloads_hashtable_h + +#include "memory/pointer.h" + +/** + * The payload of a hashtable. The number of buckets is assigned at run-time, + * and is stored in n_buckets. Each bucket is something ASSOC can consume: + * i.e. either an assoc list or a further hashtable. + */ +struct hashtable_payload { + struct pso_pointer hash_fn; /* function for hashing values in this hashtable, or `NIL` to use + the default hashing function */ + uint32_t n_buckets; /* number of hash buckets */ + struct pso_pointer buckets[]; /* actual hash buckets, which should be `NIL` + * or assoc lists or (possibly) further hashtables. */ +}; + +#endif diff --git a/src/c/payloads/integer.c b/src/c/payloads/integer.c new file mode 100644 index 0000000..8437a8b --- /dev/null +++ b/src/c/payloads/integer.c @@ -0,0 +1,39 @@ +/** + * payloads/integer.c + * + * An integer. Doctrine here is that we are not implementing bignum arithmetic in + * the bootstrap layer; an integer is, for now, just a 64 bit integer. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + +#include "debug.h" + +/** + * Allocate an integer cell representing this `value` and return a pso_pointer to it. + * @param value an integer value; + * @param more `nil`, or a pointer to the more significant cell(s) of this number. + * *NOTE* that if `more` is not `nil`, `value` *must not* exceed `MAX_INTEGER`. + */ +struct pso_pointer make_integer( int64_t value ) { + struct pso_pointer result = nil; + debug_print( L"Entering make_integer\n", DEBUG_ALLOC, 0 ); + + result = allocate( INTEGERTAG, 2 ); + struct pso2 *cell = pointer_to_object( result ); + cell->payload.integer.value = value; + + debug_print( L"make_integer: returning\n", DEBUG_ALLOC, 0 ); + debug_dump_object( result, DEBUG_ALLOC, 0 ); + + return result; +} diff --git a/src/c/payloads/integer.h b/src/c/payloads/integer.h new file mode 100644 index 0000000..0a391aa --- /dev/null +++ b/src/c/payloads/integer.h @@ -0,0 +1,28 @@ +/** + * payloads/integer.h + * + * An integer. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_integer_h +#define __psse_payloads_integer_h + +#include + +/** + * @brief An integer . + * + * Integers can in principal store a 128 bit value, but in practice we'll start + * promoting them to bignums when they pass the 64 bit barrier. However, that's + * in the Lisp layer, not the substrate. + */ +struct integer_payload { + __int128_t value; +}; + +struct pso_pointer make_integer( int64_t value ); + +#endif diff --git a/src/c/payloads/keyword.h b/src/c/payloads/keyword.h new file mode 100644 index 0000000..4728066 --- /dev/null +++ b/src/c/payloads/keyword.h @@ -0,0 +1,19 @@ +/** + * payloads/keyword.h + * + * A keyword cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_keyword_h +#define __psse_payloads_keyword_h + +#include "memory/pointer.h" + +/* TODO: for now, Keyword shares a payload with String, but this may change. + * Strings are of indefinite length, but keywords are really not, and might + * fit into any size class. */ + +#endif diff --git a/src/c/payloads/lambda.h b/src/c/payloads/lambda.h new file mode 100644 index 0000000..cfa9bde --- /dev/null +++ b/src/c/payloads/lambda.h @@ -0,0 +1,33 @@ +/** + * payloads/lambda.h + * + * A lambda cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_lambda_h +#define __psse_payloads_lambda_h + +#include "memory/pointer.h" + +/** + * @brief Tag for lambda cell. Lambdas are the interpretable (source) versions of functions. + * \see FUNCTIONTAG. + */ +#define LAMBDATAG "LMD" +#define LAMBDATV 4345164 + +/** + * @brief payload for lambda and nlambda cells. + */ +struct lambda_payload { + /** the arument list */ + struct pso_pointer args; + /** the body of the function to be applied to the arguments. */ + struct pso_pointer body; +}; + + +#endif diff --git a/src/c/payloads/mutex.h b/src/c/payloads/mutex.h new file mode 100644 index 0000000..5b6346f --- /dev/null +++ b/src/c/payloads/mutex.h @@ -0,0 +1,63 @@ +/** + * payloads/mutex.h + * + * A mutex (mutual exclusion lock) cell. Requires a size class 3 object. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_mutex_h +#define __psse_payloads_mutex_h + +#include + +#include "memory/pointer.h" + +/** + * @brief payload for mutex objects. + * + * NOTE that the size of `pthread_mutex_t` is variable dependent on hardware + * architecture, but the largest known size is 40 bytes (five words). + */ +struct mutex_payload { + pthread_mutex_t mutex; +}; + +struct pso_pointer make_mutex( ); + +/** + * @brief evaluates these forms within the context of a thread-safe lock. + * + * 1. wait until the specified mutex can be locked; + * 2. evaluate each of the forms sequentially in the context of that locked + * mutex; + * 3. if evaluation of any of the forms results in the throwing of an + * exception, catch the exception, unlock the mutex, and then re-throw the + * exception; + * 4. on successful completion of the evaluation of the forms, unlock the mutex + * and return the value of the last form. + * + * @param lock the lock: a mutex (MTX) object; + * @param forms a list of arbitrary Lisp forms. + * @return struct pso_pointer the result. + */ +struct pso_pointer with_lock( struct pso_pointer lock, + struct pso_pointer forms ); + +/** + * @brief as with_lock, q.v. but attempts to obtain a lock and returns an + * exception on failure + * + * 1. attempt to lock the specified mutex; + * 2. if successful, proceed as `with_lock`; + * 3. otherwise, return a specific exception which can be trapped for. + * + * @param lock the lock: a mutex (MTX) object; + * @param forms a list of arbitrary Lisp forms. + * @return struct pso_pointer the result. + */ +struct pso_pointer attempt_with_lock( struct pso_pointer lock, + struct pso_pointer forms ); + +#endif diff --git a/src/c/payloads/namespace.h b/src/c/payloads/namespace.h new file mode 100644 index 0000000..cba0112 --- /dev/null +++ b/src/c/payloads/namespace.h @@ -0,0 +1,57 @@ +/** + * payloads/namespace.h + * + * a Lisp namespace - a hashtable whose contents are mutable. + * + * Can sensibly sit in any pso from size class 6 upwards. However, it's often + * considered a good thing to have a prime number of buckets in a hash table. + * Our total overhead on the full object size is two words header, and, for + * namespaces, one word for the pointer to the (optional) hash function, + * one for the number of buckets, one for the pointer to the write ACL, one + * for the pointer to the mutex, total six. + * + * There are no really good fits until you get up to class 9, which might + * make sense for some namespaces, but it's quite large! + * + * | size class | words | less overhead | nearest prime | wasted | + * | ---------- | ----- | ------------- | ------------- | ------ | + * | 5 | 32 | 26 | 23 | 3 | + * | 6 | 64 | 58 | 53 | 5 | + * | 7 | 128 | 122 | 113 | 9 | + * | 8 | 256 | 250 | 241 | 9 | + * | 9 | 512 | 506 | 503 | 3 | + * | 10 | 1024 | 1018 | 1013 | 5 | + * + * Although it may be *better* to have prime numbers of buckets, how much + * better is it? Is a bucket with 23 slots sufficiently better than one + * with 26 slots to make up for its inevitably-longer hash buckets? + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_namespace_h +#define __psse_payloads_namespace_h + +#include "memory/pointer.h" + +/** + * The payload of a namespace. The number of buckets is assigned at run-time, + * and is stored in n_buckets. Each bucket is something ASSOC can consume: + * i.e. either an assoc list or a further namespace. + */ +struct namespace_payload { + struct pso_pointer hash_fn; /* function for hashing values in this namespace, or + * `NIL` to use the default hashing function */ + uint32_t n_buckets; /* number of hash buckets */ + uint32_t unused; /* for word alignment and possible later expansion */ + struct pso_pointer write_acl; /* it seems to me that it is likely that the + * principal difference between a hashtable and a + * namespace is that a hashtable has a write ACL + * of `NIL`, meaning not writeable by anyone */ + struct pso_pointer mutex; /* the mutex to lock when modifying this namespace. */ + struct pso_pointer buckets[]; /* actual hash buckets, which should be `NIL` + * or assoc lists or (possibly) further hashtables. */ +}; + +#endif diff --git a/src/c/payloads/nlambda.h b/src/c/payloads/nlambda.h new file mode 100644 index 0000000..d82d2e3 --- /dev/null +++ b/src/c/payloads/nlambda.h @@ -0,0 +1,17 @@ +/** + * payloads/nlambda.h + * + * A nlambda cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_nlambda_h +#define __psse_payloads_nlambda_h + +#include "memory/pointer.h" + +/* nlambda shares a payload with lambda */ + +#endif diff --git a/src/c/payloads/psse_string.c b/src/c/payloads/psse_string.c new file mode 100644 index 0000000..a75037b --- /dev/null +++ b/src/c/payloads/psse_string.c @@ -0,0 +1,45 @@ +/** + * payloads/string.c + * + * A string cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + + +#include + + /* + * wide characters + */ +#include +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +#include "ops/string_ops.h" +#include "ops/truth.h" + +/** + * @brief When an string is freed, its cdr pointer must be decremented. + * + * Lisp calling conventions; one expected arg, the pointer to the object to + * be destroyed. + */ +struct pso_pointer destroy_string( struct pso_pointer fp, + struct pso_pointer env ) { + if ( stackp( fp ) ) { + struct pso4 *frame = pointer_to_pso4( fp ); + struct pso_pointer p = frame->payload.stack_frame.arg[0]; + + dec_ref( cdr( p ) ); + } + + return nil; +} diff --git a/src/c/payloads/psse_string.h b/src/c/payloads/psse_string.h new file mode 100644 index 0000000..ea232ae --- /dev/null +++ b/src/c/payloads/psse_string.h @@ -0,0 +1,41 @@ +/** + * payloads/string.h + * + * A string cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_string_h +#define __psse_payloads_string_h +/* + * wide characters + */ +#include +#include + +#include "memory/pointer.h" + +/** + * @brief payload of a string cell. + * + * At least at first, only one UTF character will be stored in each cell. At + * present the payload of a symbol or keyword cell is identical + * to the payload of a string cell. + */ +struct string_payload { + /** the actual character stored in this cell */ + wint_t character; + /** a hash of the string value, computed at store time. */ + uint32_t hash; + /** the remainder of the string following this character. */ + struct pso_pointer cdr; +}; + +struct pso_pointer make_string( wint_t c, struct pso_pointer tail ); + +struct pso_pointer destroy_string( struct pso_pointer fp, + struct pso_pointer env ); + +#endif diff --git a/src/c/payloads/read_stream.c b/src/c/payloads/read_stream.c new file mode 100644 index 0000000..995d454 --- /dev/null +++ b/src/c/payloads/read_stream.c @@ -0,0 +1,36 @@ +/** + * payloads/read_stream.h + * + * A read stream. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include + +#include "io/fopen.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/tags.h" + + +/** + * Construct a cell which points to a stream open for reading. + * @param input the C stream to wrap. + * @param metadata a pointer to an associaton containing metadata on the stream. + * @return a pointer to the new read stream. + */ +struct pso_pointer make_read_stream( URL_FILE *input, + struct pso_pointer metadata ) { + struct pso_pointer pointer = allocate( READTAG, 2 ); + struct pso2 *cell = pointer_to_object( pointer ); + + cell->payload.stream.stream = input; + cell->payload.stream.meta = metadata; + + return pointer; +} diff --git a/src/c/payloads/read_stream.h b/src/c/payloads/read_stream.h new file mode 100644 index 0000000..1ea0adb --- /dev/null +++ b/src/c/payloads/read_stream.h @@ -0,0 +1,35 @@ +/** + * payloads/read_stream.h + * + * A read stream. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_read_stream_h +#define __psse_payloads_read_stream_h + +#include + +#include + +#include "io/fopen.h" +#include "memory/pointer.h" + +/** + * payload of a read or write stream cell. + */ +struct stream_payload { + /** the stream to read from or write to. */ + URL_FILE *stream; + /** metadata on the stream (e.g. its file attributes if a file, its HTTP + * headers if a URL, etc). Expected to be an association, or nil. Not yet + * implemented. */ + struct pso_pointer meta; +}; + +struct pso_pointer make_read_stream( URL_FILE * input, + struct pso_pointer metadata ); + +#endif diff --git a/src/c/payloads/special.h b/src/c/payloads/special.h new file mode 100644 index 0000000..5ccdb1f --- /dev/null +++ b/src/c/payloads/special.h @@ -0,0 +1,25 @@ +/** + * payloads/special.h + * + * A special form. + * + * No payload here; it would be identical to function_payload, q.v., so + * use that instead. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_special_h +#define __psse_payloads_special_h + +#include "memory/pointer.h" +#include "memory/pso4.h" + +/** + * A special form - one whose arguments are not pre-evaluated but passed as + * provided. + * \see NLAMBDATAG. + */ + +#endif diff --git a/src/c/payloads/stack.c b/src/c/payloads/stack.c new file mode 100644 index 0000000..a0591ab --- /dev/null +++ b/src/c/payloads/stack.c @@ -0,0 +1,94 @@ +/** + * payloads/stack.h + * + * a Lisp stack frame. + * + * Sits in a pso4. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include + +#include "memory/node.h" +#include "memory/pointer.h" +#include "memory/pso.h" +#include "memory/pso2.h" +#include "memory/pso4.h" +#include "memory/tags.h" + +/** + * @brief Construct a stack frame with this `previous` pointer, and arguments + * taken from the remaining arguments to this function, which should all be + * struct pso_pointer. + * + * @return a pso_pointer to the stack frame. + */ +struct pso_pointer make_frame( struct pso_pointer previous, ... ) { + va_list args; + va_start( args, previous ); + int count = va_arg( args, int ); + + struct pso_pointer frame_pointer = allocate( STACKTAG, 4 ); + struct pso4 *frame = ( struct pso4 * ) pointer_to_object( frame_pointer ); + + frame->payload.stack_frame.previous = previous; + + // I *think* the count starts with the number of args, so there are + // one fewer actual args. Need to test to verify this! + count--; + int cursor = 0; + frame->payload.stack_frame.args = count; + + for ( ; cursor < count && cursor < args_in_frame; cursor++ ) { + struct pso_pointer argument = va_arg( args, struct pso_pointer ); + + frame->payload.stack_frame.arg[cursor] = inc_ref( argument ); + } + if ( cursor < count ) { + struct pso_pointer more_args = nil; + + for ( ; cursor < count; cursor++ ) { + more_args = cons( va_arg( args, struct pso_pointer ), more_args ); + } + + // should be frame->payload.stack_frame.more = reverse( more_args), but + // we don't have reverse yet. TODO: fix. + frame->payload.stack_frame.more = more_args; + } else { + for ( ; cursor < args_in_frame; cursor++ ) { + frame->payload.stack_frame.arg[cursor] = nil; + } + } + + return frame_pointer; +} + +/** + * @brief When a stack frame is freed, all its pointers must be decremented. + * + * Lisp calling conventions; one expected arg, the pointer to the object to + * be destroyed. + */ +struct pso_pointer destroy_stack_frame( struct pso_pointer fp, + struct pso_pointer env ) { + if ( stackp( fp ) ) { + struct pso4 *frame = pointer_to_pso4( fp ); + struct pso4 *casualty = + pointer_to_pso4( frame->payload.stack_frame.arg[0] ); + + dec_ref( casualty->payload.stack_frame.previous ); + dec_ref( casualty->payload.stack_frame.function ); + dec_ref( casualty->payload.stack_frame.more ); + + for ( int i = 0; i < args_in_frame; i++ ) { + dec_ref( casualty->payload.stack_frame.arg[0] ); + } + + casualty->payload.stack_frame.args = 0; + casualty->payload.stack_frame.depth = 0; + } + + return nil; +} diff --git a/src/c/payloads/stack.h b/src/c/payloads/stack.h new file mode 100644 index 0000000..7333809 --- /dev/null +++ b/src/c/payloads/stack.h @@ -0,0 +1,45 @@ +/** + * payloads/stack.h + * + * a Lisp stack frame. + * + * Sits in a pso4. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_stack_h +#define __psse_payloads_stack_h + +#include "memory/pointer.h" + +/* + * number of arguments stored in a stack frame + */ +#define args_in_frame 8 + +/** + * A stack frame. + */ +struct stack_frame_payload { + /** the previous frame. */ + struct pso_pointer previous; + /** first 8 arument bindings. */ + struct pso_pointer arg[args_in_frame]; + /** list of any further argument bindings. */ + struct pso_pointer more; + /** the function to be called. */ + struct pso_pointer function; + /** the number of arguments provided. */ + uint32_t args; + /** the depth of the stack below this frame */ + uint32_t depth; +}; + +struct pso_pointer make_frame( struct pso_pointer previous, ... ); + +struct pso_pointer destroy_stack_frame( struct pso_pointer fp, + struct pso_pointer env ); + +#endif diff --git a/src/c/payloads/symbol.h b/src/c/payloads/symbol.h new file mode 100644 index 0000000..cddd293 --- /dev/null +++ b/src/c/payloads/symbol.h @@ -0,0 +1,19 @@ +/** + * payloads/symbol.h + * + * A symbol cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_symbol_h +#define __psse_payloads_symbol_h + +#include "memory/pointer.h" + +/* TODO: for now, Symbol shares a payload with String, but this may change. + * Strings are of indefinite length, but symbols are really not, and might + * fit into any size class. */ + +#endif diff --git a/src/c/payloads/time.h b/src/c/payloads/time.h new file mode 100644 index 0000000..d808c0e --- /dev/null +++ b/src/c/payloads/time.h @@ -0,0 +1,34 @@ +/** + * payloads/cons.h + * + * A cons cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_cons_h +#define __psse_payloads_cons_h + +#include +#include +#include + +#include "memory/pointer.h" + +/** + * @brief Tag for a time stamp. + */ +#define TIMETAG "TIM" +#define TIMETV 5065044 + +/** + * The payload of a time cell: an unsigned 128 bit value representing micro- + * seconds since the estimated date of the Big Bang (actually, for + * convenience, 14Bn years before 1st Jan 1970 (the UNIX epoch)) + */ +struct time_payload { + unsigned __int128 value; +}; + +#endif diff --git a/src/c/payloads/vector_pointer.h b/src/c/payloads/vector_pointer.h new file mode 100644 index 0000000..4be88b6 --- /dev/null +++ b/src/c/payloads/vector_pointer.h @@ -0,0 +1,42 @@ +/** + * payloads/vector_pointer.h + * + * A pointer to an object in vector space. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_vector_pointer_h +#define __psse_payloads_vector_pointer_h + +#include + +#include "memory/pointer.h" + +/** + * A pointer to an object in vector space. + */ +#define VECTORPOINTTAG "VSP" +#define VECTORPOINTTV 5264214 + +/** + * @brief payload of a vector pointer cell. + */ +struct vectorp_payload { + /** the tag of the vector-space object. NOTE that the vector space object + * should itself have the identical tag. */ + union { + /** the tag (type) of the vector-space object this cell + * points to, considered as bytes. */ + char bytes[TAGLENGTH]; + /** the tag considered as a number */ + uint32_t value; + } tag; + /** unused padding to word-align the address */ + uint32_t padding; + /** the address of the actual vector space object */ + void *address; +}; + +#endif diff --git a/src/c/payloads/write_stream.h b/src/c/payloads/write_stream.h new file mode 100644 index 0000000..d647575 --- /dev/null +++ b/src/c/payloads/write_stream.h @@ -0,0 +1,14 @@ +/** + * payloads/write_stream.h + * + * A write_stream cell. + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_payloads_write_stream_h +#define __psse_payloads_write_stream_h + +/* write stream shares a payload with /see read_streem.h */ +#endif diff --git a/src/c/psse.c b/src/c/psse.c new file mode 100644 index 0000000..b234103 --- /dev/null +++ b/src/c/psse.c @@ -0,0 +1,111 @@ + +/** + * psse.c + * + * Post Scarcity Software Environment: entry point. + * + * Start up and initialise the environement - just enough to get working + * and (ultimately) hand off to the executive. + * + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include "psse.h" +#include "io/io.h" +#include "memory/node.h" + +#include "ops/stack_ops.h" +#include "ops/truth.h" + +void print_banner( ) { + fwprintf( stdout, L"Post-Scarcity Software Environment version %s\n\n", + VERSION ); +} + +/** + * Print command line options to this `stream`. + * + * @stream the stream to print to. + */ +void print_options( FILE *stream ) { + fwprintf( stream, L"Expected options are:\n" ); + fwprintf( stream, + L"\t-d\tDump memory to standard out at end of run (copious!);\n" ); + fwprintf( stream, L"\t-h\tPrint this message and exit;\n" ); + fwprintf( stream, L"\t-p\tShow a prompt (default is no prompt);\n" ); + fwprintf( stream, + L"\t-s LIMIT\n\t\tSet the maximum stack depth to this LIMIT (int)\n" ); +#ifdef DEBUG + fwprintf( stream, + L"\t-v LEVEL\n\t\tSet verbosity to the specified level (0...512)\n" ); + fwprintf( stream, L"\t\tWhere bits are interpreted as follows:\n" ); + fwprintf( stream, L"\t\t1\tALLOC;\n" ); + fwprintf( stream, L"\t\t2\tARITH;\n" ); + fwprintf( stream, L"\t\t4\tBIND;\n" ); + fwprintf( stream, L"\t\t8\tBOOTSTRAP;\n" ); + fwprintf( stream, L"\t\t16\tEVAL;\n" ); + fwprintf( stream, L"\t\t32\tINPUT/OUTPUT;\n" ); + fwprintf( stream, L"\t\t64\tLAMBDA;\n" ); + fwprintf( stream, L"\t\t128\tREPL;\n" ); + fwprintf( stream, L"\t\t256\tSTACK;\n" ); + fwprintf( stream, L"\t\t512\tEQUAL.\n" ); +#endif +} + + + /** + * main entry point; parse command line arguments, initialise the environment, + * and enter the read-eval-print loop. + */ +int main( int argc, char *argv[] ) { + int option; + bool dump_at_end = false; + bool show_prompt = false; + char *infilename = NULL; + + setlocale( LC_ALL, "" ); + if ( initialise_io( ) != 0 ) { + fputs( "Failed to initialise I/O subsystem\n", stderr ); + exit( 1 ); + } + + while ( ( option = getopt( argc, argv, "dhi:ps:v:" ) ) != -1 ) { + switch ( option ) { + case 'd': + dump_at_end = true; + break; + case 'h': + print_banner( ); + print_options( stdout ); + exit( 0 ); + break; + case 'i': + infilename = optarg; + break; + case 'p': + show_prompt = true; + break; + case 's': + stack_limit = atoi( optarg ); + break; + case 'v': + verbosity = atoi( optarg ); + break; + default: + fwprintf( stderr, L"Unexpected option %c\n", option ); + print_options( stderr ); + exit( 1 ); + break; + } + } + + if ( nilp( initialise_node( 0 ) ) ) { + fputs( "Failed to initialise node\n", stderr ); + exit( 1 ); + } + // repl( ); + + exit( 0 ); +} diff --git a/src/c/psse.h b/src/c/psse.h new file mode 100644 index 0000000..0fe9b43 --- /dev/null +++ b/src/c/psse.h @@ -0,0 +1,30 @@ +/** + * psse.h + * + * Post Scarcity Software Environment: entry point. + * + * Start up and initialise the environement - just enough to get working + * and (ultimately) hand off to the executive. + * + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_psse_h +#define __psse_psse_h + +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "memory/memory.h" +#include "payloads/stack.h" +#include "version.h" + +#endif diff --git a/src/c/utils.c b/src/c/utils.c new file mode 100644 index 0000000..9919dbe --- /dev/null +++ b/src/c/utils.c @@ -0,0 +1,33 @@ +/* + * utils.c + * + * little generally useful functions which aren't in any way special to PSSE. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#include +#include +#include + + +int index_of( char c, const char *s ) { + int i; + + for ( i = 0; s[i] != c && s[i] != 0; i++ ); + + return s[i] == c ? i : -1; +} + +char *trim( char *s ) { + int i; + + for ( i = strlen( s ); ( isblank( s[i] ) || iscntrl( s[i] ) ) && i >= 0; + i-- ) { + s[i] = '\0'; + } + for ( i = 0; s[i] != '\0' && ( isblank( s[i] ) || iscntrl( s[i] ) ); i++ ); + + return ( char * ) &s[i]; +} diff --git a/src/c/utils.h b/src/c/utils.h new file mode 100644 index 0000000..456e4d0 --- /dev/null +++ b/src/c/utils.h @@ -0,0 +1,17 @@ +/* + * utils.h + * + * little generally useful functions which aren't in any way special to PSSE. + * + * (c) 2019 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#ifndef __psse_utils_h +#define __psse_utils_h + +int index_of( char c, const char *s ); + +char *trim( char *s ); + +#endif diff --git a/src/c/version.h b/src/c/version.h new file mode 100644 index 0000000..d6b3f2b --- /dev/null +++ b/src/c/version.h @@ -0,0 +1,11 @@ +/** + * version.h + * + * Just the version number. There's DEFINITELY a better way to do this! + * + * + * (c) 2026 Simon Brooke + * Licensed under GPL version 2.0, or, at your option, any later version. + */ + +#define VERSION "0.1.0-SNAPSHOT" diff --git a/src/sed/convert.sed b/src/sed/convert.sed new file mode 100644 index 0000000..d7d681a --- /dev/null +++ b/src/sed/convert.sed @@ -0,0 +1,17 @@ +# sed script to help converting snippets of code from 0.0.X to 0.1.X + +s?allocate_cell( *\([A-Z]*\) *)?allocate( \1, 2)?g +s?c_car(?car(?g +s?c_cdr(?cdr(?g +s?cons_pointer?pso_pointer?g +s?consspaceobject\.h?pso2\.h? +s?cons_space_object?pso2?g +s?debug_print(\([^)]*\))?debug_print(\1, 0)?g +s?frame->arg?frame->payload.stack_frame.arg?g +s?make_cons?cons?g +s?NIL?nil?g +s?nilTAG?NILTAG?g +s?&pointer2cell?pointer_to_object?g +s?stack_frame?pso4?g +s?stack\.h?pso4\.h? +s?tag.value?header.tag.bytes.value \& 0xfffff?g \ No newline at end of file diff --git a/src/zig/memory/page.zig b/src/zig/memory/page.zig new file mode 100644 index 0000000..25ff3e2 --- /dev/null +++ b/src/zig/memory/page.zig @@ -0,0 +1,9 @@ +/// A Page is an area of memory in which objects are stored. Every page has +/// a header, and every page header has common structure. The objects stored +/// on any page are all PagedObjects, q.v. and, on any given page, all the +/// objects stored on that page are of the same size. +const Page = struct { + const content = union { + const bytes = [1048576]u8; + }; +}; diff --git a/src/zig/memory/paged-space-objects.zig b/src/zig/memory/paged-space-objects.zig new file mode 100644 index 0000000..8c06f5c --- /dev/null +++ b/src/zig/memory/paged-space-objects.zig @@ -0,0 +1,17 @@ +/// Header for objects which are allocated in pages. +const PagedSpaceObjectHeader = struct { + const tag = union { + const bytes = [4]u8; + const value = u32; + }; + var count = u32; + const acl = u64; // later when we have a pointer object defined this will be substituted +}; + +const PSO4: type = struct { + const PagedSpaceObjectHeader: header; + const payload = union { + var bytes: [8]u8; + var words: [2]u64; + }; +}; diff --git a/src/zig/memory/version.zig b/src/zig/memory/version.zig new file mode 100644 index 0000000..ecf82a9 --- /dev/null +++ b/src/zig/memory/version.zig @@ -0,0 +1 @@ +const version: []const u8 = "0.1.0-SNAPSHOT"; diff --git a/unit-tests/allocation-tests/allocation-tests.ods b/unit-tests/allocation-tests/allocation-tests.ods new file mode 100644 index 0000000..32a99d6 Binary files /dev/null and b/unit-tests/allocation-tests/allocation-tests.ods differ diff --git a/unit-tests/allocation-tests/feature-2.test.tmp b/unit-tests/allocation-tests/feature-2.test.tmp new file mode 100644 index 0000000..5d198dd --- /dev/null +++ b/unit-tests/allocation-tests/feature-2.test.tmp @@ -0,0 +1,30 @@ +Case, Summary, Allocated, Deallocated, Not deallocated, Delta Allocated, Delta Deallocated, Delta Not Deallocated +"Basecase", "Allocation summary allocated 19986 deallocated 245 not deallocated 19741", 19986, 245, 19741 +"", "Allocation summary allocated 19986 deallocated 245 not deallocated 19741", 19986, 245, 19741, 0, 0, 0 +"nil", "Allocation summary allocated 20019 deallocated 253 not deallocated 19766", 20019, 253, 19766, 33, 8, 25 +"()", "Allocation summary allocated 19990 deallocated 249 not deallocated 19741", 19990, 249, 19741, 4, 4, 0 +"(quote ())", "Allocation summary allocated 20025 deallocated 247 not deallocated 19778", 20025, 247, 19778, 39, 2, 37 +"(list)", "Allocation summary allocated 20023 deallocated 257 not deallocated 19766", 20023, 257, 19766, 37, 12, 25 +"(list )", "Allocation summary allocated 20023 deallocated 257 not deallocated 19766", 20023, 257, 19766, 37, 12, 25 +"(list 1)", "Allocation summary allocated 20033 deallocated 262 not deallocated 19771", 20033, 262, 19771, 47, 17, 30 +"(list 1 1)", "Allocation summary allocated 20043 deallocated 267 not deallocated 19776", 20043, 267, 19776, 57, 22, 35 +"(list 1 1 1)", "Allocation summary allocated 20053 deallocated 272 not deallocated 19781", 20053, 272, 19781, 67, 27, 40 +"(list 1 2 3)", "Allocation summary allocated 20053 deallocated 272 not deallocated 19781", 20053, 272, 19781, 67, 27, 40 +"(+)", "Allocation summary allocated 20022 deallocated 255 not deallocated 19767", 20022, 255, 19767, 36, 10, 26 +"(+ 1)", "Allocation summary allocated 20030 deallocated 260 not deallocated 19770", 20030, 260, 19770, 44, 15, 29 +"(+ 1 1)", "Allocation summary allocated 20039 deallocated 265 not deallocated 19774", 20039, 265, 19774, 53, 20, 33 +"(+ 1 1 1)", "Allocation summary allocated 20048 deallocated 270 not deallocated 19778", 20048, 270, 19778, 62, 25, 37 +"(+ 1 2 3)", "Allocation summary allocated 20048 deallocated 270 not deallocated 19778", 20048, 270, 19778, 62, 25, 37 +"(list 'a 'a 'a)", "Allocation summary allocated 20137 deallocated 278 not deallocated 19859", 20137, 278, 19859, 151, 33, 118 +"(list 'a 'b 'c)", "Allocation summary allocated 20137 deallocated 278 not deallocated 19859", 20137, 278, 19859, 151, 33, 118 +"(list :a :b :c)", "Allocation summary allocated 20107 deallocated 260 not deallocated 19847", 20107, 260, 19847, 121, 15, 106 +"(list :aa :bb :cc)", "Allocation summary allocated 20185 deallocated 260 not deallocated 19925", 20185, 260, 19925, 199, 15, 184 +"(list :aaa :bbb :ccc)", "Allocation summary allocated 20263 deallocated 260 not deallocated 20003", 20263, 260, 20003, 277, 15, 262 +"(list :alpha :bravo :charlie)", "Allocation summary allocated 20471 deallocated 260 not deallocated 20211", 20471, 260, 20211, 485, 15, 470 +"{}", "Allocation summary allocated 19992 deallocated 251 not deallocated 19741", 19992, 251, 19741, 6, 6, 0 +"{:z 0}", "Allocation summary allocated 20029 deallocated 257 not deallocated 19772", 20029, 257, 19772, 43, 12, 31 +"{:zero 0}", "Allocation summary allocated 20107 deallocated 257 not deallocated 19850", 20107, 257, 19850, 121, 12, 109 +"{:z 0 :o 1}", "Allocation summary allocated 20066 deallocated 261 not deallocated 19805", 20066, 261, 19805, 80, 16, 64 +"{:zero 0 :one 1}", "Allocation summary allocated 20196 deallocated 263 not deallocated 19933", 20196, 263, 19933, 210, 18, 192 +"{:z 0 :o 1 :t 2}", "Allocation summary allocated 20103 deallocated 265 not deallocated 19838", 20103, 265, 19838, 117, 20, 97 +"{:zero 0 :one 1 :two 2 :three 3 :four 4 :five five :six 6 :seven 7 :eight 8 :nine 9}", "Allocation summary allocated 21164 deallocated 306 not deallocated 20858", 21164, 306, 20858, 1178, 61, 1117 diff --git a/unit-tests/allocation-tests/grep.bb b/unit-tests/allocation-tests/grep.bb new file mode 100755 index 0000000..23b187a --- /dev/null +++ b/unit-tests/allocation-tests/grep.bb @@ -0,0 +1,19 @@ +#!/home/simon/bin/bb + +(require '[clojure.java.io :as io]) +(import '[java.lang ProcessBuilder$Redirect]) + +(defn grep [input pattern] + (let [proc (-> (ProcessBuilder. ["grep" pattern]) + (.redirectOutput ProcessBuilder$Redirect/INHERIT) + (.redirectError ProcessBuilder$Redirect/INHERIT) + (.start)) + proc-input (.getOutputStream proc)] + (with-open [w (io/writer proc-input)] + (binding [*out* w] + (print input) + (flush))) + (.waitFor proc) + nil)) + +(grep "hello\nbye\n" "e") diff --git a/unit-tests/assoc.sh b/unit-tests/assoc.sh new file mode 100644 index 0000000..339c023 --- /dev/null +++ b/unit-tests/assoc.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +result=0 + +expected='1' +actual=`echo "(assoc 'foo '((foo . 1) (bar . 2) {ban 3 froboz 4 foo 5} (foobar . 6)))" | target/psse | tail -1` + + +echo -n "$0 $1: assoc list binding... " + +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + result=`echo "${result} + 1" | bc` +fi + +expected='4' +actual=`echo "(assoc 'froboz '((foo . 1) (bar . 2) {ban 3 froboz 4 foo 5} (foobar . 6)))" | target/psse | tail -1` + + +echo -n "$0 $1: hashmap binding... " + +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + result=`echo "${result} + 1" | bc` +fi + +expected='nil' +actual=`echo "(assoc 'ban '((foo . 1) (bar . 2) {ban nil froboz 4 foo 5} (foobar . 6) (ban . 7)))" | target/psse | tail -1` + + +echo -n "$0 $1: key bound to 'nil' (1)... " + +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + result=`echo "${result} + 1" | bc` +fi + +expected='nil' +actual=`echo "(assoc 'foo '((foo . nil) (bar . 2) {ban 3 froboz 4 foo 5} (foobar . 6)))" | target/psse | tail -1` + + +echo -n "$0 $1: key bound to nil (2)... " + +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + result=`echo "${result} + 1" | bc` +fi + diff --git a/unit-tests/mapcar.sh b/unit-tests/mapcar.sh new file mode 100644 index 0000000..70b41b0 --- /dev/null +++ b/unit-tests/mapcar.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +result=0 + +##################################################################### +# Create an empty map using map notation +expected='(2 3 4)' +actual=`echo "(mapcar (lambda (n) (+ n 1)) '(1 2 3))" | target/psse | tail -1` + +echo -n "$0: Mapping interpreted function across list: " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + result=1 +fi + +##################################################################### +# Create an empty map using make-map +expected='("INTR" "REAL" "RTIO" "KEYW")' +actual=`echo "(mapcar type '(1 1.0 1/2 :one))" | target/psse | tail -1` + +echo -n "$0: Mapping primitive function across list: " +if [ "${expected}" = "${actual}" ] +then + echo "OK" +else + echo "Fail: expected '${expected}', got '${actual}'" + result=1 +fi diff --git a/unit-tests/string-allocation.sh b/unit-tests/string-allocation.sh index 1790788..4e83a5c 100755 --- a/unit-tests/string-allocation.sh +++ b/unit-tests/string-allocation.sh @@ -3,9 +3,9 @@ value='"Fred"' expected="String cell: character 'F'" # set! protects "Fred" from the garbage collector. -actual=`echo "(set! x ${value})" | target/psse -d 2>&1 | grep "$expected" | sed 's/ *\(.*\) next.*$/\1/'` +actual=`echo "(set! x ${value})" | target/psse -d 2>&1 | grep "$expected" # | sed 's/ *\(.*\) next.*$/\1/'` -if [ $? -eq 0 ] +if [ "${expected}" = "${actual}" ] then echo "OK" exit 0 diff --git a/utils_src/tagvalcalc/tagvalcalc.c b/utils_src/tagvalcalc/tagvalcalc.c index 67828bd..ad2e1a9 100644 --- a/utils_src/tagvalcalc/tagvalcalc.c +++ b/utils_src/tagvalcalc/tagvalcalc.c @@ -3,7 +3,7 @@ #include #include -#define TAGLENGTH 4 +#define TAGLENGTH 3 struct dummy { union { @@ -21,6 +21,6 @@ int main( int argc, char *argv[] ) { strncpy( &buffer.tag.bytes[0], argv[i], TAGLENGTH ); - printf( "%4.4s:\t%d\n", argv[i], buffer.tag.value); + printf( "%4.4s:\t%d\n", argv[i], buffer.tag.value & 0xffffff); } } diff --git a/utils_src/tagvalcalc/tvc b/utils_src/tagvalcalc/tvc index acd850a..374be1a 100755 Binary files a/utils_src/tagvalcalc/tvc and b/utils_src/tagvalcalc/tvc differ