Compare commits

...
Sign in to create a new pull request.

25 commits

Author SHA1 Message Date
f5f8e38b91 Added a note on things to read for the compiler. 2026-04-03 11:14:39 +01:00
b6480aebd5 Converted everything to the new lisp calling convention.
Fixes #19
2026-04-01 17:11:10 +01:00
f3a26bc02e Added bind; but mainly, tactical commit before changinh lisp calling
convention
2026-04-01 16:35:06 +01:00
9eb0d3c5a0 I think read will now read integers and symbols, but it's untested.
Everything compiles.
2026-04-01 16:06:16 +01:00
cc8e96eda4 Further small changes on the way to a reader. 2026-04-01 08:50:35 +01:00
a302663b32 Well, I really made a mess with the last commit; this one sorts it out. 2026-03-31 20:09:37 +01:00
1196b3eb1d read isn't written yet, but I think all the building blocks I need for it are.
Compiles and runs; does nothing yet.
2026-03-31 20:01:01 +01:00
364d7d2c7b Compiles again, now with bootstrap-layer print implemented, but not yet tested.
To get print implemented, I also had to implement a lot of other things.
2026-03-31 15:05:44 +01:00
2b22780ccf This once again does NOT compile. I've done work on macros; they don't work yet.. 2026-03-30 21:49:08 +01:00
e3f922a8bf Added character as a first class object. Stepped through a run; it all works. 2026-03-30 13:29:26 +01:00
a8b4a6e69d My monster, it not only compiles, it now runs! 2026-03-30 11:52:41 +01:00
60921be3d4 Much more progress, still doesn't compile. 2026-03-30 09:35:34 +01:00
1ce9fbda77 Still not fixed... 2026-03-29 17:25:08 +01:00
04bf001652 Progress, but it still doesn't build. I think I'm close, now... 2026-03-29 12:03:31 +01:00
00997d3c90 Down to to compilation errors. Had to reinstate individual size-class headers. 2026-03-29 11:07:30 +01:00
cae27731b7 Huge amount of work. Does not even nearly compile, but it's nearer. 2026-03-28 23:46:14 +00:00
1afb1b9fad Added work on making namespaces threadsafe. 2026-03-28 11:56:36 +00:00
154cda8da3 Added a 'state of play' update; changed the strapline in Home.md 2026-03-26 09:20:41 +00:00
57c5fe314a Things which should have been saved before the last commit. Sigh. 2026-03-26 09:03:27 +00:00
6c4be8f283 Lots more code written, and I think most of it's OK; but it doesn't compile yet. 2026-03-26 09:01:46 +00:00
604fca3c24 Got most of the new memory architecture roughed out. 2026-03-25 11:24:33 +00:00
19d6b0df29 Firming up the roadmap for the 0.1.X prototype 2026-03-24 16:53:54 +00:00
914c35ead0 Moved legacy code into archive, ready for a new rapid(?) prototype.
I may regret doing this!
2026-03-24 16:25:09 +00:00
09051a3e63 Added an essay on the design of paged space objects; started experimenting in Zig. 2026-03-23 18:47:00 +00:00
99d4794f3b Upversioned the C source tree to '0.0.7-SNAPSHOT', but proposing to start experimental
work towards 0.1.0 in separate source trees.
2026-03-19 13:59:06 +00:00
172 changed files with 6664 additions and 40 deletions

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "munit"]
path = munit
url = https://github.com/nemequ/munit.git

View file

@ -778,7 +778,7 @@ WARN_FORMAT = "$file:$line: $text"
# messages should be written. If left blank the output is written to standard
# error (stderr).
WARN_LOGFILE = doxy.log
WARN_LOGFILE = tmp/doxy.log
#---------------------------------------------------------------------------
# Configuration options related to the input files

View file

@ -1,5 +1,5 @@
TARGET ?= target/psse
SRC_DIRS ?= ./src
SRC_DIRS ?= ./src/c
SRCS := $(shell find $(SRC_DIRS) -name *.cpp -or -name *.c -or -name *.s)
HDRS := $(shell find $(SRC_DIRS) -name *.h)
@ -8,8 +8,9 @@ DEPS := $(OBJS:.o=.d)
TESTS := $(shell find unit-tests -name *.sh)
INC_DIRS := $(shell find $(SRC_DIRS) -type d)
INC_FLAGS := $(addprefix -I,$(INC_DIRS))
# INC_DIRS := $(shell find $(SRC_DIRS) -type d)
# INC_FLAGS := $(addprefix -I,$(INC_DIRS))
INC_FLAGS := -I $(SRC_DIRS)
TMP_DIR ?= ./tmp
@ -20,13 +21,14 @@ INDENT_FLAGS := -nbad -bap -nbc -br -brf -brs -c33 -cd33 -ncdb -ce -ci4 -cli4 \
CPPFLAGS ?= $(INC_FLAGS) -MMD -MP -g -DDEBUG
LDFLAGS := -lm -lcurl
DEBUGFLAGS := -g3
GCCFLAGS := -std=gnu23
all: $(TARGET)
Debug: $(TARGET)
$(TARGET): $(OBJS) Makefile
$(CC) $(DEBUGFLAGS) $(LDFLAGS) $(OBJS) -o $@ $(LDFLAGS) $(LOADLIBES) $(LDLIBS)
$(CC) $(GCCFLAGS) $(DEBUGFLAGS) $(LDFLAGS) $(OBJS) -o $@ $(LDFLAGS) $(LOADLIBES) $(LDLIBS)
doc: $(SRCS) Makefile Doxyfile
doxygen

View file

@ -272,7 +272,7 @@ bool equal_map_map( struct cons_pointer a, struct cons_pointer b ) {
for ( struct cons_pointer i = keys_a; !nilp( i ); i = c_cdr( i ) ) {
struct cons_pointer key = c_car( i );
if ( !equal
if ( !c_equal
( hashmap_get( a, key, false ),
hashmap_get( b, key, false ) ) ) {
result = false;
@ -331,7 +331,7 @@ bool equal_vector_vector( struct cons_pointer a, struct cons_pointer b ) {
* Deep, and thus expensive, equality: true if these two objects have
* identical structure, else false.
*/
bool equal( struct cons_pointer a, struct cons_pointer b ) {
bool c_equal( struct cons_pointer a, struct cons_pointer b ) {
debug_print( L"\nequal: ", DEBUG_EQUAL );
debug_print_object( a, DEBUG_EQUAL );
debug_print( L" = ", DEBUG_EQUAL );
@ -353,8 +353,8 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) {
* structures can be of indefinite extent. It *must* be done by
* iteration (and even that is problematic) */
result =
equal( cell_a->payload.cons.car, cell_b->payload.cons.car )
&& equal( cell_a->payload.cons.cdr,
c_equal( cell_a->payload.cons.car, cell_b->payload.cons.car )
&& c_equal( cell_a->payload.cons.cdr,
cell_b->payload.cons.cdr );
break;
case KEYTV:
@ -401,7 +401,7 @@ bool equal( struct cons_pointer a, struct cons_pointer b ) {
* isn't sufficient. So we recurse at least once. */
result = ( wcsncmp( a_buff, b_buff, i ) == 0 )
&& equal( c_cdr( a ), c_cdr( b ) );
&& c_equal( c_cdr( a ), c_cdr( b ) );
}
break;
case VECTORPOINTTV:

View file

@ -31,6 +31,6 @@ bool eq( struct cons_pointer a, struct cons_pointer b );
* Deep, and thus expensive, equality: true if these two objects have
* identical structure, else false.
*/
bool equal( struct cons_pointer a, struct cons_pointer b );
bool c_equal( struct cons_pointer a, struct cons_pointer b );
#endif

View file

@ -334,7 +334,7 @@ struct cons_pointer search_store( struct cons_pointer key,
switch ( get_tag_value( entry_ptr ) ) {
case CONSTV:
if ( equal( key, c_car( entry_ptr ) ) ) {
if ( c_equal( key, c_car( entry_ptr ) ) ) {
result =
return_key ? c_car( entry_ptr )
: c_cdr( entry_ptr );
@ -441,7 +441,7 @@ struct cons_pointer internedp( struct cons_pointer key,
for ( struct cons_pointer pair = c_car( store );
eq( result, NIL ) && !nilp( pair ); pair = c_car( store ) ) {
if ( consp( pair ) ) {
if ( equal( c_car( pair ), key ) ) {
if ( c_equal( c_car( pair ), key ) ) {
// yes, this should be `eq`, but if symbols are correctly
// interned this will work efficiently, and if not it will
// still work.

View file

@ -987,7 +987,7 @@ lisp_equal( struct stack_frame *frame, struct cons_pointer frame_pointer,
if ( frame->args > 1 ) {
for ( int b = 1; ( truep( result ) ) && ( b < frame->args ); b++ ) {
result =
equal( frame->arg[0], fetch_arg( frame, b ) ) ? TRUE : NIL;
c_equal( frame->arg[0], fetch_arg( frame, b ) ) ? TRUE : NIL;
}
}

View file

@ -8,4 +8,4 @@
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#define VERSION "0.0.6-SNAPSHOT"
#define VERSION "0.0.7-SNAPSHOT"

View file

@ -0,0 +1,89 @@
# Design decisions for 0.1.0
This is a document that is likely to be revisited, probably frequently.
## Retire the 0.0.X codebase
Move the existing codebase out of the compile space altogether; it is to be
treated as a finished rapid prototype, not extended further, and code largely
not copied but learned from.
## Remain open to new substrate languages, but continue in C for now
I'm disappointed with [Zig](https://ziglang.org/). While the language
concepts are beautiful, and if it were stable it would be an excellent tool, it
isn't stable. I'm still open to build some of the 0.1.X prototype in Zig, but
it isn't the main tool.
I haven't yet evaluated [Nim](https://nim-lang.org/). I'm prejudiced against
its syntax, but, again, I'm open to using it for some of this prototype.
But for now, I will continue to work in C.
## Substrate is shallow
In the 0.0.X prototype, I tried to do too much in the substrate. I tried to
write bignums in C, and in this I failed; I would have done much better to
get a very small Lisp working well sooner, and build new features in that.
In 0.1.X the substrate will be much less feature rich, but support the creation
of novel types of data object in Lisp.
## Paged Space Objects
Paged space objects will be implemented largely in line with [this document](Paged-space-objects.md).
## Tags
Tags will continue to be 32 bit objects, which can be considered as unsigned
integer values or as four bytes. However, only the first three bytes will be
mnemonic. The fourth byte will indicate the size class of the object; where
the size class represents the allocation size, *not* the payload size. The
encoding is as in this table:
| Tag | | | Size of allocation | |
| ---- | ----------- | --- | --------------- | --------------- |
| Bits | Field value | Hex | Number of words | Number of bytes |
| ---- | ----------- | --- | --------------- | --------------- |
| 0000 | 0 | 0 | 1 | 8 |
| 0001 | 1 | 1 | 2 | 16 |
| 0010 | 2 | 2 | 4 | 32 |
| 0011 | 3 | 3 | 8 | 64 |
| 0100 | 4 | 4 | 16 | 128 |
| 0101 | 5 | 5 | 32 | 256 |
| 0110 | 6 | 6 | 64 | 512 |
| 0111 | 7 | 7 | 128 | 1024 |
| 1000 | 8 | 8 | 256 | 2048 |
| 1001 | 9 | 9 | 512 | 4096 |
| 1010 | 10 | A | 1024 | 8192 |
| 1011 | 11 | B | 2048 | 16384 |
| 1100 | 12 | C | 4096 | 32768 |
| 1101 | 13 | D | 8192 | 65536 |
| 1110 | 14 | E | 16384 | 131072 |
| 1111 | 15 | F | 32768 | 262144 |
Consequently, since every object carries a two-word header, an object of size class F will have an allocation size of 32,768 words, but a payload size of 32,766 words. This obviously means that size classes 0 and 1 will not exist, since they would not have any payload.
## Page size
Every page will be 1,048,576 bytes.
## Namespaces
Namespaces will be implemented; in addition to the root namespace, there will be at least the following namespaces:
### :bootstrap
Functions written in the substrate language, intended to be replaced for all normal purposes by functions written in Lisp which may call these bootstrap functions. Not ever available to user code.
### :substrate
Functions written in the substrate language which *may* be available to user-written code.
### :system
Functions, written either in Lisp or in the substrate language, which modify system memory in ways that only trusted and privileged users are permitted to do.
## Access control
Obviously, for this to work, access control lists must be implemented and must work.

108
docs/Compiler.md Normal file
View file

@ -0,0 +1,108 @@
# Towards a Compiler
Abdulaziz Ghuloum's paper [An Incremental Approach to Compiler Construction](https://bernsteinbear.com/assets/img/11-ghuloum.pdf) starts with the observation:
> Compilers are perceived to be magical artifacts, carefully crafted
> by the wizards, and unfathomable by the mere mortals. Books on
> compilers are better described as wizard-talk: written by and for
> a clique of all-knowing practitioners. Real-life compilers are too
> complex to serve as an educational tool. And the gap between
> real-life compilers and the educational toy compilers is too wide.
> The novice compiler writer stands puzzled facing an impenetrable
> barrier, “better write an interpreter instead.”
Well, yes. That *is* what I feel. But the thing is, I've written two Lisp interpreters (and interpreters for a few other languages into one dialect of Lisp or another) now. I still feel [imposter syndrome](https://en.wikipedia.org/wiki/Impostor_syndrome) &mdash; that my interpreters are not as good as they should be, that I haven't understood the ideas clearly enough or implemented them cleanly enough, but [Beowulf](https://git.journeyman.cc/simon/beowulf) works (and evaluates Lisp) very well; the [`0.0.6` Post Scarcity](https://git.journeyman.cc/simon/post-scarcity) prototype works, after a fashion; and, after only a week of work, the `0.1.0` Post Scarcity prototype is close to working now.
Further back in my history, the [MicroWorld rule language](https://git.journeyman.cc/simon/mw-parser) is still easily buildable and works well; and, long before that, my LemonADE adventure game writing language did work well; and the KnacqTools suite of rule 'compilers,' which, although not strictly speaking either interpreters or compilers in this sense, were very similar technology, also worked extremely well. Interpreters &mdash; even reasonably good interpreters &mdash; are a done problem, but I have really no idea where to start building a compiler.
So why bother?
Beowulf is *mostly* written in Lisp &mdash; which is to say, it is mostly written in itself. If you check the [list of functions](https://git.journeyman.cc/simon/beowulf#functions-and-symbols-implemented), you'll see that the overwhelming majority of them are described as 'Lisp lambda functions'. This means, they're Beowulf functions written in Beowulf &mdash; and you can read the source code of them [here](https://git.journeyman.cc/simon/beowulf/src/branch/master/resources/lisp1.5.lsp).
But Post Scarcity `0.0.6` is written almost entirely in C. It never got to the point, as Beowulf did, where you could start a Lisp session, hack up a few functions, and save out your system to persistent storage to start again later with the work you'd written already incorporated. And this is mainly because I tried to do too many of the hard parts, like the sophisticated reader and bignum arithmetic, in C.
I'm not a confident C programmer. Post Scarcity `0.0.6`'s bignum arithmetic doesn't work, and I've failed to make it work. Post Scarcity `0.0.6`'s garbage collector works unacceptably poorly. My goal, in `0.1.0`, is to write far less in the substrate and far more in Lisp.
Which means, the Lisp must be as performant as possible. Which means, I think, that I need a compiler. Which means I need to learn to be (more of a) wizard.
So, where do I start? Where is my grimoire?
## Online tutorials on Lisp compilers
### Ghuloum
I've mentioned Abdulaziz Ghuloum's [An Incremental Approach to Compiler Construction](https://bernsteinbear.com/assets/img/11-ghuloum.pdf) at the top. It's PDF, of course. Why do people publish things as PDF? It makes them *so hard* to read!
However, I very much like his approach: small incremental steps. He writes mainly in Scheme, which is similar enough to Post Scarcity Lisp that it should be reasonably simple to carry over ideas; he targets what he describes as 'Intel-x86' assembler, but I don't yet know whether that means 16, 32 or 64 bit &mdash; since the paper dates from 2006 I'm guessing 32 bit. However, his method is to write a C fragment that implements a small step of his process, and then examine assembler output from GCC; that's an approach I could follow.
He uses test driven development, which should make things easy to reproduce.
He implements tail-call optimisation.
The paper is quite brief, and does not include source code; I have not found source code relating to it.
The paper contains a link to the author's home page at Indiana.edu, but that link is now dead. Archive.org has snapshots dated from [18th September 2006](https://web.archive.org/web/20060918162504/https://www.cs.indiana.edu/~aghuloum/) (the paper is dated from the 16th) to [March 10th 2011](https://web.archive.org/web/20110310092701/http://www.cs.indiana.edu/~aghuloum/). Although the lecture notes appear in both the listed snapshots, the paper itself is not in the first of them.
Ghuloum appears to have recently been teaching at the American University of Kuwait; he has a [GitHub presence](https://github.com/azizghuloum), but his Scheme compiler is not listed there. He published [a number of technical papers on Scheme](https://scholar.google.com/citations?user=5rd6dWUAAAAJ&hl=en) between 2006 and 2009, but does not appear to have published anything since.
### Healey
This blog post by [Andrew Healey](https://github.com/healeycodes), [Compiling Lisp to Bytecode and Running It](https://healeycodes.com/compiling-lisp-to-bytecode-and-running-it) is essentially 'write your own virtual machine,' which, given that I've been thinking about the ideal instruction set for the Post Scarcity processor, isn't a bad idea. [This repository](https://github.com/healeycodes/lisp-to-js) appears to be his implementation.
His code has virtually no internal documentation, and is in a language I don't even recognise (it might be Rust &mdash; it builds and tests with `cargo`); however, it's clearly written in nice small functions, and there is really surprisingly little of it. It does build, and all its tests pass.
Healey is still active on GitHub, and currently works for Vercel, an 'AI Cloud' company, apparently as a software engineer.
### Bernstein
There's a [blog series](https://bernsteinbear.com/blog/lisp/) by [Max Bernstein](https://github.com/tekknolagi) which is nicely clear. He references Ghuloum's work (and indeed the link I found to Ghuloum's paper is on his site), but builds his compiler in C. His repository for the compiler posts appears to be [this one](https://github.com/tekknolagi/ghuloum).
His code is mainly in C, with a test harness in Python. Again, his code is internally largely undocumented, but builds cleanly, and all his unit tests pass. The way he implements his unit tests is new to me, and worth studying; it's certainly better than the scrappy mess of shell scripts I used for the `0.0.X` series.
### Others
That's the list of things I've found so far that look useful to me. If I find others, I'll add them here.
## Things which inevitably make the Post Scarcity compiler different
### Tag location
Objects in Lisp have to know what they are. This is what makes it possible to compute with an 'untyped' language: the type is not encoded in the program but in the data. In most conventional Lisp systems, things are typed by having a tag. Back in the day, when we had hardware specially built to run Lisp, Lisp specific hardware often had a word size &mdash; and thus registers, and a data bus &mdash; wider than the address bus, wider by the number of bits in the tag, and stored the tag on the pointer.
Modern Lisps still, I think, mostly store the tag on the pointer, but they run on commodity hardware which doesn't have those extra bits in the word size. That means that the size of an integer, or the precision of a real, that you can store in one word of memory is much less. It also means either that they can address much less memory than other programming languages on the same hardware, because for every bit you steal out of the address bus you halve the amount of memory you can address; or else that they bit shift up every address before they fetch it.
The bit shift works if all memory objects are powers of two words wide, which, in Post Scarcity `0.1.0` they are, see [Paged Space Objects](Paged-space-objects.md); but as I am already doing the upshifting trick so that I can address more than 64 (actually 104, on the current sketch of how memory works) 'bits wide' of memory, this doesn't help me.
Consequently, in both the `0.0.X` series of prototypes and now in the `0.1.0` prototype, I have the tag in the object, not in the pointer.
#### Is that a good decision?
There's a really big inefficiency in that decision. In early versions of Java, numbers (and a few other things) were not objects, but 'primitives'. That is to say, the word of memory which, for objects, would be a pointer, is, for primitives, the actual data; and thus you can operate on it without doing an additional fetch. In modern Java, those primitives still exist, as [unboxed types](https://en.wikipedia.org/wiki/Boxing_(computer_programming)). Java can do this because it is a typed language. Every method knows the type of its arguments.
In Lisp we don't. So we either have the tag on the pointer, reducing, as I pointed out above, the number of addresses that can be addressed and the amount of data that can be stored in each object, or we have the tag on the object, meaning that (the header of) every object has to be fetched before we even know what it is, and thus how to despatch it further. And, in the Post Scarcity architecture as I conceive it now, in the case of an object which is curated on a node somewhere far distant across the hypercube and not yet in local cache, that means it has to be fetched hoppity hop across the mesh, which is extremely costly.
But, not only does Post Scarcity need a bigger tag than most Lisps in order to have user extensible types, it also needs to have an access control list on every object in order to have security between users; and, although I failed to make the reference counting garbage collector work in `0.0.X`, and although the thinking I've been doing about the 'mark but don't sweep' garbage collector may make it unnecessary, I still want to experiment with reference counting. So I need space in every header for a reference count.
So I can't really have unboxed objects, I think[^1] &mdash; at least, allowing unboxed integers, reals, and characters would need a very thorough rethink of the security model.
[^1]: except that, in compiled functions, local variables could potentially be the equivalent of unboxed. That's one of the main speed increases I hope to get from compiling.
All decisions in engineering are compromises. At present, I am content to proceed with this compromise.
### Reifying compiled functions
I don't honestly know where most modern Lisps allocate space for compiled functions, but I suspect that it's on the heap. In the `0.1.0` prototype I'm really trying to limit the use of 'raw' heap allocation, to prevent heap fragmentation, to reduce garbage collection problems. So I want to put each compiled function into a paged space object. Which means they have to be relocatable in memory.
And certainly, when a compiled function is copied from the node on which it is curated to another node where it will be cached, it will be at a different place in the memory of that node.
*(Question: should we copy only source functions across the mesh, and compile them 'just in time' on the node where they will be used? Doing that would allow each compiled function to incorporate raw pointers to every other function it called, which would greatly speed execution. However, if any of those functions were subsequently redefined, it would not update to use the new definition without recompilation.)*
I don't *think* relocatability is a problem. Lisps which use heap-allocated compiled functions and run mark and sweep garbage collectors on their heap, as I'm almost certain Portable Standard Lisp does and imagine most other conventional Lisps must, must have relocatable functions.
However, it may be. I certainly need to think about relocatability in this design.
## Conclusion
Post Scarcity's compiler won't be &mdash; can't be &mdash; a straight lift of anyone else's Lisp compiler. Post Scarcity is just inevitably a very different beast. The whole idea of a multiple instruction, multiple data, massively parallel processor is one that has not been very much explored because it is hard; and I don't have the technical or mathematical understanding to demonstrate whether, even if a Post Scarcity machine really could use four billion processor nodes and petabytes of memory, it could do so efficiently.
But the compiler is doable; none of the peculiarities of the architecture is a blocker. And even if this won't be a conventional compiler, there is a great deal that can be learned from conventional compilers.

View file

@ -0,0 +1,71 @@
# Don't know, don't care
![The famous XKCD cartoon showing all modern digital infrastructure depending on a single person's spare-time project](https://imgs.xkcd.com/comics/dependency.png)
One of the key design principles of the Post Scarcity computing project since my 2006 essay, [Post Scarcity Software](Post-scarcity-software.md), has been "don't know, don't care."
The reason for this is simple. Modern computing systems are extremely complex. It is impossible for someone to be expert on every component of the system. To produce excellent work, it is necessary to specialise, to avoid being distracted by the necessary intricacies of the things on which your work depends, or of the (not yet conceived) intricacies of the work of other people which will ultimately depend on yours. It is necessary to trust.
Randall Munroe's graphic, which I've used to illustrate this essay, looks like a joke, but it isn't.
[Daniel Stenberg](https://en.wikipedia.org/wiki/Daniel_Stenberg) lives not in Nebraska, but in Sweden. He wrote what became [libcurl](https://curl.se/) in 1996, not 2003. He is still its primary maintainer. It pretty much is true to say that all modern digital infrastructure depends on it. It is a basic component which fetches data over a broad range of internet protocols, negotiating the appropriate security. There *are* alternatives to libcurl in (some) other software environments, but it is extremely widely used. Because it deals with security, it is critical; any vulnerability in it needs to be fixed quickly, because it has very major impact.
The current [post-scarcity software environment](https://git.journeyman.cc/simon/post-scarcity) depends on libcurl, because of course it does. You certainly use libcurl yourself, even if you don't know it. You probably used it to fetch this document, in order to read it.
I don't need to know the intricacies of URL schemae, or of Internet protocols, or of security, to the level of detail Daniel does. I've never even reviewed his code. I trust him to know what he's doing.
Daniel's not alone, of course. Linus Torvalds wrote Linux in a university dorm room in Finland; now it powers the vast majority of servers on the Internet, and the vast majority of mobile phones in the world, and, quite incidentally, a cheap Chinese camera drone I bought to film bike rides. Linux is now an enormous project with thousands of contributors, but Linus is still the person who holds it together. [Rasmus Lerdorf](https://en.wikipedia.org/wiki/Rasmus_Lerdorf), from Greenland, wrote PHP to run his personal home page (the clue is in the name); Mark Zuckerberg used PHP to write Facebook; Michel Valdrighi used PHP to write something called b/cafelog, which Matt Mullenweg further developed into WordPress.
There are thousands of others, of course; and, at the layer of hardware, on which all software depends, there are thousands of others whose names I do not even know. I'm vaguely aware of the architects of the ARM chip, but I had to look them up just now because I couldn't remember their names. I know that the ARM is at least a spiritual descendant of the 6502, but I don't know who designed that or anything of their story; and the antecedents behind that I don't know at all. The people behind all the many other chips which make up a working computer? I know nothing about them.
(In any case, if one seriously wanted to build this thing, it would be better to have custom hardware &mdash; one would probably have to have custom hardware at least for the router &mdash; and if one were to have custom hardware it would be nice if it ran something very close to Lisp right down on the silicon, as the [Symbolics Ivory](https://gwern.net/doc/cs/hardware/1987-baker.pdf) chips did; so you probably wouldn't use ARM cores at all.)
I have met and personally spoken with most of the people behind the Internet protocol stack, but I don't need to have done so in order to use it; and, indeed, the reason that [Jon Postel](https://en.wikipedia.org/wiki/Jon_Postel) bought me a beer was so that he could sit me down and very gently explain how badly I'd misunderstood something.
-----
But this is the point. We don't need to know, or have known, these people to build on their work. We don't have to, and cannot in detail, fully understand their work. There is simply too much of it, its complexity would overwhelm us.
We don't know. We don't care. And that is a protective mechanism, a mechanism which is necessary in order to allow us to focus on our own task, if we are to produce excellent work. If we are to create a meaningful contribution on which the creators of the future can build.
-----
But there is a paradox, here, one of many conceptual paradoxes that I have encountered working on the Post Scarcity project.
I am essentially a philosopher, or possibly a dilettante, rather than an engineer. When [Danny Hillis](https://longnow.org/people/board/danny0/) came up with the conception of the [Connection Machine](https://en.wikipedia.org/wiki/Connection_Machine), a machine which is consciously one of the precursors of the post-scarcity project, he sought expert collaborators &mdash; and was so successful in doing so that [he persuaded Richard Feynman to join the project](https://longnow.org/ideas/richard-feynman-and-the-connection-machine/). I haven't recruited any collaborators. I don't have the social skills. And I don't have sufficient confidence that my idea is even good in itself.
In building the first software prototype, I realised that I don't even properly understand what it means to [intern](http://www.ai.mit.edu/projects/iiip/doc/CommonLISP/HyperSpec/Body/fun_intern.html) something. I realised that I still don't understand how in many Common Lisp implementations, for any integer number `n`, `(eq n n)` can return true. I note that in practice it *does*, but I don't understand how it's done.
In the current post scarcity prototype, it *is* true for very small values of `n`, because I cache an array of small positive integers as an optimisation hack to prevent memory churn, but that's a very special case and I cannot believe that Common Lisp implementations are doing it for significantly larger numbers of integers. I note that in SBCL, two bignums of equal value are not `eq`, so presumably SBCL is doing some sort of hack similar to mine, but I do not know how it works and I *shouldn't* care.
Platonically, two instances of the same number *should be* the same object; but we do not live in a Platonic world and I don't want to. I'm perfectly happy that `eq` (which should perhaps be renamed `identical?`) should not work for numbers.
What the behaviour is of the functions that we use, at whatever layer in the stack we work, does matter. We do need to know that. But what happens under the surface in order to deliver that behaviour? We don't need to know. We don't need to care. And we shouldn't, because that way leads to runaway recursion: behind every component, there is another component, which makes other compromises with physical matter which make good engineering sense to the people who understand that component well enough to design and to maintain it.
The stack is not of infinite depth, of course. At its base is silicon, and traces of metals on silicon, and the behaviour of electrons as they interact with individual atoms in those traces. That is knowable, in principle, by someone. But there are sufficiently many layers in the stack, and sufficient complexity in each layer, that to have a good, clear, understanding of every layer is beyond the mental capacity of anyone I know, and, I believe, is generally beyond the mental capacity of any single person.
-----
But this is the point. The point is I do need to know, and do need to care, if I am to complete this project on my own; and I don't have sufficient faith in the utility of the project (or my ability to communicate that utility) that I believe that anyone else will ever care enough to contribute to it.
And I don't have the skills, or the energy, or, indeed, the remaining time, to build any of it excellently. If it is to be built, I need collaborators; but I don't have the social skills to attract collaborators, or probably to work with them; and, actually, if I did have expert collaborators there would probably be no place for me in the project, because I don't have excellence at anything.
-----
I realise that I don't even really understand what a hypercube is. I describe my architecture as a hypercube. It is a cube because it has three axes, even though each of those axes is conceptually circular. Because the axes are circular, the thing can only be approximated in three dimensional space by using links of flexible wire or glass fibres to join things which, in three dimensional topology, cannot otherwise be joined; it is therefore slightly more than three dimensional while being considerably less than four dimensional.
I *think* this is also Hillis' understanding of a hypercube, but I could be wrong on that.
Of course, my architecture could be generalised to have four, or five, or six, or more circular axes[^1], and this would result in each node having more immediate neighbours, which would potentially speed up computation by shortening hop paths. But I cannot help feeling that with each additional axis there comes a very substantial increase in the complexity of physically routing the wires, so three-and-a-bit dimensions may be as good as you practically get.

[^1]: Could it? I'm reasonably confident that it could have *six* circular axes, but I cannot picture in my head how the grid intersections of a four-and-a-bit dimensional grid would work.
I don't have the mathematical skill to mentally model how a computation would scale through this structure. It's more an 'if I build it I will find out whether this is computationally efficient' than an 'I have a principled idea of why this should be computationally efficient.' Intuitively, it *should be* more efficient than a [von Neumann architecture](https://en.wikipedia.org/wiki/Von_Neumann_architecture), and it's easy to give an account of how it can address (much) more memory than obvious developments of our current architectures. But I don't have a good feel of the actual time cost of copying data hoppity-hop across the structure, or the heuristics of when it will be beneficial to shard a computation between neighbours.
-----
Which brings me back to why I'm doing this. I'm doing it, principally, to quiet the noises in my brain; as an exercise in preventing my propensity for psychiatric melt-down from overwhelming me. It isn't, essentially, well-directed engineering. It is, essentially, self-prescribed therapy. There is no reason why anyone else should be interested.
Which is, actually, rather solipsistic. Not a thought I like!

View file

@ -1,6 +1,6 @@
# Post Scarcity Software Environment: general documentation
Work towards the implementation of a software system like that described in [Post Scarcity Software](https://www.journeyman.cc/blog/posts-output/2006-02-20-postscarcity-software/).
Work towards the implementation of a software system for the hardware of the deep future.
## Note on canonicity
@ -12,7 +12,7 @@ You can read about the current [state of play](State-of-play.md).
## Roadmap
There is now a [roadmap](Roadmap.md) for the project.
There is now a [roadmap](https://www.journeyman.cc/post-scarcity/html/md_workspace_2post-scarcity_2docs_2_roadmap.html) for the project.
## AWFUL WARNING 1

View file

@ -0,0 +1,141 @@
# Nodes, threads, locks and links
## The problem
Up to now, I've been building a single threaded Lisp. I haven't had to worry about who is mutating memory while I'm trying to read it. The idea that this is a mostly immutable Lisp has encouraged me to be blas&eacute; about this. But actually, it isn't entirely immutable, and that matters.
Whenever *any* new datum is created, the freelist pointers have to mutate; whenever any new value is written to any namespace, the namespace has to mutate. The freelist pointers also mutate when objects are allocated and when objects are freed.
Earlier in the design, I had the idea that in the hypercube system, each node would have a two core processor, one core doing execution &mdash; actually evaluating Lisp functions &mdash; the other handling inter-node communication. I had at one stage the idea that the memory on the node would be partitioned into fixed areas:
| Partition | Contents | Core written by |
| --------- | -------- | --------------- |
| Local cons space | Small objects curated locally | Execution |
| Local vector space | Large objects curated locally | Execution |
| Cache cons space | Copies of small objects curated elsewhere | Communications |
| Cache vector space | Copies of large objects curated elsewhere | Communications |
So, the execution thread is chuntering merrily along, and it encounters a data item it needs to get from another node. This is intended to happen all the time: every time a function of more than one argument is evaluated, the node will seek to farm out some of the arguments to idle neighbours for evaluation. So the results will often be curated by them. My original vague idea was that the execution node would choose the argument which seemed most costly to evaluate to evaluate locally, pass off the others to neighbours, evaluate the hard one, and by the time that was done probably all the farmed out results would already be back.
The move from cons space objects to the more flexible [paged space objects](Paged-space-objects.md) doesn't really change this, in principle. There will still be a need for some objects which do not fit into pages, and will thus have to lurk in the outer darkness of vector space. Paged space should make the allocation of objects more efficient, but it doesn't change the fundamental issue.
But there's an inevitable overhead to copying objects over inter-node links. Even if we have 64 bit (plus housekeeping) wide links, copying a four word object still takes four clock ticks. Of course, in the best case, we could be receiving six four word objects over the six links in those four clock ticks, but
1. The best case only applies to the node initiating a computation;
2. This ignores contention on the communication mesh consequent on hoppity-hop communications between more distant nodes.
So, even if the execution core correctly chose the most expensive argument to evaluate locally, it's quite likely that when it returns to the stack frame, some results from other nodes have still not arrived. What does it do then? Twiddle its thumbs?
It could start another thread, declare itself idle, accept a work request from a neighbour, execute that, and return to the frame to see whether its original task was ready to continue. One of the benefits of having the stack in managed space is that a single stack frame can have arbitrarily many 'next' frames, in arbitrarily many threads. This is exactly how [Interlisp](https://dl.acm.org/doi/10.1145/362375.362379) manages multitasking, after all.
If we do it like that I think we're still safe, because it can't have left any data item in a half-modified state when it switched contexts.
But nevertheless, we still have the issue of contention between the execution process and the communications process. They both need to be able to mutate freelist pointers; and they both need to be able to mutate explicitly mutable objects, which for the present is just namespaces but this will change.
We can work around the freelist problem by assigning separate freelists for each size of paged-space objects to each processor, that's just sixteen more words. But if a foreign node wants to change a value in a local namespace, then the communications process needs to be able to make that change.
Which means we have to be able to lock objects. Which is something I didn't want to have to do.
## Mutexes
It's part of the underlying philosophy of the post scarcity project that one person can't be expert in every part of the stack. I don't fully understand the subtleties of thread safe locking. In my initial draft of this essay, I was planning to reserve one bit in the tag of an object as a thread lock.
There is a well respected standard thread locking library, [`pthreads`](https://www.cs.cmu.edu/afs/cs/academic/class/15492-f07/www/pthreads.html), part of the [POSIX](https://en.wikipedia.org/wiki/POSIX) standard, which implements thread locks. The lock object it implements is called a `mutex` ('mutual exclusion'), and the size of a `mutex` is... complicated. It is declared as a union:
```c
typedef union
{
struct __pthread_mutex_s __data;
char __size[__SIZEOF_PTHREAD_MUTEX_T];
long int __align;
} pthread_mutex_t;
```
I guessed that the `long int __align` member was intended as a contract that this would be *no bigger* than a `long int`, but `long int` may mean 32 or 64 bits depending on context. The payload is clearly `__pthread_mutex_s`; so how big is that? Answer: it varies, dependent on the hardware architecture. But `__SIZEOF_PTHREAD_MUTEX_T` also varies dependent on architecture, and is defined as 40 *bytes* on 64 bit Intel machines:
```c
#ifdef __x86_64__
# if __WORDSIZE == 64
# define __SIZEOF_PTHREAD_MUTEX_T 40
...
```
The header file I have access to declares that for 32 bit Intel machines it's 32 bytes and for all non-Intel machines the size is only 24 bytes, but
1. the machines I'm working on are actually AMD, but x86 64 bit Intel architecture; and
2. I don't currently have a 64 bit ARM version of this library, and ARM is quite likely to be the architecture I would use for a hardware implementation;
So let's be cautious.
Let's also be realistic: what I'm building now is the 0.1.0 prototype, which is not planned to run on even a simulated hypercube, so it doesn't need to have locks at all. I am crossing a bridge I do not yet strictly need to cross.
## Where to put the lock?
Currently, we have namespaces implemented as hashtables (or hashmaps, if you prefer, but I appreciate that it's old fashioned). We have hashtables implemented as an array of buckets. We have buckets implemented, currently, as association lists (lists of dotted pairs), although they could later be implemented as further hashtables. We can always cons a new `(key . value)` pair onto the front of an association list; the fact that there may be a different binding of the same key further down the association list doesn't matter, except in so far as it slows further searches down that association list.
Changing the pointer to the bucket happens in one clock tick: we're writing one 64 bit word to memory over a 64 bit wide address bus. The replacement bucket can &mdash; must! &mdash; be prepared in advance. So changing the bucket is pretty much an atomic operation.
But the size of a mutex is uncertain, and **must** fit within the footprint of the namespace object.
Forty bytes is (on a 64 bit machine) five words; but, more relevantly, our `pso_pointer` object is 64 bits irrespective of hardware architecture, so forty bytes is the size of five (pointers to) buckets. This means that namespaces are no longer 'the same' as hashtables; hashtables can accommodate (at least) five more buckets within a given [paged space object](Paged-space-objects.md) size. But obviously we can &mdash; the whole paged space objects architecture is predicated on ensuring that we can &mdash; accommodate any moderately sized fixed size datum into a paged space object, so we can accommodate a mutex into the footprint of a namespace object.
Oh, but wait.
Oh, but wait, here's a more beautiful idea.
### First class mutexes
We can make the mutex a first class object in paged space in its own right.
This has a number of advantages:
1. the space we need to reserve in the namespace object is just a pointer like any other pointer, and is not implementation dependent;
2. we can change the implementation of the mutex object, if we need to do so when changing architecture, without changing the implementation of anything which relies on a mutex;
3. mutexes then become available as ordinary objects in the Lisp system, to be used by any Lisp functions which need to do thread-safe locking.
So we need a new Lisp function,
```lisp
(with-lock mutex forms...)
```
which, when called
1. waits until it can lock the specified mutex;
2. evaluates each of the forms sequentially in the context of that locked mutex;
3. if evaluation of any of the forms results in the throwing of an exception, catches the exception, unlocks the mutex, and then re-throws the exception;
4. on successful completion of the evaluation of the forms, unlocks the mutex and returns the value of the last form.
This means that I *could* write the bootstrap layer namespace handling code non-thread-safe, and then reimplement it for the user layer in Lisp, thread-safe. But it also means that users could write thread safe handlers for any new types of mutable object they need to define.
### Other types
We don't currently have any other mutable objects, but in future at least lazy objects will be mutable; we may have other things that are mutable. It doesn't seem silly to have a single consistent way to store locks, even if it will only be used in the case of a small minority of objects.
## Procedure for using the lock
### Reading namespaces
Secondly, reading from a namespace does not happen in a single clock tick, it takes quite a long time. So it's no good setting a lock bit on the namespace object itself and then immediately assuming that it's now mutable. A reading process could already have started, and be proceeding.
So what I think is, that we have a single top level function, `(::substrate:search-store key store return-key?)` (which we already sort of have in the 0.0.6 prototype, [here](https://www.journeyman.cc/post-scarcity/doc/html/intern_8c.html#a2189c0ab60e57a70adeb32aca99dbc43)). This searches a store (hashmap, namespace, association list, or hybrid association list) to find a binding for a key, and, having found that binding, then, if there is a namespace on the search path, checks whether the lock on any namespace on the search path is set, and if it is, aborts the search and tries again; but otherwise returns either the key found (if `return-key?` is non-`nil`), or the value found otherwise.
This function implements the user-level Lisp functions `assoc`, `interned`, and `interned?`. It also implements *hashmap-in-function-position* and *keyword-in-function-position*, in so far as both of these are treated as calls to `assoc`.
### Writing namespaces
When writing to a namespace, top level function [`(::substrate:set key value store)`](https://www.journeyman.cc/post-scarcity/doc/html/intern_8c.html#af8e370c233928d41c268874a6aa5d9e2), we first try to acquire the lock on the namespace. If it is not available, we pause a short time, and try again. If it is clear, we lock it, then identify the right bucket, then cons the new `(key . value)` pair onto the front of the bucket[^1], then update the bucket pointer, and finally unlock the lock.
This function implements the user-level Lisp functions `set` and `set!`.
### Allocating/deallocating objects
When allocating a new object from a freelist... Actually, a lock on the tag of the `car` of the freelist doesn't work here. The lock has to be somewhere else. We could have a single lock for all freelists; that feels like a bad idea because it means e.g. that you can't allocate stack frames while allocating cons cells, and you're bound to get in a mess there. But actually, allocating and deallocating objects of size class 2 &mdash; cons cells, integers, other numbers, links in strings, many other small things &mdash; is going to be happening all the time, so I'm not sure that it makes much difference. Most of the contention is going to be in size class 2. Nevertheless, one lock per size class is probably not a bad idea, and doesn't take up much space.
So: one lock per freelist.
When allocating *or deallocating* objects, we first try to obtain the lock for the freelist. If it is already locked, wait and try again. If it is clear, lock it, make the necessary change to the freelist, then unlock it.
[^1]: We probably remove any older bindings of the same key from the bucket at this point, too, because it will speed later searches, but this is not critical.

View file

@ -0,0 +1,69 @@
# Paged space objects
*Antecedents for this essay:
1. [Reference counting, and the garbage collection of equal sized objects](https://www.journeyman.cc/blog/posts-output/2013-08-25-reference-counting-and-the-garbage-collection-of-equal-sized-objects/);
2. [Vector space, Pages, Mark-but-don't-sweep, and the world's slowest ever rapid prototype](https://www.journeyman.cc/blog/posts-output/2026-03-13-The-worlds-slowest-ever-rapid-prototype/).*
The post-scarcity software environment needs to store data in objects. Much of the data will be in objects which will fit in the memory footprint of a cons cell, but some won't, and those that won't will be in a variety of sizes.
Conventionally, operating systems allocate memory as a heap. If you allocate objects of differing sizes from a heap, the heap becomes fragmented, like a [Sierpiński carpet](https://en.wikipedia.org/wiki/Sierpi%C5%84ski_carpet) or [Cantor dust](https://en.wikipedia.org/wiki/Cantor_set#Cantor_dust) &mdash; there are lots of holes in it, but it becomes increasingly difficult to find a hole which will fit anything large.
If we store our objects in containers of standardised sizes, then, for each of those standardised sizes, we can maintain a freelist of currently unused containers, from which new containers can be allocated. But we still don't want those relatively small objects floating around independently in memory, because we'll still get the fragmentation problem.
This was the initial motivation behind [cons pages](https://www.journeyman.cc/post-scarcity/html/conspage_8h.html#structcons__page). However, quite early in the development of the prototype, it became obvious that we were allocating and deallocating very many stack frames, and many hash tables, neither of which fit in the memory footprint of a cons cell; and that, going forward, it was likely that we would generate many other sorts of larger objects.
My first thought was to generalise the cons page idea, and generate pages of equal sized objects; that is, one set of pages for objects (like cons cells) with a two word payload, one for objects with a four word payload, one for objects with an eight word payload, and so on. The key idea was that each of these pages would be of equal size, so that if, say, we needed to allocate more eight word objects and there was a page for two word objects currently empty, the memory footprint could be reassigned: the hole in the carpet would be the right size.
If we have to allocate an object which needs a five word payload, it will have to be allocated as an eight word object in an eight word object page, which wastes some memory, for the lifetime of that object; but that memory can be efficiently recovered at the end of life, and the heap doesn't fragment. Any page will, at any time, be partly empty, which wastes more memory, but again, that memory can later be efficiently reused.
The potential problem is that you might end up, say, with many pages for two word objects each of which were partly empty, and have nowhere to allocate new eight word objects; and if this does prove in practice to be a problem, then a mark and sweep garbage collector &mdash; something I *really* don't want &mdash; will be needed. But that is not a problem for just now.
## Efficiently allocating pages
I cannot see how we can efficiently manage pages without each page having some housekeeping data, as every other data object in the system must have a header for housekeeping data. It may be that I am just stuck in my thinking and that the header for pages is not needed, but I *think* it is, and I am going to proceed for now as though it were.
The problem here is that, on an essentially binary machine, it makes sense to allocate things in powers of two; and, as that makes sense at the level of allocating objects in pages, so it makes sense at the level of the basic heap allocator. I'm proposing to allocate objects in standardised containers of these payload sizes:
| Tag | | | Size of payload | |
| ---- | ----------- | --- | --------------- | --------------- |
| Bits | Field value | Hex | Number of words | Number of bytes |
| ---- | ----------- | --- | --------------- | --------------- |
| 0000 | 0 | 0 | 1 | 8 |
| 0001 | 1 | 1 | 2 | 16 |
| 0010 | 2 | 2 | 4 | 32 |
| 0011 | 3 | 3 | 8 | 64 |
| 0100 | 4 | 4 | 16 | 128 |
| 0101 | 5 | 5 | 32 | 256 |
| 0110 | 6 | 6 | 64 | 512 |
| 0111 | 7 | 7 | 128 | 1024 |
| 1000 | 8 | 8 | 256 | 2048 |
| 1001 | 9 | 9 | 512 | 4096 |
| 1010 | 10 | A | 1024 | 8192 |
| 1011 | 11 | B | 2048 | 16384 |
| 1100 | 12 | C | 4096 | 32768 |
| 1101 | 13 | D | 8192 | 65536 |
| 1110 | 14 | E | 16384 | 131072 |
| 1111 | 15 | F | 32768 | 262144 |
This scheme allows me to store the allocation payload size of an object, and consequently the type of a page intended to store objects of that size, in four bits, which is pretty economic. But it's not nothing, and there's a cost to this. The irreducible minimum size of header that objects in the system need to have &mdash; in my current design &mdash; is two words. So the allocation size of an object with a payload of two words, is four words; but the allocation size of an object with a payload size of thirty two thousand, seven hundred and sixty eight words, is thirty two thousand, seven hundred and seventy words.
Why does that matter?
Well, suppose we allocate pages of a megabyte, and we take out of that megabyte a two word page header. Then we can fit 262,143 objects with a payload size of two into that page, and waste only two words. But we can fit only three objects of size 262,144 into such a page, and we waste 262,138 words, which feels bad.
When I first realised this, I thought, well, the idea was nice, but it doesn't work. There are three potential solutions, each of which feel inelegant to me:
1. We simply ignore the wasted space;
2. Given that the overwhelming majority of objects used by the system, especially of transient objects, will be of payload size two (allocation size four), we fill all 'spare' space in pages with objects of payload size two, and push them all onto the freelist of objects of payload size two;
(this feels ugly to me because it breaks the idea that all objects on a given page should be of the same size)
3. We treat the size signature of the page &mdash; that four bit value &mdash; as being related not to the payload size of the objects to be allocated into the page, but to the allocation size; so that cons cells, with a payload size of two and thus an allocation size of four, would be allocated into pages with a size tag of 0001 and not a size tag of 0010; and we store the housekeeping data for the page itself (waves hands vaguely) somewhere else;
(this feels ugly to me because, for me, the size of an object is its payload size, and I'm deeply bothered by things floating about randomly in memory without identifying information).
There's a wee bit of autistic insistence on order in my design choices there, that I should not get hung up on. Some objects really do need allocation sizes in memory which are powers of two, but most in fact don't. Currently, the only objects which I commonly allocate and deallocate which are not cons-space objects &mdash; not objects with a payload size of two &mdash; are stack frames (current payload size 12) and hash tables (current payload size variable, but defaults to 34).
If we're storing the (encoded) allocation size of each object in the tag of the object &mdash; which I think that in the 0.1.0 prototype we will, and if every object on any given page is of the same size, which seems to me a good plan, then I'm not sure that we actually need to store any other housekeeping data on the page, because the header of every object is the same size, and the header of every object in the page holds the critical bit of housekeeping information about the page, so we can always get that value from the header of the first object in the page.
If we take these two pragmatic compromises together &mdash; that the size encoded in the tag of an object is its allocation size not its payload size, and that the allocation size in the first object on a page is the allocation size for that page &mdash; then every page can fit an exact number of objects with no space wasted.
That's not beautiful but I think it's sensible.

View file

@ -1,17 +1,23 @@
# Roadmap
With the release of 0.0.6 close, it's time to look at a plan for the future development of the project.
With the release of 0.0.6 close, it's time to look at a plan for the future
development of the project.
I have an almost-working Lisp interpreter, which, as an interpreter, has many of the features of the language I want. It runs in one thread on one processor.
I have an almost-working Lisp interpreter, which, as an interpreter, has many
of the features of the language I want. It runs in one thread on one processor.
Given how experimental this all is, I don't think I need it to be a polished interpreter, and polished it isn't. Lots of things are broken.
Given how experimental this all is, I don't think I need it to be a polished
interpreter, and polished it isn't. Lots of things are broken.
* garbage collection is pretty broken, and I'n beginning to doubt my whole garbage collection strategy;
* garbage collection is pretty broken, and I'm beginning to doubt my whole
garbage collection strategy;
* bignums are horribly broken;
* there's something very broken in shallow-bound symbols, and that matters and wil have to be fixed;
* there's something very broken in shallow-bound symbols, and that matters
and will have to be fixed;
* there are undoubtedly many other bugs I don't know about.
However, while I will fix bugs where I can, it's good enough for other people to play with if they're mad enough, and it's time to move on.
However, while I will fix bugs where I can, it's good enough for other people
to play with if they're mad enough, and it's time to move on.
## Next major milestones
@ -50,44 +56,77 @@ So release 0.1.0, which I'll target for 1<sup>st</sup> January 2027, will
essentially be a Lisp interpreter running on the new substrate and memory
architecture, without any significant new features.
See [0.1.0 design decisions](0-1-0-design-decisions.md) for more detail.
### Simulated hypercube
There is really no point to this whole project while it remains a single thread running on a single processor. Until I can pass off computation to peer neighbours, I can't begin to understand what the right strategies are for when to do so.
There is really no point to this whole project while it remains a single thread
running on a single processor. Until I can pass off computation to peer
neighbours, I can't begin to understand what the right strategies are for when
to do so.
`cond` is explicitly sequential, since later clauses should not be executed at all if earlier ones succeed. `progn` is sort of implicitly sequential, since it's the value of the last form in the sequence which will be returned.
`cond` is explicitly sequential, since later clauses should not be executed at
all if earlier ones succeed. `progn` is sort of implicitly sequential, since
it's the value of the last form in the sequence which will be returned.
For `mapcar`, the right strategy might be to partition the list argument between each of the idle neighbours, and then reassemble the results that come bask.
For `mapcar`, the right strategy might be to partition the list argument
between each of the idle neighbours, and then reassemble the results that come
back.
For most other things, my hunch is that you pass args which are not self-evaluating to idle neighbours, keeping (at least) one on the originating node to work on while they're busy.
For most other things, my hunch is that you pass args which are not
self-evaluating to idle neighbours, keeping (at least) one on the originating
node to work on while they're busy.
But before that can happen, we need a router on each node which can monitor concurrent traffic on six bidirectional links. I think at least initially what gets written across those links is just S-expressions.
But before that can happen, we need a router on each node which can monitor
concurrent traffic on six bidirectional links. I think at least initially what
gets written across those links is just S-expressions.
I think a working simulated hypercube is the key milestone for version 0.1.1.
I think a working simulated hypercube is the key milestone for version 0.2.0.
### Sysout, sysin, and system persistence
Doctrine is that the post scarcity computing environment doesn't have a file system, but nevertheless we need some way of making an image of a working system so that, after a catastrophic crash or a power outage, it can be brought back up to a known good state. This also really needs to be in 0.1.1.
Doctrine is that the post scarcity computing environment doesn't have a file
system, but nevertheless we need some way of making an image of a working
system so that, after a catastrophic crash or a power outage, it can be brought
back up to a known good state. This really needs to be in 0.1.1.
### Better command line experience
The current command line experience is embarrassingly poor. Recallable input history, input line editing, and a proper structure editor are all things that I will need for my comfort.
The current command line experience is embarrassingly poor. Recallable input
history, input line editing, and a proper structure editor are all things that
I will need for my comfort.
### Users, groups and ACLs
Allowing multiple users to work together within the same post scarcity computing environment while retaining security and privacy is a major goal. So working out ways for users to sign on and be authenticated, and to configure their own environment, and to set up their own access control lists on objects they create, needs to be another nearish term goal. Probably 0.1.2.
Allowing multiple users to work together within the same post scarcity
computing environment while retaining security and privacy is a major goal. So
working out ways for users to sign on and be authenticated, and to configure
their own environment, and to set up their own access control lists on objects
they create, needs to be another nearish term goal. Probably 0.1.2.
### Homogeneities, regularities, slots, migration, permeability
There are a lot of good ideas about the categorisation and organisation of data which are sketched in my original [Post scarcity software](Post-scarcity-software.md) essay which I've never really developed further because I didn't have the right software environment for them, which now I shall have. It would be good to build them.
There are a lot of good ideas about the categorisation and organisation of data
which are sketched in my original
[Post scarcity software](Post-scarcity-software.md) essay which I've never
really developed further because I didn't have the right software environment
for them, which now I shall have. It would be good to build them.
### Compiler
I do want this system to have a compiler. I do want compiled functions to be the default. And I do want to understand how to write my own compiler for a system like this. But until I know what the processor architecture of the system I'm targetting is, worrying too much about a compiler seems premature.
I do want this system to have a compiler. I do want compiled functions to be
the default. And I do want to understand how to write my own compiler for a
system like this. But until I know what the processor architecture of the
system I'm targetting is, worrying too much about a compiler seems premature.
### Graphical User Interface
Ultimately I want a graphical user interface at least as fluid and flexible as what we had on Interlisp machines 40 years ago. It's not a near term goal there.
Ultimately I want a graphical user interface at least as fluid and flexible as
what we had on Interlisp machines 40 years ago. It's not a near term goal yet.
### Real hardware
This machine would be **very** expensive to build, and there's no way I'm ever going to afford more than a sixty-four node machine. But it would be nice to have software which would run effectively on a four billion node machine, if one could ever be built. I think that has to be the target for version 1.0.0.
This machine would be **very** expensive to build, and there's no way I'm ever
going to afford more than a sixty-four node machine. But it would be nice to
have software which would run effectively on a four billion node machine, if
one could ever be built. I think that has to be the target for version 1.0.0.

View file

@ -1,5 +1,47 @@
# State of Play
## 20260331
Substrate layer `print` is written; all the building blocks for substrate
layer `read` are in place. This will read far less than the 0.0.6, but it
will be extensible with read macros *written in Lisp*, so much more flexible,
and will gradually grow to read more than the non-extensible 0.0.6 reader
was. Pleased with myself.
The new print may grow to be extensible in Lisp, as well. In fact, it will
have to!
## 20260326
Most of the memory architecture of the new prototype is now roughed out, but
in C, not in a more modern language. It doesn't compile yet.
My C is getting better... but it needed to!
## 20260323
I started an investigation of the [Zig language](https://ziglang.org/) and
came away frustrated. It's definitely an interesting language, and *I think*
one capable of doing what I want. But in trying to learn, I checked out
someone else's [Lisp interpreter in Zig](https://github.com/cryptocode/bio).
The last commit to this project is six months ago, so fairly current; project
documentation is polished, implying the project is well advanced and by someone
competent.
It won't build.
It won't build because there are breaking changes to the build system in the
current version of Zig, and, according to helpful people on the Zig language
Discord, breaking changes in Zig versions are quite frequent.
Post-scarcity is a project which proceeds slowly, and is very large indeed. I
will certainly not complete it before I die.
I don't feel unstable tools are a good choice.
I have, however, done more thinking about [Paged space objects](Paged-space-objects.md), and think I
now have a buildable specification.
## 20260319
Right, the `member?` bug [is fixed](https://git.journeyman.cc/simon/post-scarcity/issues/11).

1
munit Submodule

@ -0,0 +1 @@
Subproject commit fbbdf1467eb0d04a6ee465def2e529e4c87f2118

24
src/c/arith/READMDE.md Normal file
View file

@ -0,0 +1,24 @@
# README: PSSE substrate arithmetic
This folder/pseudo package is to implement enough of arithmetic for bootstrap:
that is, enough that all more sophisticated arithmetic can be built on top of
it.
Ratio arithmetic will not be implemented in the substrate, but `make-ratio`
will. The signature for `make-ratio` will be:
`(make-ratio dividend divisor) => ratio`
Both divisor and dividend should be integers. If the divisor is `1` it will
return the dividend (as an integer). If the divisor is 0 it will return &infin;.
This implies we need a privileged data item representing infinity...
Bignum arithmetic will not be implemented in the substrate, but `make-bignum`
will be. The signature for `make-bignum` will be
`(make-bignum integer) => bignum`
If the integer argument is less than 64 bits, the argument will be returned
unmodified. If it is more than 64 bits, a bignum of the same value will be
returned.

138
src/c/debug.c Normal file
View file

@ -0,0 +1,138 @@
/**
* debug.c
*
* Post Scarcity Software Environment: debugging messages.
*
* Print debugging output.
*
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <stdarg.h>
#include "debug.h"
/**
 * @brief Bit mask selecting which categories of debug message are printed.
 *
 * Starts at 0. When this file is compiled with `DEBUG` defined, each printing
 * function below emits output only if the bitwise and of its `level`
 * argument and this value is non-zero.
 */
int verbosity = 0;
/**
 * @brief Write a debug `message` to stderr when `level` is enabled.
 *
 * `verbosity` is treated as a set of flags (see debug.h), so debugging can
 * be switched on for individual parts of the system. The whole body is
 * compiled out unless `DEBUG` is defined.
 *
 * NOTE THAT: unlike the 0.0.X prototypes, a line feed is always written
 * *before* the message, followed by `indent` spaces.
 *
 * @param message the text to print, as a wide character string.
 * @param level a mask for `verbosity`; nothing is printed unless the bitwise
 *      and of `verbosity` and `level` is non-zero.
 * @param indent number of spaces written before the message.
 */
void debug_print( wchar_t *message, int level, int indent ) {
#ifdef DEBUG
    /* Guard clause: this category of message is switched off. */
    if ( ( level & verbosity ) == 0 ) {
        return;
    }

    fwide( stderr, 1 );
    fputws( L"\n", stderr );
    for ( int pad = indent; pad > 0; pad-- ) {
        fputws( L" ", stderr );
    }
    fputws( message, stderr );
#endif
}
/**
 * @brief Placeholder for printing a Lisp object to the debug stream.
 *
 * Not yet implemented: the body is empty, so a call currently does nothing
 * whatever arguments it is given.
 *
 * @param object the object to print (currently ignored).
 * @param level presumably a mask for `verbosity`, as for `debug_print` -
 *      TODO confirm when implemented.
 * @param indent presumably the number of leading spaces - TODO confirm.
 */
void debug_print_object( struct pso_pointer object, int level, int indent ) {
// TODO: not yet implemented
}
/**
 * @brief Placeholder for dumping a Lisp object to the debug stream.
 *
 * Not yet implemented: the body is empty, so a call currently does nothing
 * whatever arguments it is given.
 *
 * @param object the object to dump (currently ignored).
 * @param level presumably a mask for `verbosity`, as for `debug_print` -
 *      TODO confirm when implemented.
 * @param indent presumably the number of leading spaces - TODO confirm.
 */
void debug_dump_object( struct pso_pointer object, int level, int indent ) {
// TODO: not yet implemented
}
/**
 * @brief print a 128 bit integer value to stderr, if `verbosity` matches `level`.
 *
 * `verbosity` is a set of flags, see debug.h; so you can turn debugging on
 * for only one part of the system. The body is compiled out unless `DEBUG`
 * is defined.
 *
 * Adapted from
 * https://stackoverflow.com/questions/11656241/how-to-print-uint128-t-number-using-gcc
 * The original handled only non-negative values: for a negative `n`,
 * `n % 10` is negative and indexed outside the digit table. The value is
 * now converted via its unsigned magnitude and a leading '-' is emitted.
 *
 * @param n the large integer to print.
 * @param level a mask for `verbosity`. If a bitwise and of `verbosity` and
 *      `level` is non-zero, print `n`, else don't.
 */
void debug_print_128bit( __int128_t n, int level ) {
#ifdef DEBUG
    if ( level & verbosity ) {
        /* 39 decimal digits at most, plus an optional sign, plus '\0'. */
        char str[41] = { 0 };
        char *s = str + sizeof( str ) - 1;      // start at the end

        /* Negating in the unsigned type is well defined even for the most
         * negative value, which has no positive signed counterpart. */
        __uint128_t magnitude =
            ( n < 0 ) ? -( __uint128_t ) n : ( __uint128_t ) n;

        /* do/while so that zero still produces the single digit "0". */
        do {
            *--s = ( char ) ( '0' + ( int ) ( magnitude % 10 ) );
            magnitude /= 10;
        } while ( magnitude != 0 );

        if ( n < 0 ) {
            *--s = '-';
        }

        fwprintf( stderr, L"%s", s );
    }
#endif
}
/**
 * @brief Write a single line feed to stderr when `level` is enabled.
 *
 * `verbosity` is treated as a set of flags (see debug.h), so debugging can
 * be switched on for individual parts of the system. The whole body is
 * compiled out unless `DEBUG` is defined.
 *
 * @param level a mask for `verbosity`; nothing is printed unless the bitwise
 *      and of `verbosity` and `level` is non-zero.
 */
void debug_println( int level ) {
#ifdef DEBUG
    /* Guard clause: this category of message is switched off. */
    if ( ( level & verbosity ) == 0 ) {
        return;
    }

    fwide( stderr, 1 );
    fputws( L"\n", stderr );
#endif
}
/**
 * @brief `wprintf` adapted for the debug logging system.
 *
 * Print to stderr only if `verbosity` matches `level`. A line feed and
 * `indent` spaces are written before the formatted text. All other
 * arguments are as for `wprintf`. The body is compiled out unless `DEBUG`
 * is defined.
 *
 * @param level a mask for `verbosity`. If a bitwise and of `verbosity` and
 *      `level` is non-zero, print the message, else don't.
 * @param indent print `indent` spaces before the message.
 * @param format Format string in *wide characters*, but otherwise as used by
 *      `printf` and friends.
 *
 * Remaining arguments should match the slots in the format string.
 */
void debug_printf( int level, int indent, wchar_t *format, ... ) {
#ifdef DEBUG
    if ( level & verbosity ) {
        fwide( stderr, 1 );
        fputws( L"\n", stderr );
        for ( int i = 0; i < indent; i++ ) {
            fputws( L" ", stderr );
        }

        va_list args;
        va_start( args, format );
        vfwprintf( stderr, format, args );
        /* Every va_start must be paired with va_end in the same function. */
        va_end( args );
    }
#endif
}
// debug_dump_object, debug_print_binding, debug_print_exception, debug_print_object,
// not yet implemented but probably will be.

117
src/c/debug.h Normal file
View file

@ -0,0 +1,117 @@
/**
* debug.h
*
* Post Scarcity Software Environment: debugging messages.
*
* Print debugging output.
*
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_debug_h
#define __psse_debug_h
#include <ctype.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
#include "memory/pointer.h"
/*
 * Topic flags for `verbosity` (defined in `debug.c`, q.v.). Each flag is a
 * distinct bit, so any combination may be or-ed together to choose which
 * categories of debugging message are printed.
 */

/** @brief Print messages debugging memory allocation. */
#define DEBUG_ALLOC ( 1 << 0 )

/** @brief Print messages debugging arithmetic operations. */
#define DEBUG_ARITH ( 1 << 1 )

/** @brief Print messages debugging symbol binding. */
#define DEBUG_BIND ( 1 << 2 )

/** @brief Print messages debugging bootstrapping and teardown. */
#define DEBUG_BOOTSTRAP ( 1 << 3 )

/** @brief Print messages debugging evaluation. */
#define DEBUG_EVAL ( 1 << 4 )

/** @brief Print messages debugging input/output operations. */
#define DEBUG_IO ( 1 << 5 )

/** @brief Print messages debugging lambda functions (interpretation). */
#define DEBUG_LAMBDA ( 1 << 6 )

/** @brief Print messages debugging the read eval print loop. */
#define DEBUG_REPL ( 1 << 7 )

/** @brief Print messages debugging stack operations. */
#define DEBUG_STACK ( 1 << 8 )

/** @brief Print messages about equality tests. */
#define DEBUG_EQUAL ( 1 << 9 )
/**
 * @brief Verbosity (and content) of debugging output
 *
 * Interpreted as a set of topic-specific bit flags: the DEBUG_* values
 * defined in this header. Defined in `debug.c`.
 */
extern int verbosity;
/** Print a wide-character message, preceded by a line feed and `indent` spaces, if `level` is enabled. */
void debug_print( wchar_t *message, int level, int indent );
/** Print a Lisp object to the debug stream; see `debug.c` for its current state. */
void debug_print_object( struct pso_pointer object, int level, int indent );
/** Dump a Lisp object to the debug stream; see `debug.c` for its current state. */
void debug_dump_object( struct pso_pointer object, int level, int indent );
/** Print a 128 bit integer in decimal if `level` is enabled. */
void debug_print_128bit( __int128_t n, int level );
/** Print a line feed if `level` is enabled. */
void debug_println( int level );
/** `wprintf`-style formatted output if `level` is enabled. */
void debug_printf( int level, int indent, wchar_t *format, ... );
#endif

View file

@ -0,0 +1,36 @@
/**
* environment/environment.c
*
* Initialise a MINIMAL environment.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <stdbool.h>
#include "memory/node.h"
#include "memory/pointer.h"
/**
 * @brief Guard flag consulted by `initialise_environment` to detect a second
 * initialisation attempt.
 */
bool environment_initialised = false;

/**
 * @brief Set up the minimal environment needed to bootstrap Lisp.
 *
 * Both branches are still placeholders: nothing is initialised yet, no
 * exception is raised on re-initialisation, and the guard flag is not
 * updated here.
 *
 * @param node the index of the node being initialised (currently unused).
 * @return struct pso_pointer `t`; an exception is intended on failure once
 *      implemented.
 */
struct pso_pointer initialise_environment( uint32_t node ) {
    if ( !environment_initialised ) {
        // TODO: actually initialise it.
    } else {
        // TODO: throw an exception "Attempt to reinitialise environment"
    }

    return t;
}

View file

@ -0,0 +1,15 @@
/**
* environment/environment.h
*
* Initialise a MINIMAL environment.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_environment_environment_h
#define __psse_environment_environment_h
/**
 * @brief Initialise a minimal environment, so that Lisp can be bootstrapped.
 *
 * @param node the index of the node being initialised.
 * @return struct pso_pointer t on success, else an exception.
 */
struct pso_pointer initialise_environment( uint32_t node );
#endif

526
src/c/io/fopen.c Normal file
View file

@ -0,0 +1,526 @@
/*
* fopen.c
*
* adapted from https://curl.haxx.se/libcurl/c/fopen.html.
*
* Modifications to read/write wide character streams by
* Simon Brooke.
*
* NOTE THAT: for my purposes, I'm only interested in wide characters,
* and I always read them one character at a time.
*
* Copyright (c) 2003, 2017 Simtec Electronics
* Some portions (c) 2019 Simon Brooke <simon@journeyman.cc>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This example requires libcurl 7.9.7 or later.
*/
#include <stdio.h>
#include <string.h>
#ifndef WIN32
#include <sys/time.h>
#endif
#include <stdlib.h>
#include <errno.h>
#include <curl/curl.h>
#include "io/fopen.h"
#ifdef FOPEN_STANDALONE
CURLSH *io_share;
#else
#include "memory/pso2.h"
#include "io/io.h"
#include "utils.h"
#endif
/* exported functions: these repeat the declarations in io/fopen.h */
URL_FILE *url_fopen( const char *url, const char *operation );
int url_fclose( URL_FILE * file );
int url_feof( URL_FILE * file );
size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file );
char *url_fgets( char *ptr, size_t size, URL_FILE * file );
void url_rewind( URL_FILE * file );
/* we use a global one for convenience: a single curl multi handle, created
 * on first use by url_fopen(), to which every CFTYPE_CURL stream's easy
 * handle is added */
static CURLM *multi_handle;
/**
 * Write callback handed to curl (CURLOPT_WRITEFUNCTION): appends the bytes
 * curl has just received to the stream's buffer, growing it when needed.
 *
 * @param buffer the data delivered by curl.
 * @param size size of one item, in bytes.
 * @param nitems number of items delivered.
 * @param userp the URL_FILE registered with CURLOPT_WRITEDATA.
 * @return the number of bytes actually stored; this is less than
 *      `size * nitems` only when the buffer could not be grown.
 */
static size_t write_callback( char *buffer,
                              size_t size, size_t nitems, void *userp ) {
    URL_FILE *target = userp;
    size_t incoming = size * nitems;
    /* space still free at the end of the current allocation */
    size_t spare = target->buffer_len - target->buffer_pos;

    if ( incoming > spare ) {
        size_t shortfall = incoming - spare;
        char *grown =
            realloc( target->buffer, target->buffer_len + shortfall );

        if ( grown != NULL ) {
            target->buffer = grown;
            target->buffer_len += shortfall;
        } else {
            /* keep the old buffer and store only what still fits */
            fprintf( stderr, "callback buffer grow failed\n" );
            incoming = spare;
        }
    }

    memcpy( target->buffer + target->buffer_pos, buffer, incoming );
    target->buffer_pos += incoming;

    return incoming;
}
/**
 * Attempt to fill the read buffer up to the requested number of bytes by
 * driving the shared curl multi handle until either the transfer finishes
 * or at least `want` bytes are buffered.
 *
 * @param file the stream whose buffer should be filled.
 * @param want the number of buffered bytes the caller would like.
 * @return 0 if no attempt was made (transfer already finished, or more than
 *      `want` bytes already buffered); otherwise 1. Note that 1 is returned
 *      even when the loop ends early because curl_multi_fdset() failed, so
 *      callers must inspect `file->buffer_pos` to learn whether data arrived.
 */
static int fill_buffer( URL_FILE *file, size_t want ) {
fd_set fdread;
fd_set fdwrite;
fd_set fdexcep;
struct timeval timeout;
int rc;
CURLMcode mc; /* curl_multi_fdset() return code */
/* only attempt to fill buffer if transactions still running and buffer
* doesn't exceed required size already
*/
if ( ( !file->still_running ) || ( file->buffer_pos > want ) )
return 0;
/* attempt to fill buffer */
do {
int maxfd = -1;
long curl_timeo = -1;
FD_ZERO( &fdread );
FD_ZERO( &fdwrite );
FD_ZERO( &fdexcep );
/* set a suitable timeout to fail on */
timeout.tv_sec = 60; /* 1 minute */
timeout.tv_usec = 0;
/* if curl suggests its own timeout (in milliseconds), use that instead,
 * capped at one second; when the cap applies tv_usec stays 0 */
curl_multi_timeout( multi_handle, &curl_timeo );
if ( curl_timeo >= 0 ) {
timeout.tv_sec = curl_timeo / 1000;
if ( timeout.tv_sec > 1 )
timeout.tv_sec = 1;
else
timeout.tv_usec = ( curl_timeo % 1000 ) * 1000;
}
/* get file descriptors from the transfers */
mc = curl_multi_fdset( multi_handle, &fdread, &fdwrite, &fdexcep,
&maxfd );
if ( mc != CURLM_OK ) {
fprintf( stderr, "curl_multi_fdset() failed, code %d.\n", mc );
break;
}
/* On success the value of maxfd is guaranteed to be >= -1. We call
select(maxfd + 1, ...); specially in case of (maxfd == -1) there are
no fds ready yet so we call select(0, ...) --or Sleep() on Windows--
to sleep 100ms, which is the minimum suggested value in the
curl_multi_fdset() doc. */
if ( maxfd == -1 ) {
#ifdef _WIN32
Sleep( 100 );
rc = 0;
#else
/* Portable sleep for platforms other than Windows. */
struct timeval wait = { 0, 100 * 1000 }; /* 100ms */
rc = select( 0, NULL, NULL, NULL, &wait );
#endif
} else {
/* Note that on some platforms 'timeout' may be modified by select().
If you need access to the original value save a copy beforehand. */
rc = select( maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout );
}
switch ( rc ) {
case -1:
/* select error: skip this round; the loop condition decides whether
 * to try again */
break;
case 0:
default:
/* timeout or readable/writable sockets: let curl make progress, which
 * may invoke the write callback and update still_running */
curl_multi_perform( multi_handle, &file->still_running );
break;
}
} while ( file->still_running && ( file->buffer_pos < want ) );
return 1;
}
/**
 * Remove `want` bytes from the front of a file's buffer.
 *
 * If `want` covers everything buffered (or more), the buffer is released
 * entirely; the write callback will allocate a new one when more data
 * arrives. Otherwise the unread remainder is moved down to the start.
 *
 * `buffer_pos` and `want` are both unsigned, so the comparison is written
 * as `buffer_pos <= want`. The earlier `( buffer_pos - want ) <= 0` wrapped
 * round when `want` exceeded `buffer_pos`, and then asked memmove() to copy
 * an enormous length.
 *
 * @param file the stream whose buffer is consumed.
 * @param want number of bytes to discard from the front.
 * @return always 0.
 */
static int use_buffer( URL_FILE *file, size_t want ) {
    if ( file->buffer_pos <= want ) {
        /* ditch buffer - write will recreate */
        free( file->buffer );
        file->buffer = NULL;
        file->buffer_pos = 0;
        file->buffer_len = 0;
    } else {
        /* move rest down make it available for later */
        memmove( file->buffer,
                 &file->buffer[want], ( file->buffer_pos - want ) );
        file->buffer_pos -= want;
    }

    return 0;
}
/**
 * Open `url` either as a local file or as a curl transfer.
 *
 * The name is first tried with fopen(). If that fails and the name contains
 * a ':' it is treated as a URL and a curl transfer is started on the shared
 * multi handle. Anything else yields a handle of type CFTYPE_NONE.
 *
 * @param url a file path or URL.
 * @param operation the fopen() mode string; only used for local files.
 * @return a new handle (to be released with url_fclose()), or NULL if
 *      allocation failed or the transfer ended at once without any data.
 */
URL_FILE *url_fopen( const char *url, const char *operation ) {
    URL_FILE *opened = calloc( 1, sizeof( URL_FILE ) );

    if ( opened == NULL ) {
        return NULL;
    }

    /* first choice: an ordinary local file */
    opened->handle.file = fopen( url, operation );
    if ( opened->handle.file != NULL ) {
        opened->type = CFTYPE_FILE;
        return opened;
    }

    if ( !( index_of( ':', url ) > -1 ) ) {
        /* not a file, and doesn't look like a URL. */
        opened->type = CFTYPE_NONE;
        return opened;
    }

    /* looks like a URL: set up an easy handle feeding write_callback */
    opened->type = CFTYPE_CURL;
    opened->handle.curl = curl_easy_init( );
    curl_easy_setopt( opened->handle.curl, CURLOPT_URL, url );
    curl_easy_setopt( opened->handle.curl, CURLOPT_WRITEDATA, opened );
    curl_easy_setopt( opened->handle.curl, CURLOPT_VERBOSE, 0L );
    curl_easy_setopt( opened->handle.curl, CURLOPT_WRITEFUNCTION,
                      write_callback );
    /* use the share object */
    curl_easy_setopt( opened->handle.curl, CURLOPT_SHARE, io_share );

    if ( !multi_handle ) {
        multi_handle = curl_multi_init( );
    }
    curl_multi_add_handle( multi_handle, opened->handle.curl );

    /* start the fetch */
    curl_multi_perform( multi_handle, &opened->still_running );

    if ( ( opened->buffer_pos == 0 ) && ( !opened->still_running ) ) {
        /* nothing arrived and nothing is pending: treat as failure and
         * detach the easy handle before releasing everything */
        curl_multi_remove_handle( multi_handle, opened->handle.curl );
        curl_easy_cleanup( opened->handle.curl );
        free( opened );
        opened = NULL;
    }

    return opened;
}
/**
 * Close `file` and release the handle and any buffered data.
 *
 * The handle is freed whatever its type, so it must not be used again.
 *
 * @param file the handle to close.
 * @return the result of fclose() for local files, 0 for curl streams, or
 *      EOF (with errno set to EBADF) for any other handle type.
 */
int url_fclose( URL_FILE *file ) {
    int status;

    if ( file->type == CFTYPE_FILE ) {
        status = fclose( file->handle.file );   /* passthrough */
    } else if ( file->type == CFTYPE_CURL ) {
        /* make sure the easy handle is not in the multi handle anymore */
        curl_multi_remove_handle( multi_handle, file->handle.curl );
        curl_easy_cleanup( file->handle.curl );
        status = 0;
    } else {
        /* unknown or unsupported type */
        status = EOF;
        errno = EBADF;
    }

    free( file->buffer );       /* free any allocated buffer space */
    free( file );

    return status;
}
/**
 * Test whether `file` has reached its end.
 *
 * @param file the handle to test.
 * @return for local files, the result of feof(); for curl streams, 1 when
 *      the buffer is empty and the transfer is no longer running, else 0;
 *      for any other handle type, -1 with errno set to EBADF.
 */
int url_feof( URL_FILE *file ) {
    if ( file->type == CFTYPE_FILE ) {
        return feof( file->handle.file );
    }

    if ( file->type == CFTYPE_CURL ) {
        int drained = ( file->buffer_pos == 0 ) && ( !file->still_running );
        return drained ? 1 : 0;
    }

    /* unknown or unsupported type */
    errno = EBADF;
    return -1;
}
/**
 * Read up to `nmemb` items of `size` bytes each from `file` into `ptr`.
 *
 * Local files are passed straight through to fread(). For curl streams the
 * buffer is topped up first, then as much as is available (at most
 * `size * nmemb` bytes) is copied out and removed from the buffer.
 *
 * As with fread(), a `size` or `nmemb` of zero reads nothing and returns 0.
 * Without that guard a zero `size` reached the final `want / size` and
 * divided by zero.
 *
 * @param ptr destination; must have room for `size * nmemb` bytes.
 * @param size size of one item in bytes.
 * @param nmemb maximum number of items to read.
 * @param file the handle to read from.
 * @return the number of complete items read; 0 at end of data or on error
 *      (errno is set to EBADF for an unsupported handle type).
 */
size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE *file ) {
    size_t want;

    switch ( file->type ) {
        case CFTYPE_FILE:
            want = fread( ptr, size, nmemb, file->handle.file );
            break;
        case CFTYPE_CURL:
            /* mirror fread(): nothing requested, nothing read */
            if ( size == 0 || nmemb == 0 )
                return 0;
            want = nmemb * size;
            fill_buffer( file, want );
            /* check if there's data in the buffer - if not fill_buffer()
             * either errored or EOF */
            if ( !file->buffer_pos )
                return 0;
            /* ensure only available data is considered */
            if ( file->buffer_pos < want )
                want = file->buffer_pos;
            /* xfer data to caller */
            memcpy( ptr, file->buffer, want );
            use_buffer( file, want );
            want = want / size; /* number of items */
            break;
        default:               /* unknown or unsupported type - oh dear */
            want = 0;
            errno = EBADF;
            break;
    }

    return want;
}
/**
 * Read at most `size - 1` bytes from `file` into `ptr`, stopping after a
 * newline, and always NUL-terminate what was read.
 *
 * Local files are passed straight through to fgets(). For curl streams the
 * buffer is topped up first and the line is then copied out of it.
 *
 * A `size` of zero leaves no room even for the terminator, so NULL is
 * returned for curl streams. Without that guard `size - 1` wrapped round to
 * the largest size_t and the copy could overrun `ptr`.
 *
 * @param ptr destination buffer of at least `size` bytes.
 * @param size capacity of `ptr`, including the terminating NUL.
 * @param file the handle to read from.
 * @return `ptr` on success; NULL at end of data or on error (errno is set
 *      to EBADF for an unsupported handle type).
 */
char *url_fgets( char *ptr, size_t size, URL_FILE *file ) {
    size_t want;
    size_t loop;

    switch ( file->type ) {
        case CFTYPE_FILE:
            ptr = fgets( ptr, ( int ) size, file->handle.file );
            break;
        case CFTYPE_CURL:
            if ( size == 0 )
                return NULL;
            want = size - 1;    /* always leave room for zero termination */
            fill_buffer( file, want );
            /* check if there's data in the buffer - if not fill either
             * errored or EOF */
            if ( !file->buffer_pos )
                return NULL;
            /* ensure only available data is considered */
            if ( file->buffer_pos < want )
                want = file->buffer_pos;
            /* buffer contains data: look for newline or eof */
            for ( loop = 0; loop < want; loop++ ) {
                if ( file->buffer[loop] == '\n' ) {
                    want = loop + 1;    /* include newline */
                    break;
                }
            }
            /* xfer data to caller */
            memcpy( ptr, file->buffer, want );
            ptr[want] = 0;      /* always null terminate */
            use_buffer( file, want );
            break;
        default:               /* unknown or unsupported type - oh dear */
            ptr = NULL;
            errno = EBADF;
            break;
    }

    return ptr;                 /* NULL here means failure */
}
/**
 * Reset `file` to its beginning.
 *
 * Local files are rewound with rewind(). For curl streams the easy handle
 * is removed from and re-added to the multi handle and the buffer is
 * discarded. Handles of any other type are left untouched.
 *
 * @param file the handle to rewind.
 */
void url_rewind( URL_FILE *file ) {
    if ( file->type == CFTYPE_FILE ) {
        rewind( file->handle.file );    /* passthrough */
        return;
    }

    if ( file->type != CFTYPE_CURL ) {
        /* unknown or unsupported type: nothing to do */
        return;
    }

    /* halt transaction, then restart it */
    curl_multi_remove_handle( multi_handle, file->handle.curl );
    curl_multi_add_handle( multi_handle, file->handle.curl );

    /* ditch buffer - write will recreate - resets stream pos */
    free( file->buffer );
    file->buffer = NULL;
    file->buffer_pos = 0;
    file->buffer_len = 0;
}
#ifdef FOPEN_STANDALONE
#define FGETSFILE "fgets.test"
#define FREADFILE "fread.test"
#define REWINDFILE "rewind.test"
/**
 * Small test program, built only when FOPEN_STANDALONE is defined.
 *
 * Retrieves a URL with url_fgets() and a local "testfile" with url_fread(),
 * saving the output to test files, then exercises url_rewind(). Note that
 * the fgets method will corrupt binary files if they contain 0 chars.
 *
 * @param argc argument count.
 * @param argv optional URL to fetch as argv[1].
 * @return 0 on success, 1 if an output file could not be opened, 2 if an
 *      input could not be opened.
 */
int main( int argc, char *argv[] ) {
    URL_FILE *handle;
    FILE *outf;
    size_t nread;
    char buffer[256];
    const char *url;

    curl_global_init( CURL_GLOBAL_DEFAULT );

    if ( argc < 2 )
        url = "http://192.168.7.3/testfile";    /* default to testurl */
    else
        url = argv[1];          /* use passed url */

    /* copy from url line by line with fgets */
    outf = fopen( FGETSFILE, "wb+" );
    if ( !outf ) {
        perror( "couldn't open fgets output file\n" );
        return 1;
    }
    handle = url_fopen( url, "r" );
    if ( !handle ) {
        printf( "couldn't url_fopen() %s\n", url );
        fclose( outf );
        return 2;
    }
    while ( !url_feof( handle ) ) {
        /* stop when nothing was read: otherwise `buffer` would still hold
         * the previous line (or be uninitialised on the first pass) */
        if ( url_fgets( buffer, sizeof( buffer ), handle ) == NULL )
            break;
        fwrite( buffer, 1, strlen( buffer ), outf );
    }
    url_fclose( handle );
    fclose( outf );

    /* Copy from url with fread */
    outf = fopen( FREADFILE, "wb+" );
    if ( !outf ) {
        perror( "couldn't open fread output file\n" );
        return 1;
    }
    handle = url_fopen( "testfile", "r" );
    if ( !handle ) {
        printf( "couldn't url_fopen() testfile\n" );
        fclose( outf );
        return 2;
    }
    do {
        nread = url_fread( buffer, 1, sizeof( buffer ), handle );
        fwrite( buffer, 1, nread, outf );
    } while ( nread );
    url_fclose( handle );
    fclose( outf );

    /* Test rewind */
    outf = fopen( REWINDFILE, "wb+" );
    if ( !outf ) {
        perror( "couldn't open rewind output file\n" );
        return 1;
    }
    handle = url_fopen( "testfile", "r" );
    if ( !handle ) {
        printf( "couldn't url_fopen() testfile\n" );
        fclose( outf );
        return 2;
    }
    nread = url_fread( buffer, 1, sizeof( buffer ), handle );
    fwrite( buffer, 1, nread, outf );
    url_rewind( handle );
    buffer[0] = '\n';
    fwrite( buffer, 1, 1, outf );
    nread = url_fread( buffer, 1, sizeof( buffer ), handle );
    fwrite( buffer, 1, nread, outf );
    url_fclose( handle );
    fclose( outf );

    curl_global_cleanup( );

    return 0;                   /* all done */
}
#endif

83
src/c/io/fopen.h Normal file
View file

@ -0,0 +1,83 @@
/*
* io/fopen.h
*
* adapted from https://curl.haxx.se/libcurl/c/fopen.html.
*
*
* Modifications to read/write wide character streams by
* Simon Brooke.
*
* NOTE THAT: for my purposes, I'm only interested in wide characters,
* and I always read them one character at a time.
*
* Copyright (c) 2003, 2017 Simtec Electronics
* Some portions (c) 2019 Simon Brooke <simon@journeyman.cc>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This example requires libcurl 7.9.7 or later.
*/
#ifndef __fopen_h
#define __fopen_h
#include <curl/curl.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
/*
 * Wide-character output helpers. Output is only supported on handles that
 * wrap a local FILE; for any other handle type nothing is written and the
 * fallback value (-1 for url_fwprintf, 0 for the others) is returned.
 *
 * The type test must be a comparison: an assignment here would overwrite
 * the handle's type and always take the FILE branch.
 */
#define url_fwprintf(f, ...) (((f)->type == CFTYPE_FILE) ? fwprintf((f)->handle.file, __VA_ARGS__) : -1)
#define url_fputws(ws, f) (((f)->type == CFTYPE_FILE) ? fputws((ws), (f)->handle.file) : 0)
#define url_fputwc(wc, f) (((f)->type == CFTYPE_FILE) ? fputwc((wc), (f)->handle.file) : 0)
/* The kind of underlying stream a URL_FILE wraps. */
enum fcurl_type_e {
CFTYPE_NONE = 0, /* neither a file nor a URL; I/O on it fails */
CFTYPE_FILE = 1, /* a local file, accessed through handle.file */
CFTYPE_CURL = 2 /* a curl transfer, accessed through handle.curl */
};
/*
 * State for one open stream, which is either a local FILE or a curl
 * transfer. The buffer fields and still_running are only used by the curl
 * code paths in fopen.c, but url_fclose() frees `buffer` for every type,
 * so it must be NULL or a valid allocation.
 */
struct fcurl_data {
enum fcurl_type_e type; /* type of handle; selects the union member */
union {
CURL *curl;
FILE *file;
} handle; /* handle */
char *buffer; /* buffer to store cached data */
size_t buffer_len; /* currently allocated buffer's length */
size_t buffer_pos; /* number of bytes currently held in buffer */
int still_running; /* Is background url fetch still in progress */
};
typedef struct fcurl_data URL_FILE;
/* exported functions (implemented in io/fopen.c) */
/* open a local file or URL; NULL on failure */
URL_FILE *url_fopen( const char *url, const char *operation );
/* close the stream and free the handle */
int url_fclose( URL_FILE * file );
/* non-zero at end of stream */
int url_feof( URL_FILE * file );
/* fread() equivalent; returns the number of items read */
size_t url_fread( void *ptr, size_t size, size_t nmemb, URL_FILE * file );
/* fgets() equivalent; returns ptr, or NULL at end of data or on error */
char *url_fgets( char *ptr, size_t size, URL_FILE * file );
/* restart reading from the beginning */
void url_rewind( URL_FILE * file );
#endif

620
src/c/io/io.c Normal file
View file

@ -0,0 +1,620 @@
/*
* io.c
*
* Communication between PSSE and the outside world, via libcurl. NOTE
* that this file destructively changes metadata on URL connections,
* because the metadata is not available until the stream has been read
* from. It would be better to find a workaround!
*
* (c) 2019 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <grp.h>
#include <langinfo.h>
#include <pwd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <uuid/uuid.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
#include <curl/curl.h>
//#include "arith/integer.h"
#include "debug.h"
#include "io/fopen.h"
#include "io/io.h"
#include "memory/node.h"
#include "memory/pointer.h"
#include "memory/pso2.h"
#include "memory/pso4.h"
#include "memory/tags.h"
// #include "ops/intern.h"
// #include "ops/lispops.h"
#include "ops/stack_ops.h"
#include "ops/string_ops.h"
#include "ops/truth.h"
#include "payloads/character.h"
#include "payloads/cons.h"
#include "payloads/exception.h"
#include "payloads/integer.h"
#include "payloads/stack.h"
#include "utils.h"
/**
 * The sharing hub for all connections, created by `initialise_io`.
 * TODO: Ultimately this probably doesn't work for a multi-user environment
 * and we will need one sharing hub for each user, or else we will need to
 * not share at least cookies and ssl sessions.
 */
CURLSH *io_share;
/**
 * @brief bound to the Lisp string representing C_IO_IN in initialisation.
 */
struct pso_pointer lisp_io_in;
/**
 * @brief bound to the Lisp string representing C_IO_OUT in initialisation.
 */
struct pso_pointer lisp_io_out;
/**
 * Allow a one-character unget facility. This may not be enough - we may need
 * to allocate a buffer.
 *
 * Zero means "nothing pushed back". NOTE: this is a single slot shared by
 * every stream: `url_fgetwc` returns it before looking at which stream it
 * was asked to read, so a character pushed back on one curl stream is
 * returned by the next read from any stream. It is also not thread safe.
 */
wint_t ungotten = 0;
/**
* Initialise the I/O subsystem.
*
* @return 0 on success; any other value means failure.
*/
int initialise_io( ) {
int result = curl_global_init( CURL_GLOBAL_SSL );
io_share = curl_share_init( );
if ( result == 0 ) {
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT );
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE );
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS );
curl_share_setopt( io_share, CURLSHOPT_SHARE,
CURL_LOCK_DATA_SSL_SESSION );
curl_share_setopt( io_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_PSL );
}
return result;
}
/**
 * Convert this lisp string-like-thing (also works for symbols, and, later
 * keywords) into a multibyte C string, using the current locale's encoding
 * (UTF-8 in practice). NOTE that the returned value has been malloced and
 * must be freed by the caller. TODO: candidate to moving into a utilities
 * file.
 *
 * The byte limit passed to wcstombs() is the full capacity of the output
 * buffer. It used to be the number of *characters*, which truncated any
 * string containing multi-byte characters.
 *
 * @param s the lisp string or symbol;
 * @return the c string, or NULL if `s` is neither a string nor a symbol or
 *      if memory could not be allocated.
 */
char *lisp_string_to_c_string( struct pso_pointer s ) {
    char *result = NULL;

    if ( stringp( s ) || symbolp( s ) ) {
        size_t len = 0;

        for ( struct pso_pointer c = s; !nilp( c ); c = cdr( c ) ) {
            len++;
        }

        /* worst case, one wide char = four utf bytes; +1 for the NUL */
        size_t capacity = ( len * 4 ) + 1;
        wchar_t *buffer = calloc( len + 1, sizeof( wchar_t ) );
        result = calloc( capacity, sizeof( char ) );

        if ( buffer != NULL && result != NULL ) {
            size_t i = 0;

            for ( struct pso_pointer c = s; !nilp( c ); c = cdr( c ) ) {
                buffer[i++] =
                    pointer_to_object( c )->payload.string.character;
            }

            /* leave the last byte untouched so that the result is always
             * NUL-terminated, even if conversion stops early */
            wcstombs( result, buffer, capacity - 1 );
        } else {
            free( result );
            result = NULL;
        }

        free( buffer );
    }

    debug_print( L"lisp_string_to_c_string( ", DEBUG_IO, 0 );
    debug_print_object( s, DEBUG_IO, 0 );
    /* never hand a NULL pointer to %s */
    debug_printf( DEBUG_IO, 0, L") => '%s'\n",
                  result != NULL ? result : "(null)" );

    return result;
}
/**
 * given this file handle f, return a new url_file handle wrapping it.
 *
 * The handle is zero-initialised before use. `url_fclose` frees
 * `buffer` for every handle type, so that field must start out as NULL
 * rather than whatever happened to be in freshly malloced memory.
 *
 * @param f the file to be wrapped;
 * @return the new handle, or null if no such handle could be allocated.
 */
URL_FILE *file_to_url_file( FILE *f ) {
    URL_FILE *result = calloc( 1, sizeof( URL_FILE ) );

    if ( result != NULL ) {
        result->type = CFTYPE_FILE;
        result->handle.file = f;
    }

    return result;
}
/**
 * get one wide character from the stream.
 *
 * A character previously pushed back with `url_ungetwc` on a curl stream
 * (held in the global `ungotten`) is returned first, whichever stream is
 * being read. Otherwise local files are read with fgetwc(), and curl
 * streams are read byte by byte and decoded from UTF-8.
 *
 * The first byte is examined as an *unsigned* value, and the single-byte
 * range ends at 0x7F. The earlier test, `c <= 0xf7` on a possibly signed
 * char, classified almost every lead byte as a one-byte sequence, so the
 * continuation bytes of a multi-byte character were never fetched.
 *
 * @param input the stream to read from;
 * @return the next wide character on the stream. At end of input a local
 *      file yields WEOF (from fgetwc) and a curl stream yields zero.
 */
wint_t url_fgetwc( URL_FILE *input ) {
    wint_t result = -1;

    if ( ungotten != 0 ) {
        /* TODO: not thread safe */
        result = ungotten;
        ungotten = 0;
    } else {
        switch ( input->type ) {
            case CFTYPE_FILE:
                fwide( input->handle.file, 1 ); /* wide characters */
                result = fgetwc( input->handle.file );  /* passthrough */
                break;
            case CFTYPE_CURL:{
                    /* room for a four-byte sequence plus terminator */
                    char *cbuff =
                        calloc( sizeof( wchar_t ) + 2, sizeof( char ) );
                    wchar_t *wbuff = calloc( 2, sizeof( wchar_t ) );
                    size_t count = 0;

                    if ( cbuff == NULL || wbuff == NULL ) {
                        free( wbuff );
                        free( cbuff );
                        break;
                    }

                    debug_print( L"url_fgetwc: about to call url_fgets\n",
                                 DEBUG_IO, 0 );
                    /* size 2 == one byte plus the terminating NUL */
                    url_fgets( cbuff, 2, input );
                    debug_print( L"url_fgetwc: back from url_fgets\n",
                                 DEBUG_IO, 0 );
                    int c = ( unsigned char ) cbuff[0];

                    debug_printf( DEBUG_IO, 0,
                                  L"url_fgetwc: cbuff is '%s'; (first) character = %d (%c)\n",
                                  cbuff, c, c & 0x7f );
                    /* The value of the first byte gives the length of the
                     * UTF-8 sequence, as follows:
                     *
                     * 00 to 7F hex (0 to 127): first and only byte.
                     * 80 to BF hex (128 to 191): continuing byte in a
                     *      multi-byte sequence (not a valid first byte).
                     * C2 to DF hex (194 to 223): first of two bytes.
                     * E0 to EF hex (224 to 239): first of three bytes.
                     * F0 to F4 hex (240 to 244): first of four bytes.
                     *
                     * Anything else is invalid and is left for mbstowcs()
                     * to reject.
                     */
                    if ( c <= 0x7f ) {
                        count = 1;
                    } else if ( c >= 0xc2 && c <= 0xdf ) {
                        count = 2;
                    } else if ( c >= 0xe0 && c <= 0xef ) {
                        count = 3;
                    } else if ( c >= 0xf0 && c <= 0xf4 ) {
                        count = 4;
                    }

                    if ( count > 1 ) {
                        /* a size of `count` reads the remaining
                         * count - 1 bytes plus the terminator */
                        url_fgets( ( char * ) &cbuff[1], count, input );
                    }
                    mbstowcs( wbuff, cbuff, 2 );
                    result = wbuff[0];

                    free( wbuff );
                    free( cbuff );
                }
                break;
            case CFTYPE_NONE:
                break;
        }
    }

    debug_printf( DEBUG_IO, 0, L"url_fgetwc returning %d (%C)\n", result,
                  result );

    return result;
}
/**
 * Push the wide character `wc` back onto `input`, so that the next call to
 * `url_fgetwc` returns it.
 *
 * Local files are passed through to ungetwc(). For curl streams the
 * character is held in the single global `ungotten` slot, so only one
 * character can be pending at a time. Following the ungetwc() convention,
 * the pushed-back character is returned on success; previously the curl
 * branch stored the character but still reported failure.
 *
 * @param wc the character to push back;
 * @param input the stream to push it back onto.
 * @return `wc` on success, else WEOF.
 */
wint_t url_ungetwc( wint_t wc, URL_FILE *input ) {
    wint_t result = WEOF;

    switch ( input->type ) {
        case CFTYPE_FILE:
            fwide( input->handle.file, 1 );     /* wide characters */
            result = ungetwc( wc, input->handle.file ); /* passthrough */
            break;
        case CFTYPE_CURL:
            /* WEOF cannot be pushed back, as for ungetwc() */
            if ( wc != WEOF ) {
                ungotten = wc;
                result = wc;
            }
            break;
        case CFTYPE_NONE:
        default:
            break;
    }

    return result;
}
/**
 * @brief Read the next character from `read_stream` as a character object.
 *
 * @param read_stream a pointer to an object which should be a read stream
 * object.
 *
 * @return a pointer to a character object wrapping whatever `url_fgetwc`
 * returned, or `nil` if `read_stream` is not a read stream.
 */
struct pso_pointer get_character( struct pso_pointer read_stream ) {
    /* Guard clause: only read streams can be read from. */
    if ( !readp( read_stream ) ) {
        return nil;
    }

    return make_character( url_fgetwc
                           ( pointer_to_object_of_size_class
                             ( read_stream, 2 )->payload.stream.stream ) );
}
/**
 * @brief Return character `c` to read stream `r`, to be read again later.
 *
 * @param c a pointer to an object which should be a character object;
 * @param r a pointer to an object which should be a read stream object.
 *
 * @return `t` if both arguments have the expected types and the underlying
 * push back reported a non-negative result, else `nil`.
 *
 * NOTE(review): the result of `url_ungetwc` is a `wint_t`; where that type
 * is unsigned the `>= 0` test below can never fail - confirm whether a
 * comparison against WEOF was intended.
 */
struct pso_pointer push_back_character( struct pso_pointer c,
                                        struct pso_pointer r ) {
    /* Guard clause: wrong argument types. */
    if ( !characterp( c ) || !readp( r ) ) {
        return nil;
    }

    wint_t wide =
        ( wint_t ) ( pointer_to_object( c )->payload.character.character );

    if ( url_ungetwc( wide,
                      pointer_to_object( r )->payload.stream.stream ) >=
         0 ) {
        return t;
    }

    return nil;
}
/**
* Function, sort-of: close the file indicated by my first arg, and return
* nil. If the first arg is not a stream, does nothing. All other args are
* ignored.
*
* * (close stream)
*
* @param frame my stack frame.
* @param frame_pointer a pointer to my stack frame.
* @param env my environment.
* @return T if the stream was successfully closed, else nil.
*/
struct pso_pointer
lisp_close( struct pso_pointer frame_pointer, struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer result = nil;
if ( readp( fetch_arg( frame, 0 ) ) || writep( fetch_arg( frame, 0 ) ) ) {
if ( url_fclose
( pointer_to_object( fetch_arg( frame, 0 ) )->payload.
stream.stream )
== 0 ) {
result = t;
}
}
return result;
}
struct pso_pointer add_meta_integer( struct pso_pointer meta, wchar_t *key,
long int value ) {
return
cons( cons
( c_string_to_lisp_keyword( key ),
make_integer( value ) ), meta );
}
/**
 * Prepend a `(key . value)` pair to the association list `meta`, where the
 * key is made into a Lisp keyword and the value into a Lisp string.
 *
 * @param meta the existing metadata list.
 * @param key the name for the new entry, as a wide C string.
 * @param value the value, as a multibyte C string; it is passed through
 * `trim` before conversion.
 * @return the new head of the metadata list. If `value` is not a valid
 * multibyte sequence in the current locale, the entry's value is the empty
 * string.
 */
struct pso_pointer add_meta_string( struct pso_pointer meta, wchar_t *key,
                                    char *value ) {
    value = trim( value );

    /* a multibyte string never converts to more wide characters than it has
     * bytes, so this always leaves room for the terminator. */
    size_t capacity = strlen( value ) + 1;
    wchar_t buffer[capacity];

    if ( mbstowcs( buffer, value, capacity ) == ( size_t ) -1 ) {
        /* invalid multibyte sequence: the buffer contents are unspecified,
         * so do not hand them on to the string constructor. */
        buffer[0] = L'\0';
    }
    buffer[capacity - 1] = L'\0';

    return cons( cons( c_string_to_lisp_keyword( key ),
                       c_string_to_lisp_string( buffer ) ), meta );
}
/**
 * Prepend a `(key . value)` pair to the association list `meta`, where the
 * value is the time `*value` formatted as a string using the locale's
 * date-and-time format (`D_T_FMT`).
 *
 * @param meta the existing metadata list.
 * @param key the name for the new entry, as a wide C string.
 * @param value pointer to the time to record.
 * @return the new head of the metadata list. If the time cannot be converted
 * or formatted, the entry's value is the empty string.
 */
struct pso_pointer add_meta_time( struct pso_pointer meta, wchar_t *key,
                                  time_t *value ) {
    /* I don't yet have a concept of a date-time object, which is a
     * bit of an oversight! */
    char datestring[256] = "";
    struct tm *broken_down = localtime( value );

    /* `localtime` may return NULL, and `strftime` returns 0 (leaving the
     * buffer contents indeterminate) if the result does not fit. */
    if ( broken_down == NULL ||
         strftime( datestring, sizeof( datestring ),
                   nl_langinfo( D_T_FMT ), broken_down ) == 0 ) {
        datestring[0] = '\0';
    }

    return add_meta_string( meta, key, datestring );
}
/**
 * Callback to assemble metadata for a URL stream. This is naughty because
 * it modifies data, but it's really the only way to create metadata.
 *
 * The real work is currently commented out pending reimplementation, so this
 * callback just accepts and discards each header line.
 *
 * @param string the header data handed over by libcurl. libcurl documents
 * that this is NOT guaranteed to be NUL-terminated; any reimplementation
 * must use `size * nmemb` rather than `strlen`.
 * @param size size of one item (per libcurl, always 1).
 * @param nmemb the number of items in `string`.
 * @param stream the stream object whose metadata is to be extended.
 * @return the number of bytes consumed. libcurl treats any value other than
 * `size * nmemb` as an error and aborts the transfer, so a do-nothing
 * callback must still report everything as consumed.
 */
static size_t write_meta_callback( char *string, size_t size, size_t nmemb,
                                   struct pso_pointer stream ) {
    struct pso2 *cell = pointer_to_object( stream );

    ( void ) cell;              /* unused until the body is reimplemented */
    // TODO: reimplement
    /* make a copy of the string that we can destructively change */
    // char *s = calloc( strlen( string ), sizeof( char ) );
    // strcpy( s, string );
    // if ( check_tag( cell, READTV) ||
    // check_tag( cell, WRITETV) ) {
    // int offset = index_of( ':', s );
    // if ( offset != -1 ) {
    // s[offset] = ( char ) 0;
    // char *name = trim( s );
    // char *value = trim( &s[++offset] );
    // wchar_t wname[strlen( name )];
    // mbstowcs( wname, name, strlen( name ) + 1 );
    // cell->payload.stream.meta =
    // add_meta_string( cell->payload.stream.meta, wname, value );
    // debug_printf( DEBUG_IO,
    // L"write_meta_callback: added header '%s': value '%s'\n",
    // name, value );
    // } else if ( strncmp( "HTTP", s, 4 ) == 0 ) {
    // int offset = index_of( ' ', s );
    // char *value = trim( &s[offset] );
    // cell->payload.stream.meta =
    // add_meta_integer( add_meta_string
    // ( cell->payload.stream.meta, L"status",
    // value ), L"status-code", strtol( value,
    // NULL,
    // 10 ) );
    // debug_printf( DEBUG_IO,
    // L"write_meta_callback: added header 'status': value '%s'\n",
    // value );
    // } else {
    // debug_printf( DEBUG_IO,
    // L"write_meta_callback: header passed with no colon: '%s'\n",
    // s );
    // }
    // } else {
    // debug_print
    // ( L"Pointer passed to write_meta_callback did not point to a stream: ",
    // DEBUG_IO );
    // debug_dump_object( stream, DEBUG_IO );
    // }
    // free( s );
    return size * nmemb;
}
void collect_meta( struct pso_pointer stream, char *url ) {
struct pso2 *cell = pointer_to_object( stream );
URL_FILE *s = pointer_to_object( stream )->payload.stream.stream;
struct pso_pointer meta =
add_meta_string( cell->payload.stream.meta, L"url", url );
struct stat statbuf;
int result = stat( url, &statbuf );
struct passwd *pwd;
struct group *grp;
switch ( s->type ) {
case CFTYPE_NONE:
break;
case CFTYPE_FILE:
if ( result == 0 ) {
if ( ( pwd = getpwuid( statbuf.st_uid ) ) != NULL ) {
meta = add_meta_string( meta, L"owner", pwd->pw_name );
} else {
meta = add_meta_integer( meta, L"owner", statbuf.st_uid );
}
if ( ( grp = getgrgid( statbuf.st_gid ) ) != NULL ) {
meta = add_meta_string( meta, L"group", grp->gr_name );
} else {
meta = add_meta_integer( meta, L"group", statbuf.st_gid );
}
meta =
add_meta_integer( meta, L"size",
( intmax_t ) statbuf.st_size );
meta = add_meta_time( meta, L"modified", &statbuf.st_mtime );
}
break;
case CFTYPE_CURL:
curl_easy_setopt( s->handle.curl, CURLOPT_VERBOSE, 1L );
curl_easy_setopt( s->handle.curl, CURLOPT_HEADERFUNCTION,
write_meta_callback );
curl_easy_setopt( s->handle.curl, CURLOPT_HEADERDATA, stream );
break;
}
/* this is destructive change before the cell is released into the
* wild, and consequently permissible, just. */
cell->payload.stream.meta = meta;
}
/**
 * Return the current default input stream or, if `inputp` is false, the
 * current default output stream, from this `env`ironment.
 *
 * Not yet implemented: the lookup is commented out, so this currently
 * always returns `nil`.
 */
struct pso_pointer get_default_stream( bool inputp, struct pso_pointer env ) {
    // TODO: reinstate the lookup:
    // struct pso_pointer stream_name = inputp ? lisp_io_in : lisp_io_out;
    // result = c_assoc( stream_name, env );
    return nil;
}
/**
 * Function: return a stream open on the URL indicated by the first argument;
 * if a second argument is present and is non-nil, open it for writing. At
 * present, further arguments are ignored and there is no mechanism to open
 * to append, or error if the URL is faulty or indicates an unavailable
 * resource.
 *
 * NOTE: the implementation below is entirely commented out pending
 * reimplementation, so this function currently always returns `nil`.
 *
 * * (open url)
 *
 * @param frame_pointer a pointer to my stack frame.
 * @param env my environment.
 * @return intended: a read or write stream open on the URL, or an exception
 * if it could not be opened; currently always `nil`.
 */
struct pso_pointer
lisp_open( struct pso_pointer frame_pointer, struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer result = nil;
// if ( stringp( fetch_arg( frame, 0) ) ) {
// char *url = lisp_string_to_c_string( fetch_arg( frame, 0) );
// if ( nilp( fetch_arg( frame, 1) ) ) {
// URL_FILE *stream = url_fopen( url, "r" );
// debug_printf( DEBUG_IO, 0,
// L"lisp_open: stream @ %ld, stream type = %d, stream handle = %ld\n",
// ( long int ) &stream, ( int ) stream->type,
// ( long int ) stream->handle.file );
// switch ( stream->type ) {
// case CFTYPE_NONE:
// return
// make_exception( c_string_to_lisp_string
// ( L"Could not open stream" ),
// frame_pointer , nil );
// break;
// case CFTYPE_FILE:
// if ( stream->handle.file == NULL ) {
// return
// make_exception( c_string_to_lisp_string
// ( L"Could not open file" ),
// frame_pointer , nil);
// }
// break;
// case CFTYPE_CURL:
// /* can't tell whether a URL is bad without reading it */
// break;
// }
// result = make_read_stream( stream, nil );
// } else {
// // TODO: anything more complex is a problem for another day.
// URL_FILE *stream = url_fopen( url, "w" );
// result = make_write_stream( stream, nil );
// }
// if ( pointer_to_object( result )->payload.stream.stream == NULL ) {
// result = nil;
// } else {
// collect_meta( result, url );
// }
// free( url );
// }
return result;
}
/**
* Function: return the next character from the stream indicated by arg 0;
* further arguments are ignored.
*
* * (read-char stream)
*
* @param frame my stack frame.
* @param frame_pointer a pointer to my stack frame.
* @param env my environment.
* @return a string of one character, namely the next available character
* on my stream, if any, else nil.
*/
struct pso_pointer
lisp_read_char( struct pso_pointer frame_pointer, struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer result = nil;
if ( readp( fetch_arg( frame, 0 ) ) ) {
result =
make_string( url_fgetwc
( pointer_to_object( fetch_arg( frame, 0 ) )->payload.
stream.stream ), nil );
}
return result;
}
/**
 * Function: return a string representing all characters from the stream
 * indicated by arg 0; further arguments are ignored.
 *
 * TODO: it should be possible to optionally pass a string URL to this function,
 *
 * * (slurp stream)
 *
 * @param frame_pointer a pointer to my stack frame.
 * @param env my environment.
 * @return a string of the characters read from the stream, up to end of
 * input or the first zero character; `nil` if arg 0 is not a read stream or
 * the stream is already at end of input.
 */
struct pso_pointer
lisp_slurp( struct pso_pointer frame_pointer, struct pso_pointer env ) {
    struct pso4 *frame = pointer_to_pso4( frame_pointer );
    struct pso_pointer result = nil;

    if ( readp( fetch_arg( frame, 0 ) ) ) {
        URL_FILE *stream =
            pointer_to_object( fetch_arg( frame, 0 ) )->payload.stream.stream;
        wint_t first = url_fgetwc( stream );

        /* an empty stream yields WEOF straight away; don't store that as
         * though it were the first character of the result. */
        if ( first != WEOF ) {
            struct pso_pointer cursor = make_string( first, nil );
            result = cursor;

            for ( wint_t c = url_fgetwc( stream );
                  !url_feof( stream ) && c != 0; c = url_fgetwc( stream ) ) {
                debug_print( L"slurp: cursor is: ", DEBUG_IO, 0 );
                debug_dump_object( cursor, DEBUG_IO, 0 );
                debug_print( L"; result is: ", DEBUG_IO, 0 );
                debug_dump_object( result, DEBUG_IO, 0 );
                debug_println( DEBUG_IO );

                /* append a new one-character cell to the tail of the
                 * string and advance the cursor to it. */
                struct pso2 *cell = pointer_to_object( cursor );
                cursor = make_string( ( wchar_t ) c, nil );
                cell->payload.string.cdr = cursor;
            }
        }
    }

    return result;
}

50
src/c/io/io.h Normal file
View file

@ -0,0 +1,50 @@
/*
* io.h
*
* Communication between PSSE and the outside world, via libcurl.
*
* (c) 2019 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_io_io_h
#define __psse_io_io_h
#include <curl/curl.h>
#include "memory/pointer.h"
#include "memory/pso2.h"
#include "memory/pso4.h"
/** libcurl share handle; defined elsewhere (presumably in io.c - confirm). */
extern CURLSH *io_share;
/** Initialise the I/O subsystem. NOTE(review): the meaning of the int
 * result is not visible from this header. */
int initialise_io( );
/** Names, as wide C strings, of the default input and output streams. */
#define C_IO_IN L"*in*"
#define C_IO_OUT L"*out*"
/** Lisp objects for the default stream names; presumably the symbols named
 * by C_IO_IN and C_IO_OUT - confirm against the definitions. */
extern struct pso_pointer lisp_io_in;
extern struct pso_pointer lisp_io_out;
/** Wrap a stdio `FILE` in a `URL_FILE`. */
URL_FILE *file_to_url_file( FILE * f );
/** Read one wide character from `input`. */
wint_t url_fgetwc( URL_FILE * input );
/** Push the wide character `wc` back onto `input`. */
wint_t url_ungetwc( wint_t wc, URL_FILE * input );
/** Read one character object from a read stream; `nil` if `read_stream` is
 * not a read stream. */
struct pso_pointer get_character( struct pso_pointer read_stream );
/** Push character object `c` back onto read stream `r`; `t` on success,
 * else `nil`. */
struct pso_pointer push_back_character( struct pso_pointer c,
struct pso_pointer r );
/** Return the default input (`inputp` true) or output stream from `env`. */
struct pso_pointer get_default_stream( bool inputp, struct pso_pointer env );
/* Lisp-callable functions: each takes a pointer to its stack frame and its
 * environment. */
/** (close stream) */
struct pso_pointer
lisp_close( struct pso_pointer frame_pointer, struct pso_pointer env );
/** (open url) */
struct pso_pointer
lisp_open( struct pso_pointer frame_pointer, struct pso_pointer env );
/** (read-char stream) */
struct pso_pointer
lisp_read_char( struct pso_pointer frame_pointer, struct pso_pointer env );
/** (slurp stream) */
struct pso_pointer
lisp_slurp( struct pso_pointer frame_pointer, struct pso_pointer env );
/** Convert a Lisp string to a C string. NOTE(review): ownership of the
 * returned buffer is not visible here - confirm whether the caller frees. */
char *lisp_string_to_c_string( struct pso_pointer s );
#endif

128
src/c/io/print.c Normal file
View file

@ -0,0 +1,128 @@
/**
* io/print.c
*
* Post Scarcity Software Environment: print.
*
* Print basic Lisp objects..This is :bootstrap layer print; it needs to be
* able to print characters, symbols, integers, lists and dotted pairs. I
* don't think it needs to be able to print anything else.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
/* libcurl, used for io */
#include <curl/curl.h>
#include "io/fopen.h"
#include "io/io.h"
#include "memory/node.h"
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/pso2.h"
#include "memory/tags.h"
#include "payloads/character.h"
#include "payloads/cons.h"
#include "payloads/integer.h"
struct pso_pointer in_print( struct pso_pointer p, URL_FILE * output );
/**
 * Print the elements of the list starting at cons cell `p` to `output`,
 * without the enclosing parentheses. Elements are separated by single
 * spaces; a non-nil, non-cons tail is printed in dotted-pair notation.
 *
 * @param p pointer to the first cons cell of the list.
 * @param output the stream to print to.
 * @return the result of the last `in_print` call made, or `nil` if `p` was
 * not a cons cell; printing stops early if an element yields an exception.
 */
struct pso_pointer print_list_content( struct pso_pointer p, URL_FILE *output ) {
    struct pso_pointer result = nil;

    // TODO: return exception if `p` is not a cons at all
    while ( consp( p ) ) {
        struct pso2 *cell = pointer_to_object( p );

        result = in_print( cell->payload.cons.car, output );
        if ( exceptionp( result ) ) {
            break;
        }

        struct pso_pointer tail = cell->payload.cons.cdr;

        if ( get_tag_value( tail ) == CONSTV ) {
            /* more elements follow */
            url_fputwc( L' ', output );
        } else if ( get_tag_value( tail ) != NILTV ) {
            /* improper list: show the tail as a dotted pair */
            url_fputws( L" . ", output );
            result = in_print( tail, output );
        }

        p = cdr( p );
    }

    return result;
}
/**
 * Print the object indicated by `p` to `output`. Handles characters, cons
 * cells (lists and dotted pairs), integers, `t` and `nil`; anything else is
 * currently silently ignored.
 *
 * @param p pointer to the object to print.
 * @param output the stream to print to.
 * @return `nil`, or whatever `print_list_content` returned for a list.
 */
struct pso_pointer in_print( struct pso_pointer p, URL_FILE *output ) {
    struct pso2 *object = pointer_to_object( p );
    struct pso_pointer result = nil;

    if ( object != NULL ) {
        switch ( get_tag_value( p ) ) {
            case CHARACTERTV:
                url_fputwc( object->payload.character.character, output );
                break;
            case CONSTV:
                url_fputwc( L'(', output );
                result = print_list_content( p, output );
                url_fputwc( L')', output );
                break;
            case INTEGERTV:
                /* the value is 64 bits wide, so `%d` would be a format /
                 * argument mismatch; print it as a long long. */
                url_fwprintf( output, L"%lld",
                              ( long long ) ( object->payload.integer.
                                              value ) );
                break;
            case TRUETV:
                url_fputwc( L't', output );
                break;
            case NILTV:
                url_fputws( L"nil", output );
                break;
            default:
                // TODO: return exception
                break;
        }
    } else {
        // TODO: return exception
    }

    return result;
}
/**
 * @brief Simple print for bootstrap layer.
 *
 * @param p pointer to the object to print.
 * @param stream if a pointer to an open write stream, print to there;
 * otherwise print to standard output.
 * @return struct pso_pointer `nil`, or an exception if some error occurred.
 */
struct pso_pointer print( struct pso_pointer p, struct pso_pointer stream ) {
    if ( !writep( stream ) ) {
        /* no usable stream supplied: fall back to stdout */
        return in_print( p, file_to_url_file( stdout ) );
    }

    URL_FILE *output = pointer_to_object( stream )->payload.stream.stream;

    /* hold a reference to the stream for the duration of the print */
    inc_ref( stream );
    struct pso_pointer result = in_print( p, output );
    dec_ref( stream );

    return result;
}

19
src/c/io/print.h Normal file
View file

@ -0,0 +1,19 @@
/**
* io/print.h
*
* Post Scarcity Software Environment: print.
*
* Print basic Lisp objects..This is :bootstrap layer print; it needs to be
* able to print characters, symbols, integers, lists and dotted pairs. I
* don't think it needs to be able to print anything else.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_io_print_h
#define __psse_io_print_h
/**
 * Print the object `p` to `stream` if that is a write stream, otherwise to
 * standard output. Returns `nil`, or an exception if an error occurred.
 */
struct pso_pointer print( struct pso_pointer p, struct pso_pointer stream );
#endif

232
src/c/io/read.c Normal file
View file

@ -0,0 +1,232 @@
/**
* read.c
*
* Read basic Lisp objects. This is the :bootstrap layer read; it needs to be
* able to read characters, symbols, integers, lists and dotted pairs. I
* don't think it needs to be able to read anything else. It must, however,
* take a readtable as argument and expand reader macros.
*
*
* (c) 2017 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
/*
* wide characters
*/
#include <wchar.h>
#include <wctype.h>
#include "debug.h"
#include "io/io.h"
#include "io/read.h"
#include "memory/node.h"
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/pso2.h"
#include "memory/tags.h"
#include "payloads/integer.h"
#include "payloads/read_stream.h"
#include "ops/assoc.h"
#include "ops/reverse.h"
#include "ops/stack_ops.h"
#include "ops/string_ops.h"
#include "ops/truth.h"
// TODO: what I've copied from 0.0.6 is *wierdly* over-complex for just now.
// I think I'm going to essentially delete all this and start again. We need
// to be able to despatch on readttables, and the initial readtable functions
// don't need to be written in Lisp.
//
// In the long run a readtable ought to be a hashtable, but for now an assoc
// list will do.
//
// A readtable function is a Lisp function so needs the stackframe and the
// environment. Other arguments (including the output stream) should be passed
// in the argument, so I think the first arg in the frame is the character read;
// the next is the input stream; the next is the readtable, if any.
/*
* for the time being things which may be read are:
* * integers
* * lists
* * atoms
* * dotted pairs
*/
/**
 * An example wrapper function while I work out how I'm going to do this.
 * It unpacks the conventional arguments and returns `nil` without reading
 * anything; it exists as a template for real `read` functions.
 *
 * For this and all other `read` functions unless documented otherwise, the
 * arguments in the frame are expected to be:
 *
 * 0. The input stream to read from;
 * 1. The read table currently in use;
 * 2. The character most recently read from that stream.
 *
 * @param frame_pointer a pointer to my stack frame.
 * @param env my environment.
 * @return always `nil`.
 */
struct pso_pointer read_example( struct pso_pointer frame_pointer,
struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
return result;
}
/**
* @brief Read one integer from the stream and return it.
*
* For this and all other `read` functions unless documented otherwise, the
* arguments in the frame are expected to be:
*
* 0. The input stream to read from;
* 1. The read table currently in use;
* 2. The character most recently read from that stream.
*/
struct pso_pointer read_number( struct pso_pointer frame_pointer,
struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
int base = 10;
// TODO: should check for *read-base* in the environment
int64_t value = 0;
if ( readp( stream ) ) {
if ( nilp( character ) ) {
character = get_character( stream );
}
wchar_t c = nilp( character ) ? 0 :
pointer_to_object( character )->payload.character.character;
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
for ( ; iswdigit( c ); c = url_fgetwc( input ) ) {
value = ( value * base ) + ( ( int ) c - ( int ) L'0' );
}
url_ungetwc( c, input );
result = make_integer( value );
} // else exception?
return result;
}
struct pso_pointer read_symbol( struct pso_pointer frame_pointer,
struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
if ( readp( stream ) ) {
if ( nilp( character ) ) {
character = get_character( stream );
}
wchar_t c = nilp( character ) ? 0 :
pointer_to_object( character )->payload.character.character;
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
for ( ; iswalnum( c ); c = url_fgetwc( input ) ) {
result = make_string_like_thing( c, result, SYMBOLTAG );
}
url_ungetwc( c, input );
result = reverse( result );
}
return result;
}
/**
* @brief Read the next object on the input stream indicated by this stack
* frame, and return a pso_pointer to the object read.
*
* For this and all other `read` functions unless documented otherwise, the
* arguments in the frame are expected to be:
*
* 0. The input stream to read from;
* 1. The read table currently in use;
* 2. The character most recently read from that stream.
*/
struct pso_pointer read( struct pso_pointer frame_pointer,
struct pso_pointer env ) {
struct pso4 *frame = pointer_to_pso4( frame_pointer );
struct pso_pointer stream = fetch_arg( frame, 0 );
struct pso_pointer readtable = fetch_arg( frame, 1 );
struct pso_pointer character = fetch_arg( frame, 2 );
struct pso_pointer result = nil;
if ( nilp( stream ) ) {
stream = make_read_stream( file_to_url_file( stdin ), nil );
}
if ( nilp( readtable ) ) {
// TODO: check for the value of `*read-table*` in the environment and
// use that.
}
if ( nilp( character ) ) {
character = get_character( stream );
}
struct pso_pointer readmacro = assoc( character, readtable );
if ( !nilp( readmacro ) ) {
// invoke the read macro on the stream
} else if ( readp( stream ) && characterp( character ) ) {
wchar_t c =
pointer_to_object( character )->payload.character.character;
URL_FILE *input = pointer_to_object( stream )->payload.stream.stream;
switch ( c ) {
case ';':
for ( c = url_fgetwc( input ); c != '\n';
c = url_fgetwc( input ) );
/* skip all characters from semi-colon to the end of the line */
break;
case EOF:
// result = throw_exception( c_string_to_lisp_symbol( L"read" ),
// c_string_to_lisp_string
// ( L"End of input while reading" ),
// frame_pointer );
break;
default:
struct pso_pointer next =
make_frame( frame_pointer, stream, readtable,
make_character( c ) );
inc_ref( next );
if ( iswdigit( c ) ) {
result = read_number( next, env );
} else if ( iswalpha( c ) ) {
result = read_symbol( next, env );
} else {
// result =
// throw_exception( c_string_to_lisp_symbol( L"read" ),
// make_cons( c_string_to_lisp_string
// ( L"Unrecognised start of input character" ),
// make_string( c, NIL ) ),
// frame_pointer );
}
dec_ref( next );
break;
}
}
return result;
}

25
src/c/io/read.h Normal file
View file

@ -0,0 +1,25 @@
/**
* read.h
*
* Read basic Lisp objects. This is the :bootstrap layer read; it needs to be
* able to read characters, symbols, integers, lists and dotted pairs. I
* don't think it needs to be able to read anything else. It must, however,
* take a readtable as argument and expand reader macros.
*
*
* (c) 2017 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_io_read_h
#define __psse_io_read_h
/*
 * All three functions use the Lisp calling convention: a pointer to the
 * stack frame, and the environment. The frame arguments are expected to be
 * 0. the input stream; 1. the read table; 2. the character most recently
 * read from that stream.
 */
/** Read one integer from the stream and return it. */
struct pso_pointer read_number( struct pso_pointer frame_pointer,
struct pso_pointer env );
/** Read one symbol from the stream and return it. */
struct pso_pointer read_symbol( struct pso_pointer frame_pointer,
struct pso_pointer env );
/** Read the next object from the stream and return it. */
struct pso_pointer read( struct pso_pointer frame_pointer,
struct pso_pointer env );
#endif

65
src/c/memory/destroy.c Normal file
View file

@ -0,0 +1,65 @@
/**
* memory/destroy.c
*
* Centralised point for despatching free methods to types.
*
* TODO: In the long run, we need a type for tags, which defines a constructor
* and a free method, along with the minimum and maximum size classes
* allowable for that tag; and we need a namespace in which tags are
* canonically stored, probably ::system:tags, in which the tag is bound to
* the type record describing it. And this all needs to work in Lisp, not
* in the substrate.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include "memory/node.h"
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/tags.h"
#include "payloads/cons.h"
#include "payloads/exception.h"
#include "payloads/stack.h"
#include "payloads/psse_string.h"
/**
 * @brief Despatch destroy message to the handler for the type of the
 * object indicated by `p`, if there is one. What the destroy handler
 * needs to do is dec_ref all the objects pointed to by it.
 *
 * The handler has 0.1.0 lisp calling convention, since
 * 1. we should be able to write destroy handlers in Lisp; and
 * 2. in the long run this whole system should be rewritten in Lisp.
 *
 * The handler returns `nil` on success, an exception pointer on
 * failure. This function returns that exception pointer. How we
 * handle that exception pointer I simply don't know yet.
 *
 * @param p pointer to the object being destroyed.
 * @return whatever the handler returned; `nil` if there is no handler for
 * the type of `p`.
 */
struct pso_pointer destroy( struct pso_pointer p ) {
    struct pso_pointer result = nil;
    /* wrap `p` in a frame so the handler can be called lisp-style */
    struct pso_pointer f = make_frame( nil, p );

    inc_ref( f );

    switch ( get_tag_value( p ) ) {
        case CONSTV:
            result = destroy_cons( f, nil );
            break;
        case EXCEPTIONTV:
            result = destroy_exception( f, nil );
            break;
        case KEYTV:
        case STRINGTV:
        case SYMBOLTV:
            /* all string-like things share one handler */
            result = destroy_string( f, nil );
            break;
        case STACKTV:
            result = destroy_stack_frame( f, nil );
            break;
        default:
            // TODO: others.
            break;
    }

    dec_ref( f );

    return result;
}

17
src/c/memory/destroy.h Normal file
View file

@ -0,0 +1,17 @@
/**
* memory/destroy.h
*
* Despatcher for destructor functions when objects are freed.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_destroy_h
#define __psse_memory_destroy_h
#include "memory/pointer.h"
/**
 * Despatch to the destroy handler for the type of the object indicated by
 * `p`, if there is one.
 */
struct pso_pointer destroy( struct pso_pointer p );
#endif

44
src/c/memory/header.h Normal file
View file

@ -0,0 +1,44 @@
/**
* memory/header.h
*
* Header for all page space objects
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_header_h
#define __psse_memory_header_h
/* <stdint.h> is the portable home of uint8_t / uint32_t; the glibc-internal
 * <bits/...> headers are not meant to be included directly. */
#include <stdint.h>
#include "memory/pointer.h"
/** the number of bytes in a tag mnemonic. */
#define TAGLENGTH 3
/** the largest value the 32 bit reference count can hold. */
#define MAXREFERENCE 4294967295
/**
 * @brief Header for all paged space objects.
 *
 * The tag may be viewed either as a three byte mnemonic followed by a one
 * byte size class, or as a single 32 bit number.
 */
struct pso_header {
    union {
        /** the tag (type) of this object,
         * considered as bytes */
        struct {
            /** mnemonic for this type; */
            char mnemonic[TAGLENGTH];
            /** size class for this object */
            uint8_t size_class;
        } bytes;
        /** the tag considered as a number */
        uint32_t value;
    } tag;
    /** the count of the number of references to this object */
    uint32_t count;
    /** pointer to the access control list of this object */
    struct pso_pointer access;
};
#endif

47
src/c/memory/memory.c Normal file
View file

@ -0,0 +1,47 @@
/**
* memory/memory.c
*
* The memory management subsystem.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <stdbool.h>
#include <stdio.h>
#include "memory/memory.h"
#include "memory/node.h"
#include "memory/pointer.h"
/**
 * @brief Freelists for each size class, indexed by size class.
 *
 * Valid size classes run up to and including MAX_SIZE_CLASS, so the array
 * needs MAX_SIZE_CLASS + 1 slots for `freelists[MAX_SIZE_CLASS]` to exist.
 */
struct pso_pointer freelists[MAX_SIZE_CLASS + 1];
/**
 * @brief Flag to prevent re-initialisation: set to true by
 * `initialise_memory` once the freelists have been cleared.
 */
bool memory_initialised = false;
/**
 * @brief Initialise the memory allocation system.
 *
 * Essentially, just set up the freelists; allocating pages will then happen
 * automatically as objects are requested. Calling this again after the
 * first time has no effect.
 *
 * @param node the index number of the node we are initialising.
 * @return struct pso_pointer `t`.
 */
struct pso_pointer initialise_memory( uint32_t node ) {
    if ( memory_initialised ) {
        // TODO: throw an exception
    } else {
        /* take the bound from the array itself, so that the loop can never
         * write past the end of `freelists` however it is dimensioned. */
        const size_t slots = sizeof( freelists ) / sizeof( freelists[0] );

        for ( size_t i = 0; i < slots; i++ ) {
            freelists[i] = nil;
        }
        memory_initialised = true;
    }

    return t;
}

30
src/c/memory/memory.h Normal file
View file

@ -0,0 +1,30 @@
/**
* memory/memory.h
*
* The memory management subsystem.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_memory_h
#define __psse_memory_memory_h
#include <stdint.h>
#include "memory/pointer.h"
/**
 * @brief Maximum size class
 *
 * Size classes are powers of 2, in words; so an object of size class 2
 * has an allocation size of four words; of size class 3, of eight words,
 * and so on. Size classes of 0 and 1 do not work for managed objects,
 * since managed objects require a two word header; it's unlikely that
 * these undersized size classes will be used at all.
 */
#define MAX_SIZE_CLASS 0xf
/**
 * Initialise the memory allocation system for the node with index `node`.
 * The prototype is spelled out in full so that it matches the definition
 * and calls are type-checked.
 */
struct pso_pointer initialise_memory( uint32_t node );
/** Exception object for out-of-memory conditions; defined elsewhere. */
extern struct pso_pointer out_of_memory_exception;
/** Freelists for each size class, indexed by size class. */
extern struct pso_pointer freelists[];
#endif

65
src/c/memory/node.c Normal file
View file

@ -0,0 +1,65 @@
/**
* memory/node.c
*
* Top level data about the actual node on which this memory system sits.
* May not belong in `memory`.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include "node.h"
#include <bits/stdint-uintn.h>
#include "environment/environment.h"
#include "memory/memory.h"
#include "memory/pointer.h"
#include "ops/eq.h"
/**
 * @brief Flag to prevent the node being initialised more than once.
 *
 * NOTE(review): `initialise_node` in this file neither tests nor sets this
 * flag; confirm whether the guard is implemented elsewhere.
 */
bool node_initialised = false;
/**
 * @brief The index of this node in the hypercube; set by `initialise_node`.
 *
 * TODO: once we have a hypercube, this must be set to the correct value
 * IMMEDIATELY on startup, before starting to initialise any other part of
 * the Lisp system.
 */
uint32_t node_index = 0;
/**
 * @brief The canonical `nil` pointer
 *
 * Initialised with a plain brace initialiser: a compound literal is not a
 * constant expression in standard C, so it is only accepted as the
 * initialiser of a static object as a compiler extension.
 */
struct pso_pointer nil = { 0, 0, 0 };
/**
 * @brief the canonical `t` (true) pointer.
 *
 */
struct pso_pointer t = { 0, 0, 1 };
/**
* @brief Set up the basic informetion about this node.
*
* @param index
* @return struct pso_pointer
*/
struct pso_pointer initialise_node( uint32_t index ) {
node_index = index;
struct pso_pointer result = initialise_memory( index );
if ( c_eq( result, t ) ) {
result = initialise_environment( index );
}
return result;
}

36
src/c/memory/node.h Normal file
View file

@ -0,0 +1,36 @@
/**
* memory/node.h
*
* Top level data about the actual node on which this memory system sits.
* May not belong in `memory`.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_node_h
#define __psse_memory_node_h
#include <stdint.h>
/**
 * @brief The index of this node in the hypercube.
 *
 */
extern uint32_t node_index;
/**
 * @brief The canonical `nil` pointer
 *
 */
extern struct pso_pointer nil;
/**
 * @brief the canonical `t` (true) pointer.
 *
 */
extern struct pso_pointer t;
/**
 * @brief Set up the basic information about this node, recording `index`
 * as its node index.
 */
struct pso_pointer initialise_node( uint32_t index );
#endif

169
src/c/memory/page.c Normal file
View file

@ -0,0 +1,169 @@
/**
* memory/page.c
*
* Page for paged space objects.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <math.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include "debug.h"
#include "memory/memory.h"
#include "memory/node.h"
#include "memory/page.h"
#include "memory/pso.h"
#include "memory/pso2.h"
#include "memory/pso3.h"
#include "memory/pso4.h"
#include "memory/pso5.h"
#include "memory/pso6.h"
#include "memory/pso7.h"
#include "memory/pso8.h"
#include "memory/pso9.h"
#include "memory/psoa.h"
#include "memory/psob.h"
#include "memory/psoc.h"
#include "memory/psod.h"
#include "memory/psoe.h"
#include "memory/psof.h"
#include "memory/tags.h"
#include "payloads/free.h"
/**
 * @brief The pages which have so far been initialised; unused slots are
 * NULL once `allocate_page` has been called for the first time.
 *
 * TODO: This is temporary. We cannot afford to allocate an array big enough
 * to hold the number of pages we *might* create at start up time. We need a
 * way to grow the number of pages, while keeping access to them cheap.
 */
union page *pages[NPAGES];
/**
 * @brief the number of pages which have thus far been allocated; also the
 * index in `pages` at which the next page will be stored.
 *
 */
uint32_t npages_allocated = 0;
/**
 * @brief private to allocate_page; do not use.
 *
 * Carves the page into objects of the given size class, tags each one as
 * free, and threads them all onto the front of `freelist`.
 *
 * @param page_addr address of the newly allocated page to be initialised;
 * @param page_index its location in the pages[] array;
 * @param size_class the size class of objects in this page;
 * @param freelist the freelist for objects of this size class.
 * @return struct pso_pointer the new head for the freelist for this size_class,
 */
struct pso_pointer initialise_page( union page *page_addr, uint16_t page_index,
                                    uint8_t size_class,
                                    struct pso_pointer freelist ) {
    struct pso_pointer result = freelist;
    /* object size in 64 bit words is 2^size_class; an integer shift avoids
     * a round trip through floating point. */
    size_t obj_size = ( size_t ) 1 << size_class;
    size_t obj_bytes = obj_size * sizeof( uint64_t );
    size_t objs_in_page = PAGE_BYTES / obj_bytes;
    /* address objects by BYTE offset from the start of the page: arithmetic
     * on a `union page *` would step in units of whole pages. */
    uint8_t *base = ( uint8_t * ) page_addr;

    // we do this backwards (i--) so that object {0, 0, 0} will be first on the
    // freelist when the first page is initiated, so we can grab that one for
    // `nil` and the next on for `t`.
    for ( size_t i = objs_in_page; i-- > 0; ) {
        // it should be safe to cast any pso object to a pso2
        struct pso2 *object = ( struct pso2 * ) ( base + ( i * obj_bytes ) );

        object->header.tag.bytes.size_class = size_class;
        /* the mnemonic is a fixed-width, unterminated field, hence strncpy */
        strncpy( &( object->header.tag.bytes.mnemonic[0] ), FREETAG,
                 TAGLENGTH );
        object->payload.free.next = result;

        /* NOTE(review): for the smaller size classes the word offset
         * `i * obj_size` exceeds 65535 towards the end of the page and is
         * truncated by this cast; confirm how `make_pointer` expects
         * offsets to be expressed. */
        result =
            make_pointer( node_index, page_index,
                          ( uint16_t ) ( i * obj_size ) );
    }

    return result;
}
/**
 * @brief Allocate a page for objects of this size class, initialise it, and
 * link the objects in it into the freelist for this size class.
 *
 * On the very first call the `pages` array is cleared first, and once the
 * first page has been initialised the canonical `nil` and `t` objects are
 * allocated from it.
 *
 * @param size_class an integer in the range 2...MAX_SIZE_CLASS.
 * @return t on success; `nil` (for now, pending exceptions) if the size
 * class is out of range, page space is exhausted, or the heap is exhausted.
 */
struct pso_pointer allocate_page( uint8_t size_class ) {
    /* NOTE(review): captured on entry, so the very first successful call
     * returns the value `t` had before it is reassigned below. */
    struct pso_pointer result = t;

    if ( npages_allocated == 0 ) {
        for ( int i = 0; i < NPAGES; i++ ) {
            pages[i] = NULL;
        }
        debug_print( L"Pages array zeroed.\n", DEBUG_ALLOC, 0 );
    }

    if ( npages_allocated >= NPAGES ) {
        // TODO: exception when we have one.
        fwide( stderr, 1 );
        fwprintf( stderr,
                  L"\nCannot allocate page: page space exhausted.\n" );
        return nil;
    }

    if ( size_class < 2 || size_class > MAX_SIZE_CLASS ) {
        // TODO: exception when we have one.
        fwide( stderr, 1 );
        fwprintf( stderr,
                  L"\nCannot allocate page for size class %x, min is 2 max is %x.\n",
                  size_class, MAX_SIZE_CLASS );
        return nil;
    }

    /* calloc hands back zeroed memory, replacing malloc + memset */
    void *pg = calloc( 1, sizeof( union page ) );

    if ( pg == NULL ) {
        // TODO: exception when we have one.
        fwide( stderr, 1 );
        fwprintf( stderr, L"\nCannot allocate page: heap exhausted,\n" );
        return nil;
    }

    pages[npages_allocated] = pg;
    debug_printf( DEBUG_ALLOC, 0,
                  L"Allocated page %d for objects of size class %x.\n",
                  npages_allocated, size_class );

    freelists[size_class] =
        initialise_page( ( union page * ) pg, npages_allocated,
                         size_class, freelists[size_class] );
    debug_printf( DEBUG_ALLOC, 0,
                  L"Initialised page %d; freelist for size class %x updated.\n",
                  npages_allocated, size_class );

    if ( npages_allocated == 0 ) {
        // first page allocated; initialise nil and t
        /* this must happen before the counter is incremented */
        nil = lock_object( allocate( NILTAG, 2 ) );
        t = lock_object( allocate( TRUETAG, 2 ) );
    }

    npages_allocated++;

    return result;
}
/**
 * @brief Allow other files to see the current value of `npages_allocated`,
 * but not change it.
 *
 * Declared with an explicit `void` parameter list so that this definition is
 * a true prototype; an empty `( )` list leaves the arguments unchecked in C
 * versions before C23.
 *
 * @return the current value of `npages_allocated`.
 */
uint32_t get_pages_allocated( void ) {
    return npages_allocated;
}

79
src/c/memory/page.h Normal file
View file

@ -0,0 +1,79 @@
/**
* memory/page.h
*
* Page for paged space objects.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_page_h
#define __psse_memory_page_h
#include "memory/pointer.h"
#include "memory/pso2.h"
#include "memory/pso3.h"
#include "memory/pso4.h"
#include "memory/pso5.h"
#include "memory/pso6.h"
#include "memory/pso7.h"
#include "memory/pso8.h"
#include "memory/pso9.h"
#include "memory/psoa.h"
#include "memory/psob.h"
#include "memory/psoc.h"
#include "memory/psod.h"
#include "memory/psoe.h"
#include "memory/psof.h"
/**
 * @brief The size of a page, **in bytes**: 1048576 = 2^20, i.e. one megabyte.
 *
 * Pointer offsets within a page are expressed in 64 bit words, so a page
 * holds PAGE_BYTES / 8 words.
 */
#define PAGE_BYTES 1048576
/**
 * @brief The number of pages we will initially allow for.
 *
 * For convenience we set up a fixed array of this many page pointers;
 * however,
 * TODO: later we will want a mechanism for this to be able to grow
 * dynamically to the maximum we can allow.
 */
#define NPAGES 64
/**
 * @brief The table of allocated pages, indexed by page number.
 *
 * `allocate_page()` sets every slot to NULL before the first page is
 * allocated, and stores each newly allocated page in the next unused slot.
 */
extern union page *pages[NPAGES];
/**
 * @brief A page is a megabyte of memory which contains objects all of which
 * are of the same size class.
 *
 * No page will contain both pso2s and pso4s, for example. We know what size
 * objects are in a page by looking at the size tag of the first object, which
 * will always be the fourth byte in the page (i.e page.bytes[3]; this depends
 * on the layout of `struct pso_header`, which is declared in
 * memory/header.h). However, we will not normally have to worry about what
 * size class the objects on a page are, since on creation all objects will
 * be linked onto the freelist for their size class, they will be allocated
 * from that free list, and on garbage collection they will be returned to
 * that freelist.
 *
 * Each member is a different view of the same PAGE_BYTES bytes. The divisor
 * in each array bound is the size in bytes of one object of that size class,
 * which doubles from one class to the next: 32 bytes for size class 2, up to
 * 262144 bytes for size class f.
 */
union page {
/* the page as raw bytes. */
uint8_t bytes[PAGE_BYTES];
/* the page as 64 bit words; pointer offsets index this array. */
uint64_t words[PAGE_BYTES / 8];
/* the page as an array of objects, one view per size class. */
struct pso2 pso2s[PAGE_BYTES / 32];
struct pso3 pso3s[PAGE_BYTES / 64];
struct pso4 pso4s[PAGE_BYTES / 128];
struct pso5 pso5s[PAGE_BYTES / 256];
struct pso6 pso6s[PAGE_BYTES / 512];
struct pso7 pso7s[PAGE_BYTES / 1024];
struct pso8 pso8s[PAGE_BYTES / 2048];
struct pso9 pso9s[PAGE_BYTES / 4096];
struct psoa psoas[PAGE_BYTES / 8192];
struct psob psobs[PAGE_BYTES / 16384];
struct psoc psocs[PAGE_BYTES / 32768];
struct psod psods[PAGE_BYTES / 65536];
struct psoe psoes[PAGE_BYTES / 131072];
struct psof psofs[PAGE_BYTES / 262144];
};
/**
 * @brief Allocate a page for objects of this size class, initialise it, and
 * link the objects in it into the freelist for this size class.
 *
 * @param size_class the size class of the objects the page is to hold; the
 * definition rejects values below 2 or above MAX_SIZE_CLASS.
 * @return `t` on success; currently `nil` on failure.
 */
struct pso_pointer allocate_page( uint8_t size_class );
/**
 * @brief Read-only access to the number of pages allocated so far.
 */
uint32_t get_pages_allocated( );
#endif

117
src/c/memory/pointer.c Normal file
View file

@ -0,0 +1,117 @@
/**
* memory/pointer.c
*
* A pointer to a paged space object.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <stddef.h>
#include "memory/node.h"
#include "memory/page.h"
#include "memory/pointer.h"
#include "memory/pso.h"
/**
* @brief Make a pointer to a paged-space object.
*
* @param node The index of the node on which the object is curated;
* @param page The memory page in which the object resides;
* @param offset The offset, in words, within that page, of the object.
* @return struct pso_pointer a pointer referencing the specified object.
*/
struct pso_pointer make_pointer( uint32_t node, uint16_t page,
uint16_t offset ) {
return ( struct pso_pointer ) { node, page, offset };
}
/**
* @brief returns the in-memory address of the object indicated by this
* pointer `p`.
*
* NOTE THAT: It's impossible, with our calling conventions, to pass an
* exception back from this function. Consequently, if anything goes wrong
* we return NULL. The caller *should* check for that and throw an exception.
*
* NOTE THAT: The return signature of these functions is pso2, because it is
* safe to cast any paged space object to a pso2, but safe to cast an object
* of a smaller size class to a larger one. If you know what size class you
* want, you should prefer `pointer_to_object_of_size_class()`, q.v.
*
* TODO: The reason I'm doing it this way is because I'm not
* certain reference counter updates work right it we work with 'the object'
* rather than 'the address of the object'. I really ought to have a
* conversation with someone who understands this bloody language.
*
* @param p a pso_pointer which references an object.
*
* @return the actual address in memory of that object, or NULL if `p` is
* invalid.
*/
struct pso2 *pointer_to_object( struct pso_pointer p ) {
struct pso2 *result = NULL;
if ( p.node == node_index ) {
if ( p.page < get_pages_allocated( )
&& p.offset < ( PAGE_BYTES / 8 ) ) {
// TODO: that's not really a safe test of whether this is a valid pointer.
union page *pg = pages[p.page];
result = ( struct pso2 * ) &pg->words[p.offset];
}
}
// TODO: else if we have a copy of the object in cache, return that;
// else request a copy of the object from the node which curates it.
return result;
}
/**
 * @brief returns the memory address of the object indicated by this pointer
 * `p`, if it is of this `size_class`.
 *
 * NOTE THAT: It's impossible, with our calling conventions, to pass an
 * exception back from this function. Consequently, if anything goes wrong
 * we return NULL. The caller *should* check for that and throw an exception.
 *
 * NOTE THAT: The return type is pso2, because it is safe to cast any paged
 * space object to a pso2, but *not* safe to cast an object of a smaller size
 * class to a larger one. A non-NULL result is guaranteed to carry the
 * requested size class in its header, so the caller may then cast it to the
 * matching `struct pso<size_class>`.
 *
 * @param p a pointer to an object;
 * @param size_class a size class.
 *
 * @return the memory address of the object, provided `p` resolves to an
 * object and that object is of the specified size class, else NULL.
 */
struct pso2 *pointer_to_object_of_size_class( struct pso_pointer p,
                                              uint8_t size_class ) {
    struct pso2 *result = pointer_to_object( p );

    // pointer_to_object() returns NULL for an invalid pointer; the header
    // must not be inspected in that case.
    if ( result != NULL
         && result->header.tag.bytes.size_class != size_class ) {
        result = NULL;
    }

    return result;
}
/**
 * @brief returns the memory address of the object indicated by this pointer
 * `p`, if it has this `tag_value`.
 *
 * Only the low 24 bits of the object's tag value are compared with
 * `tag_value`; the remaining bits are masked off first.
 *
 * NOTE THAT: It's impossible, with our calling conventions, to pass an
 * exception back from this function. Consequently, if anything goes wrong
 * we return NULL. The caller *should* check for that and throw an exception.
 *
 * @param p a pointer to an object;
 * @param tag_value the expected value of the low 24 bits of the tag.
 *
 * @return the memory address of the object, provided `p` resolves to an
 * object and that object's masked tag equals `tag_value`, else NULL.
 */
struct pso2 *pointer_to_object_with_tag_value( struct pso_pointer p,
                                               uint32_t tag_value ) {
    struct pso2 *result = pointer_to_object( p );

    // pointer_to_object() returns NULL for an invalid pointer; the header
    // must not be inspected in that case.
    if ( result != NULL
         && ( result->header.tag.value & 0xffffff ) != tag_value ) {
        result = NULL;
    }

    return result;
}

53
src/c/memory/pointer.h Normal file
View file

@ -0,0 +1,53 @@
/**
* memory/pointer.h
*
* A pointer to a paged space object.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pointer_h
#define __psse_memory_pointer_h
#include <stdint.h>
/**
 * @brief A pointer to an object in page space.
 *
 * A pointer is the triple (node, page, offset); it is passed around by
 * value and is resolved to a memory address by `pointer_to_object()`.
 */
struct pso_pointer {
/**
 * @brief The index of the node on which this object is curated.
 *
 * NOTE: This will always be zero until we have the hypercube router
 * working.
 */
uint32_t node;
/**
 * @brief The index of the allocated page in which this object is stored.
 */
uint16_t page;
/**
 * @brief The offset of the object within the page **in words**.
 *
 * NOTE THAT: This value is always **in words**, regardless of the size
 * class of the objects stored in the page, because until we've got hold
 * of the page we don't know its size class.
 *
 * NOTE(review): a 16 bit offset can address only 65536 words, whereas a
 * page of PAGE_BYTES (1048576) bytes holds 131072 words, so objects in the
 * upper half of a page look unreachable through this field - confirm.
 */
uint16_t offset;
};
/**
 * @brief Make a pointer from a node index, a page index and an offset in
 * words within that page.
 */
struct pso_pointer make_pointer( uint32_t node, uint16_t page,
uint16_t offset );
/**
 * @brief Resolve a pointer to the memory address of the object it
 * references, or NULL if the pointer is not valid.
 */
struct pso2 *pointer_to_object( struct pso_pointer pointer );
/**
 * @brief As `pointer_to_object()`, but NULL unless the object's header
 * records this `size_class`.
 */
struct pso2 *pointer_to_object_of_size_class( struct pso_pointer p,
uint8_t size_class );
/**
 * @brief As `pointer_to_object()`, but NULL unless the low 24 bits of the
 * object's tag equal `tag_value`.
 */
struct pso2 *pointer_to_object_with_tag_value( struct pso_pointer p,
uint32_t tag_value );
#endif

194
src/c/memory/pso.c Normal file
View file

@ -0,0 +1,194 @@
/**
* memory/pso.c
*
* Paged space objects.
*
* Broadly, it should be safe to cast any paged space object to a pso2, since
* that is the smallest actually used size class. This should work to extract
* the tag and size class fields from the header, for example. I'm not
* confident enough of my understanding of C to know whether it is similarly
* safe to cast something passed to you as a pso2 up to something larger, even
* if you know from the size class field that it actually is something larger.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include <stdbool.h>
#include <string.h>
#include "debug.h"
#include "memory/destroy.h"
#include "memory/header.h"
#include "memory/memory.h"
#include "memory/node.h"
#include "memory/page.h"
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/tags.h"
#include "ops/truth.h"
/**
* @brief Allocate an object of this size_class with this tag.
*
* @param tag The tag. Only the first three bytes will be used;
* @param size_class The size class for the object to be allocated;
* @return struct pso_pointer a pointer to the newly allocated object
*/
struct pso_pointer allocate( char *tag, uint8_t size_class ) {
struct pso_pointer result = nil;
if ( size_class <= MAX_SIZE_CLASS ) {
if ( nilp( freelists[size_class] ) ) {
result = allocate_page( size_class );
}
if ( !exceptionp( result ) && not( freelists[size_class] ) ) {
result = freelists[size_class];
struct pso2 *object = pointer_to_object( result );
freelists[size_class] = object->payload.free.next;
strncpy( ( char * ) ( object->header.tag.bytes.mnemonic ), tag,
TAGLENGTH );
/* the object ought already to have the right size class in its tag
* because it was popped off the freelist for that size class. */
if ( object->header.tag.bytes.size_class != size_class ) {
// TODO: return an exception instead? Or warn, set it, and continue?
}
/* the objext ought to have a reference count ot zero, because it's
* on the freelist, but again we should sanity check. */
if ( object->header.count != 0 ) {
// TODO: return an exception instead? Or warn, set it, and continue?
}
}
} // TODO: else throw exception
return result;
}
/**
 * @brief The size of the payload of this object, in 64 bit words.
 *
 * An object of size class `n` occupies `1 << n` words in total, of which the
 * first `sizeof( struct pso_header ) / sizeof( uint64_t )` are the header.
 * Both quantities are taken in words here; subtracting the header size in
 * bytes from the object size in words would underflow for small objects.
 *
 * NOTE(review): the result is a word count because the caller in this file
 * uses it to index `payload.words[]`; confirm no other caller expects bytes.
 *
 * @param object the object whose payload size is wanted.
 * @return the number of payload words, or 0 if `object` is NULL or its size
 * class is too small to hold anything but the header.
 */
uint32_t payload_size( struct pso2 *object ) {
    // TODO: Unit tests DEFINITELY needed!
    if ( object == NULL ) {
        return 0;
    }

    uint32_t total_words =
        ( uint32_t ) 1 << object->header.tag.bytes.size_class;
    uint32_t header_words =
        ( uint32_t ) ( sizeof( struct pso_header ) / sizeof( uint64_t ) );

    return ( total_words > header_words ) ? ( total_words - header_words )
        : 0;
}
/**
 * @brief Increment the reference count of the object at this pointer.
 *
 * You can't roll over the reference count. Once it hits the maximum
 * value (MAXREFERENCE) you cannot increment further.
 *
 * If `pointer` cannot be resolved to an object, nothing is changed.
 *
 * @param pointer pointer to the object whose count is to be incremented.
 * @return the `pointer`.
 */
struct pso_pointer inc_ref( struct pso_pointer pointer ) {
    struct pso2 *object = pointer_to_object( pointer );

    // pointer_to_object() returns NULL for an invalid pointer.
    if ( object != NULL && object->header.count < MAXREFERENCE ) {
        object->header.count++;
#ifdef DEBUG
        debug_printf( DEBUG_ALLOC, 0,
                      L"\nIncremented object of type %3.3s at page %u, offset %u to count %u",
                      ( ( char * ) &object->header.tag.bytes.mnemonic[0] ),
                      pointer.page, pointer.offset, object->header.count );
        if ( vectorpointp( pointer ) ) {
            debug_printf( DEBUG_ALLOC, 0,
                          L"; pointer to vector object of type %3.3s.\n",
                          ( ( char * )
                            &( object->payload.vectorp.tag.bytes[0] ) ) );
        } else {
            debug_println( DEBUG_ALLOC );
        }
#endif
    }

    return pointer;
}
/**
 * @brief Decrement the reference count of the object at this pointer.
 *
 * If a count has reached MAXREFERENCE it cannot be decremented.
 * If a count is decremented to zero the object is freed.
 *
 * If `pointer` cannot be resolved to an object, nothing is changed.
 *
 * @param pointer pointer to the object whose count is to be decremented.
 * @return the `pointer`, or, if the object has been freed, `nil`.
 */
struct pso_pointer dec_ref( struct pso_pointer pointer ) {
    struct pso2 *object = pointer_to_object( pointer );

    // pointer_to_object() returns NULL for an invalid pointer.
    if ( object != NULL && object->header.count > 0
         && object->header.count != MAXREFERENCE ) {
        object->header.count--;
#ifdef DEBUG
        // the mnemonic is printed with %3.3s, as in inc_ref(), so that the
        // size class byte which follows it is not printed as a character.
        debug_printf( DEBUG_ALLOC, 0,
                      L"\nDecremented object of type %3.3s at page %d, offset %d to count %d",
                      ( ( char * ) ( object->header.tag.bytes.mnemonic ) ),
                      pointer.page, pointer.offset, object->header.count );
        if ( vectorpointp( pointer ) ) {
            debug_printf( DEBUG_ALLOC, 0,
                          L"; pointer to vector object of type %3.3s.\n",
                          ( ( char * )
                            &( object->payload.vectorp.tag.bytes ) ) );
        } else {
            debug_println( DEBUG_ALLOC );
        }
#endif
        if ( object->header.count == 0 ) {
            free_object( pointer );
            pointer = nil;
        }
    }

    return pointer;
}
/**
 * @brief Prevent an object ever being dereferenced.
 *
 * The reference count is set to MAXREFERENCE; `inc_ref()` and `dec_ref()`
 * both leave a count with that value unchanged.
 *
 * If `pointer` cannot be resolved to an object, nothing is changed.
 *
 * @param pointer pointer to an object to lock.
 *
 * @return the `pointer`
 */
struct pso_pointer lock_object( struct pso_pointer pointer ) {
    struct pso2 *object = pointer_to_object( pointer );

    // pointer_to_object() returns NULL for an invalid pointer.
    if ( object != NULL ) {
        object->header.count = MAXREFERENCE;
    }

    return pointer;
}
/**
* @brief decrement all pointers pointed to by the object at this pointer;
* clear its memory, and return it to the freelist.
*/
struct pso_pointer free_object( struct pso_pointer p ) {
struct pso_pointer result = nil;
struct pso2 *obj = pointer_to_object( p );
uint32_t array_size = payload_size( obj );
uint8_t size_class = obj->header.tag.bytes.size_class;
result = destroy( p );
/* will C just let me cheerfully walk off the end of the array I've declared? */
for ( int i = 0; i < array_size; i++ ) {
obj->payload.words[i] = 0;
}
strncpy( ( char * ) ( obj->header.tag.bytes.mnemonic ), FREETAG,
TAGLENGTH );
#ifdef DEBUG
debug_printf( DEBUG_ALLOC, 0,
L"Freeing object of size class %d at {%d, %d, %d}",
size_class, p.node, p.page, p.offset );
#endif
/* TODO: obtain mutex on freelist */
obj->payload.free.next = freelists[size_class];
freelists[size_class] = p;
return result;
}

28
src/c/memory/pso.h Normal file
View file

@ -0,0 +1,28 @@
/**
* memory/pso.h
*
* Paged space objects.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pso_h
#define __psse_memory_pso_h
#include <stdint.h>
#include "memory/header.h"
#include "memory/pointer.h"
/**
 * @brief Allocate an object of this `size_class`, popped from the freelist
 * for that size class, and write `tag` into its header.
 */
struct pso_pointer allocate( char *tag, uint8_t size_class );
/**
 * @brief Decrement the reference count of the object at `pointer`, freeing
 * the object if the count reaches zero; a count of MAXREFERENCE is left
 * unchanged.
 */
struct pso_pointer dec_ref( struct pso_pointer pointer );
/**
 * @brief Increment the reference count of the object at `pointer`, unless
 * it has already reached MAXREFERENCE.
 */
struct pso_pointer inc_ref( struct pso_pointer pointer );
/**
 * @brief Set the reference count of the object at `pointer` to
 * MAXREFERENCE, so that it is never freed by `dec_ref()`.
 */
struct pso_pointer lock_object( struct pso_pointer pointer );
/**
 * @brief Destroy the object at `p`, clear its payload, and return it to the
 * freelist for its size class.
 */
struct pso_pointer free_object( struct pso_pointer p );
#endif

56
src/c/memory/pso2.h Normal file
View file

@ -0,0 +1,56 @@
/**
* memory/pso2.h
*
* Paged space object of size class 2, four words total, two words payload.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pso2_h
#define __psse_memory_pso2_h
#include <stdint.h>
#include "../payloads/psse_string.h"
#include "memory/header.h"
#include "payloads/character.h"
#include "payloads/cons.h"
#include "payloads/free.h"
#include "payloads/function.h"
#include "payloads/integer.h"
#include "payloads/keyword.h"
#include "payloads/lambda.h"
#include "payloads/nlambda.h"
#include "payloads/read_stream.h"
#include "payloads/symbol.h"
#include "payloads/time.h"
#include "payloads/vector_pointer.h"
#include "payloads/write_stream.h"
/**
 * @brief A paged space object of size class 2, four words total, two words
 * payload.
 *
 * This is the smallest size class in use. Every larger size class starts
 * with the same `struct pso_header`, which is why any paged space object may
 * be viewed as a pso2 in order to read its header.
 *
 * The payload is a union: each member is a different interpretation of the
 * same 16 bytes, and the tag in the header says which one applies.
 */
struct pso2 {
struct pso_header header;
union {
/* the payload as raw bytes. */
char bytes[16];
/* the payload as 64 bit words. */
uint64_t words[2];
struct character_payload character;
struct cons_payload cons;
/* used while the object is on a freelist; holds the link to the next
 * free object. */
struct free_payload free;
struct function_payload function;
struct integer_payload integer;
struct lambda_payload lambda;
/* NOTE(review): declared with the same payload type as `function`. */
struct function_payload special;
struct stream_payload stream;
struct string_payload string;
// TODO: this isn't working and I don't know why (error: field time has incomplete type)
// NOTE(review): "incomplete type" usually means the definition of
// struct time_payload is not visible at this point - check
// payloads/time.h for a name mismatch or an include cycle.
// struct time_payload time;
struct vectorp_payload vectorp;
} payload;
};
#endif

37
src/c/memory/pso3.h Normal file
View file

@ -0,0 +1,37 @@
/**
* memory/pso3.h
*
* Paged space object of size class 3, 8 words total, 6 words payload.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pso3_h
#define __psse_memory_pso3_h
#include <stdint.h>
#include "memory/header.h"
#include "payloads/exception.h"
#include "payloads/free.h"
#include "payloads/mutex.h"
/**
 * @brief A paged space object of size class 3, 8 words total, 6 words
 * payload.
 *
 * The payload is a union: each member is a different interpretation of the
 * same 48 bytes.
 */
struct pso3 {
struct pso_header header;
union {
/* the payload as raw bytes. */
char bytes[48];
/* the payload as 64 bit words. */
uint64_t words[6];
struct exception_payload exception;
/* used while the object is on a freelist. */
struct free_payload free;
struct mutex_payload mutex;
} payload;
};
#endif

18
src/c/memory/pso4.c Normal file
View file

@ -0,0 +1,18 @@
/**
* memory/pso4.c
*
* Paged space object of size class 4, 16 words total, 14 words payload.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#include "memory/pointer.h"
#include "memory/pso.h"
#include "memory/pso2.h"
#include "memory/pso4.h"
/**
 * @brief Resolve the pointer `p` to the size class 4 object it references.
 *
 * @param p a pointer to an object.
 * @return the address of the object as a `struct pso4 *`, or NULL if
 * `pointer_to_object_of_size_class( p, 4 )` returns NULL, i.e. `p` is not
 * valid or the object is not of size class 4.
 */
struct pso4 *pointer_to_pso4( struct pso_pointer p ) {
    // the size class has been checked, so the cast up from pso2 is sound.
    return ( struct pso4 * ) pointer_to_object_of_size_class( p, 4 );
}

36
src/c/memory/pso4.h Normal file
View file

@ -0,0 +1,36 @@
/**
* memory/pso4.h
*
* Paged space object of size class 4, 16 words total, 14 words payload.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pso4_h
#define __psse_memory_pso4_h
#include <stdint.h>
#include "memory/header.h"
#include "payloads/free.h"
#include "payloads/stack.h"
/**
 * @brief A paged space object of size class 4, 16 words total, 14 words
 * payload.
 *
 * The payload is a union: each member is a different interpretation of the
 * same 112 bytes.
 */
struct pso4 {
struct pso_header header;
union {
/* the payload as raw bytes. */
char bytes[112];
/* the payload as 64 bit words. */
uint64_t words[14];
/* used while the object is on a freelist. */
struct free_payload free;
struct stack_frame_payload stack_frame;
} payload;
};
/**
 * @brief Resolve the pointer `p` to a size class 4 object; defined in
 * memory/pso4.c in terms of `pointer_to_object_of_size_class( p, 4 )`.
 *
 * @return presumably the address of the object, or NULL if `p` does not
 * reference an object of size class 4 - NOTE(review): confirm against the
 * definition.
 */
struct pso4 *pointer_to_pso4( struct pso_pointer p );
#endif

32
src/c/memory/pso5.h Normal file
View file

@ -0,0 +1,32 @@
/**
* memory/pso5.h
*
* Paged space object of size class 5, 32 words total, 30 words payload.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pso5_h
#define __psse_memory_pso5_h
#include <stdint.h>
#include "memory/header.h"
#include "payloads/free.h"
/**
 * @brief A paged space object of size class 5, 32 words total, 30 words
 * payload.
 *
 * No payload specific to this size class is declared yet; the payload can be
 * viewed only as raw bytes, as words, or as a freelist link.
 */
struct pso5 {
struct pso_header header;
union {
/* the payload as raw bytes. */
char bytes[240];
/* the payload as 64 bit words. */
uint64_t words[30];
/* used while the object is on a freelist. */
struct free_payload free;
} payload;
};
#endif

32
src/c/memory/pso6.h Normal file
View file

@ -0,0 +1,32 @@
/**
* memory/pso6.h
*
* Paged space object of size class 6, 64 words total, 62 words payload.
*
* (c) 2026 Simon Brooke <simon@journeyman.cc>
* Licensed under GPL version 2.0, or, at your option, any later version.
*/
#ifndef __psse_memory_pso6_h
#define __psse_memory_pso6_h
#include <stdint.h>
#include "memory/header.h"
#include "payloads/free.h"
/**
 * @brief A paged space object of size class 6, 64 words total, 62 words
 * payload.
 *
 * No payload specific to this size class is declared yet; the payload can be
 * viewed only as raw bytes, as words, or as a freelist link.
 */
struct pso6 {
struct pso_header header;
union {
/* the payload as raw bytes. */
char bytes[496];
/* the payload as 64 bit words. */
uint64_t words[62];
/* used while the object is on a freelist. */
struct free_payload free;
} payload;
};
#endif

Some files were not shown because too many files have changed in this diff Show more