From 74b47f438090700eb7f58fcc078950f3e7c65563 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Mon, 6 Apr 2026 15:06:50 +0100 Subject: [PATCH 1/3] The Day 1 exercise now works. It did compile yesterday, but didn't link. I installed the package `gcc-multilib` to fix this. --- src/c/day1.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/c/day1.c b/src/c/day1.c index cc9d7d5..16947c9 100644 --- a/src/c/day1.c +++ b/src/c/day1.c @@ -3,10 +3,15 @@ * * Grendel: a compiling Beowulf reimplementation. * - * Day 1 of work towards a compiler... eventually.. + * Day 1 of work towards a compiler... eventually... * * This is a straight copy of Noah Zentzis' work. There's no original work of - * mine here, yet + * mine here, yet. + * + * See: https://generalproblem.net/lets_build_a_compiler/01-starting-out/ + * + * I needed to install (on Debian) the package `gcc-multilib` to get this to + * link. * * (c) 2026 Simon Brooke * Licensed under GPL version 2.0, or, at your option, any later version. @@ -16,6 +21,12 @@ #include #include +/* + * "The `__attribute__((__cdecl__))` part is a GCC-specific syntax extension + * that tells the compiler to use the + * "[cdecl](https://en.wikipedia.org/wiki/X86_calling_conventions#cdecl)" + * calling convention when executing the function." + */ __attribute__((__cdecl__)) extern int lisp_entry(); From f9cbc40e0afa3cb79e5f06f164be16af5d768e99 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Mon, 6 Apr 2026 16:12:53 +0100 Subject: [PATCH 2/3] D'oh! My memory model is WRONG and I need to rethink. --- src/c/day1.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/c/day1.c b/src/c/day1.c index 16947c9..28ed48d 100644 --- a/src/c/day1.c +++ b/src/c/day1.c @@ -13,6 +13,10 @@ * I needed to install (on Debian) the package `gcc-multilib` to get this to * link. * + * The rest of day 1 is setting ip memory representation, which I've already + * done in `memory.h`, and which I'm consciously doing differently from the + * way that Zentis does it. + * * (c) 2026 Simon Brooke * Licensed under GPL version 2.0, or, at your option, any later version. */ From 69d762645aa2b9bf3efa8dcd5048f327f5eb5da5 Mon Sep 17 00:00:00 2001 From: Simon Brooke Date: Thu, 9 Apr 2026 10:03:02 +0100 Subject: [PATCH 3/3] Work on getting the memory model translated into Scheme. --- README.md | 8 +- src/c/memory.c | 0 src/c/memory.h | 17 +++- src/guile/memory.scm | 208 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 228 insertions(+), 5 deletions(-) create mode 100644 src/c/memory.c create mode 100644 src/guile/memory.scm diff --git a/README.md b/README.md index 4974b07..f5e117d 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Tags will be allocated as follows: | 4 | 4 | 0x4 | unassigned (possibly a floating point number, later.) | | 5 | 5 | 0x5 | unassigned | | 6 | 6 | 0x6 | unassigned | -| 7 | 7 | 0x7 | a cons cell | +| 7 | 7 | 0x7 | **never** used: see [Recognising a cons cell](#Recognising-a-cons-cell), below | | 7 | 15 | 0xf | a symbol cell *(this implies a symbol can have only up to seven, or if compressed to five bits per character, eleven characters)* | | 7 | 23 | 0x17 | a pointer to a compiled function *(there's a problem here; it means we can only allocate a function in the lower 72,057,594,037,927,936 bytes of memory; I *think* that's not going to byte us on the bum, pun intended)*. | | 7 | 31 | 0x1f | a pointer to a compiled special form *(same problem as above)*. | @@ -60,6 +60,12 @@ Tags will be allocated as follows: | 7 | 119 | | unassigned | | 7 | 127 | 0x7f | a free cell | +### Recognising a cons cell + +My original idea was to have a specific tag to mean a cons cell, and that tag was going to be 7, binary 111, all three lower-most bits set. + +This does not work. If we were to do that, there is nowhere to put the tag of the `car` of the cell. So a cell is a cons cell if the value of the lower three bits of the tag is **less than** 7; all 64 bit objects other than cons cells will have all of the three lower-most bits of the tag set. + ## Problems with building a Ghuloum-style compiler in Lisp 1.5 Ghuloum's compiler emits strings in the form of assembly language statements into a file which is then run through a separate assembler to produce a binary which is finally integrated with a launcher stub written in C using a linker. This makes it possible to write a Lisp largely in that Lisp itself (provided you have an existing Lisp fostermother image to run the initial compilation); but it does not dirctly enable you to compile a single function into the existing image at runtime, and then immediately use the newly compiled function; and as far as I'm concerned, until you have that you don't have a working Lisp compiler. diff --git a/src/c/memory.c b/src/c/memory.c new file mode 100644 index 0000000..e69de29 diff --git a/src/c/memory.h b/src/c/memory.h index 07ad06f..6fe3c6d 100644 --- a/src/c/memory.h +++ b/src/c/memory.h @@ -16,6 +16,10 @@ /* Tags for 32 bit objects, with 3 bits of tag an one mark bit */ +/** + * @brief An error object + * + */ #define ERRORTV (0) /** * @brief This pointer object is an actual pointer -- an offset into consspace. @@ -41,9 +45,14 @@ */ /** - * @bried This is not actually a pointer at all but the first word of a cell. + * @brief A 64 bit object, other than a cons cell. + + * All 64 bit objects other + * than cons cells shall have all three lower bits of the tag set; a cons cell + * is any word in managed memory with a 32 bit object in the `car` position + * (and thus at least on of the lower three bits of its tag is **not** set.) */ -#define CELLTV (7) +#define OBJ64TV (7) /** * @brief this cell is a symbol @@ -75,7 +84,7 @@ */ #define STRINGTV (0x37) -// The possible potential values remain unassigned: +// These valid potential values remain unassigned: // i = 63 (111111, 0x3f); // i = 71 (1000111, 0x47); // i = 79 (1001111, 0x4f); @@ -107,7 +116,7 @@ /** * @brief Return the tag of this object, assuming it to be a 32 bit object - * (unsafe -- verify that tag32(obj) == CELLTV first) + * (unsafe -- verify that tag32(obj) == OBJ64TV first) */ #define tag64(obj) ((obj << 1) & TAG64) diff --git a/src/guile/memory.scm b/src/guile/memory.scm new file mode 100644 index 0000000..f92a918 --- /dev/null +++ b/src/guile/memory.scm @@ -0,0 +1,208 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; memory.scm +;; +;; Grendel: a compiling Beowulf reimplementation. +;; +;; The memory management subsystem. +;; +;; This is essentially the same representation as given in `memory.h`, +;; except, obviously, expressed in Scheme. +;; +;; I don't currently know how to prevent name collisions in Scheme. I'm a +;; bit worried by this! I'm using the prefix `bw-` where I think I'm at +;; risk. +;; +;; (c) 2026 Simon Brooke +;; Licensed under GPL version 2.0, or, at your option, any later version. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Cons space ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; It **appears** to be possible to make an array of 'u64' in Guile. +;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; the number of cells in the array +(define bw-cons-space-size 2048) + +;; the actual array +(define bw-cons-space (make-typed-array 'u64 0 2048)) + +;; the freelist pointer +(define bw-freelist-pointer 0) + +;; Cons space objects: tsgs ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Tag values for 32 bit objects, with 3 bits of tag and one mark bit + +;; An error object +(define error-tv 0) + +;; An actual pointer -- except that my current plan is that this should be +;; a pointer (offset) into an array of 64 bit words, not the address of an +;; arbitrary byte in memory +(define offset-tv 1) + +;; Maximum value I think we can hold in an integer. +;; calculated by (format #t "~x" (floor (/ (- (expt 2 28) 1) 2))) +(define max-integer #x7ffffff) + +;; Minimum value I think we can hold in an integer. +(define min-integer -#x7ffffff) + +;; An integer object -- a 28 bit signed integer value. +(define integer-tv 2) + +;; A character object +(define char-tv 3) + +;; A 16 bit floating point number (future expansion). +;; Yes, it could be 28 bit, but I think that would hurt my brain. +(define float-tv 4) + +;; Values 5 and 6 are available for further 32 bit data types. + +;; Value 7 is special, because +;; 1. it should appear *only* in the first 32 bit half of a 64 bit word; and +;; 2. it marks the word as a single 64 bit object, rather than as a pair of +;; 32 bit objects. +;; All 64 bit objects other than cons cells shall have all three lower bits +;; of the tag set; a cons cell is any word in managed memory with a 32 bit +;; object in the `car` position (and thus at least one of the lower three bits +;; of its tag is **not** set.) +(define obj64-tv 7) + +;; A symbol object +;; In the long run this may be a pointer into the heap. Lisp 1.5 accepted names +;; of up to 30 characters; I can't pack that into a 64 bit cell, let alone a 56 +;; bit payload! But, for bootstrapping, we'll pack 11 upper alpha characters at +;; five bits each into a 56 bit payload. +(define symbol-tv #xf) + +;; A (compiled) function object +;; I'm pretty sure this is going to have to end up being a pointer into the heap. +(define function-tv #x17) + +;; A (compiled) special form +;; Also probably a pointer into the heap. +(define special-tv #x1f) + +;; A rational number? Future expansion. +(define rational-tv #x27) + +;; A bignum? Future expansion. +(define bignum-tv #x2f) + +;; (Part of) a string? Or a string could be a pointer into the heap? In any case, +;; future expansion. +(define string-tv #x37) + +;; These valid potential values remain unassigned: +;; i = 63 (111111, 0x3f); +;; i = 71 (1000111, 0x47); +;; i = 79 (1001111, 0x4f); +;; i = 87 (1010111, 0x57); +;; i = 95 (1011111, 0x5f); +;; i = 103 (1100111, 0x67); +;; i = 111 (1101111, 0x6f); +;; i = 119 (1110111, 0x77); + +;; A free cell: an unassigned cell which should be part of the freelist. +(define free-tv #x7f) + +;; Cons space objects: masks ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; the mask for the mark bit, which is the first bit in every object. +;; 32 bit objects in the cdr position don't need a mark bit, but tough. +;; The mark bit is for use by the mark-but-don't-sweep garbage collector. +(define mark-mask 1) + +;; the mask for the tag of a 32 bit object,;;after* it has been shifted left +;; one place to skip the mark bit. +(define tag-32-mask 7) + +;; the mask for the tag of a 64 bit object,;;after* it has been shifted left +;; one place to skip the mark bit. +(define tag-64-mask #xf) + +;; the mask for a full 64 bit object. +(define mask-64 #xffffffffffffffff) + +(define payload-64-mask (- mask-64 tag-64-mask)) + +(define mask-32 #xffffffff) + +(define payload-32-mask (- mask-32 tag-32-mask)) + +;; Cons space objects: functions on tags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Return the tag of this object, assuming it to be a 32 bit object +;; (unsafe -- it may be a 64 bit object) +(define (tag-32 obj) (logand (ash obj -1) tag-32-mask)) + +;; Return the tag of this object, assuming it to be a 32 bit object +;; (unsafe -- verify that tag32(obj) == CELLTV first) +(define (tag-64 obj) (logand (ash obj -1) tag-64-mask)) + +;; Return the tag of an object +(define (tag obj) + (cond + ((= (tag-32 obj) obj64-tv) (tag-64 obj)) + (else (tag-32 obj)))) + +;; An object is a 32 bit object if its tag value is less than obj64-tv +(define (obj32? obj) (< (tag-32 obj) obj64-tv)) + +;; Cons space objects: type predicates ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; for our purposes, an object ia a cons cell if it has a 32 bit object +;; in its lower 32 bits and its value is greater than the maximum possible +;; value of a 32 bit object. +(define (bw-cons? obj) + (and (obj32? obj)(> obj mask-32))) + +(define (bw-error? obj) (= (tag obj) error-tv)) + +(define (bw-offset? obj) (= (tag obj) offset-tv)) + +;; Scheme already has a function called `integer?` which +;; I'm going to need. +(define (bw-integer? obj) (= (tag obj) integer-tv)) + +;; Scheme already has a function called `char?` +(define (bw-character? obj) (= (tag obj) char-tv)) + +;; Scheme already has a function called `symbol?` +(define (bw-symbol? obj) (= (tag obj) symbol-tv)) + +(define (bw-function? obj) (= (tag obj) function-tv)) + +(define (bw-special? obj) (= (tag obj) special-tv)) + +;; if the object is 32 bits, it has 3 bits tag (+ 1 bit mark => 28 bits +;; payload); otherwise, it's a 64 bit object with 7 bits tag + 1 bit mark +;; > 56 bits payload. +(define (bw-payload obj) (ash obj (if (obj32? obj) -4 -8))) + +(define (bw-car obj) (logand obj mask-32)) + +(define (bw-make-object tag payload) + (cond ((and (integer? tag) (<= tag free-tv) (integer? payload)) + (ash (+ tag (ash (logand payload payload-32-mask) 3) 1))) + ((> tag tag-64) + (ash (+ tag (ash (logand payload payload-64-mask) 7)) 1)) + (else make-object error-tv (to-error-payload "X1")))) + +(define (bw-make-error payload) + (cond ((string? payload)) + (else (bw-make-error "R1")))) + +(define (bw-make-integer payload) + (cond + ((and (integer? payload) + (> payload min-integer) + (< payload max-integer)) + (bw-make-object integer-tv payload)) + (else (bw-make-error "R6")))) \ No newline at end of file