diff --git a/README.md b/README.md index f5e117d..4974b07 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Tags will be allocated as follows: | 4 | 4 | 0x4 | unassigned (possibly a floating point number, later.) | | 5 | 5 | 0x5 | unassigned | | 6 | 6 | 0x6 | unassigned | -| 7 | 7 | 0x7 | **never** used: see [Recognising a cons cell](#Recognising-a-cons-cell), below | +| 7 | 7 | 0x7 | a cons cell | | 7 | 15 | 0xf | a symbol cell *(this implies a symbol can have only up to seven, or if compressed to five bits per character, eleven characters)* | | 7 | 23 | 0x17 | a pointer to a compiled function *(there's a problem here; it means we can only allocate a function in the lower 72,057,594,037,927,936 bytes of memory; I *think* that's not going to byte us on the bum, pun intended)*. | | 7 | 31 | 0x1f | a pointer to a compiled special form *(same problem as above)*. | @@ -60,12 +60,6 @@ Tags will be allocated as follows: | 7 | 119 | | unassigned | | 7 | 127 | 0x7f | a free cell | -### Recognising a cons cell - -My original idea was to have a specific tag to mean a cons cell, and that tag was going to be 7, binary 111, all three lower-most bits set. - -This does not work. If we were to do that, there is nowhere to put the tag of the `car` of the cell. So a cell is a cons cell if the value of the lower three bits of the tag is **less than** 7; all 64 bit objects other than cons cells will have all of the three lower-most bits of the tag set. - ## Problems with building a Ghuloum-style compiler in Lisp 1.5 Ghuloum's compiler emits strings in the form of assembly language statements into a file which is then run through a separate assembler to produce a binary which is finally integrated with a launcher stub written in C using a linker. This makes it possible to write a Lisp largely in that Lisp itself (provided you have an existing Lisp fostermother image to run the initial compilation); but it does not dirctly enable you to compile a single function into the existing image at runtime, and then immediately use the newly compiled function; and as far as I'm concerned, until you have that you don't have a working Lisp compiler. diff --git a/src/c/day1.c b/src/c/day1.c index 28ed48d..cc9d7d5 100644 --- a/src/c/day1.c +++ b/src/c/day1.c @@ -3,19 +3,10 @@ * * Grendel: a compiling Beowulf reimplementation. * - * Day 1 of work towards a compiler... eventually... + * Day 1 of work towards a compiler... eventually.. * * This is a straight copy of Noah Zentzis' work. There's no original work of - * mine here, yet. - * - * See: https://generalproblem.net/lets_build_a_compiler/01-starting-out/ - * - * I needed to install (on Debian) the package `gcc-multilib` to get this to - * link. - * - * The rest of day 1 is setting ip memory representation, which I've already - * done in `memory.h`, and which I'm consciously doing differently from the - * way that Zentis does it. + * mine here, yet * * (c) 2026 Simon Brooke * Licensed under GPL version 2.0, or, at your option, any later version. @@ -25,12 +16,6 @@ #include #include -/* - * "The `__attribute__((__cdecl__))` part is a GCC-specific syntax extension - * that tells the compiler to use the - * "[cdecl](https://en.wikipedia.org/wiki/X86_calling_conventions#cdecl)" - * calling convention when executing the function." - */ __attribute__((__cdecl__)) extern int lisp_entry(); diff --git a/src/c/memory.c b/src/c/memory.c deleted file mode 100644 index e69de29..0000000 diff --git a/src/c/memory.h b/src/c/memory.h index 6fe3c6d..07ad06f 100644 --- a/src/c/memory.h +++ b/src/c/memory.h @@ -16,10 +16,6 @@ /* Tags for 32 bit objects, with 3 bits of tag an one mark bit */ -/** - * @brief An error object - * - */ #define ERRORTV (0) /** * @brief This pointer object is an actual pointer -- an offset into consspace. @@ -45,14 +41,9 @@ */ /** - * @brief A 64 bit object, other than a cons cell. - - * All 64 bit objects other - * than cons cells shall have all three lower bits of the tag set; a cons cell - * is any word in managed memory with a 32 bit object in the `car` position - * (and thus at least on of the lower three bits of its tag is **not** set.) + * @bried This is not actually a pointer at all but the first word of a cell. */ -#define OBJ64TV (7) +#define CELLTV (7) /** * @brief this cell is a symbol @@ -84,7 +75,7 @@ */ #define STRINGTV (0x37) -// These valid potential values remain unassigned: +// The possible potential values remain unassigned: // i = 63 (111111, 0x3f); // i = 71 (1000111, 0x47); // i = 79 (1001111, 0x4f); @@ -116,7 +107,7 @@ /** * @brief Return the tag of this object, assuming it to be a 32 bit object - * (unsafe -- verify that tag32(obj) == OBJ64TV first) + * (unsafe -- verify that tag32(obj) == CELLTV first) */ #define tag64(obj) ((obj << 1) & TAG64) diff --git a/src/guile/memory.scm b/src/guile/memory.scm deleted file mode 100644 index f92a918..0000000 --- a/src/guile/memory.scm +++ /dev/null @@ -1,208 +0,0 @@ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; memory.scm -;; -;; Grendel: a compiling Beowulf reimplementation. -;; -;; The memory management subsystem. -;; -;; This is essentially the same representation as given in `memory.h`, -;; except, obviously, expressed in Scheme. -;; -;; I don't currently know how to prevent name collisions in Scheme. I'm a -;; bit worried by this! I'm using the prefix `bw-` where I think I'm at -;; risk. -;; -;; (c) 2026 Simon Brooke -;; Licensed under GPL version 2.0, or, at your option, any later version. -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Cons space ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; It **appears** to be possible to make an array of 'u64' in Guile. -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; the number of cells in the array -(define bw-cons-space-size 2048) - -;; the actual array -(define bw-cons-space (make-typed-array 'u64 0 2048)) - -;; the freelist pointer -(define bw-freelist-pointer 0) - -;; Cons space objects: tsgs ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; Tag values for 32 bit objects, with 3 bits of tag and one mark bit - -;; An error object -(define error-tv 0) - -;; An actual pointer -- except that my current plan is that this should be -;; a pointer (offset) into an array of 64 bit words, not the address of an -;; arbitrary byte in memory -(define offset-tv 1) - -;; Maximum value I think we can hold in an integer. -;; calculated by (format #t "~x" (floor (/ (- (expt 2 28) 1) 2))) -(define max-integer #x7ffffff) - -;; Minimum value I think we can hold in an integer. -(define min-integer -#x7ffffff) - -;; An integer object -- a 28 bit signed integer value. -(define integer-tv 2) - -;; A character object -(define char-tv 3) - -;; A 16 bit floating point number (future expansion). -;; Yes, it could be 28 bit, but I think that would hurt my brain. -(define float-tv 4) - -;; Values 5 and 6 are available for further 32 bit data types. - -;; Value 7 is special, because -;; 1. it should appear *only* in the first 32 bit half of a 64 bit word; and -;; 2. it marks the word as a single 64 bit object, rather than as a pair of -;; 32 bit objects. -;; All 64 bit objects other than cons cells shall have all three lower bits -;; of the tag set; a cons cell is any word in managed memory with a 32 bit -;; object in the `car` position (and thus at least one of the lower three bits -;; of its tag is **not** set.) -(define obj64-tv 7) - -;; A symbol object -;; In the long run this may be a pointer into the heap. Lisp 1.5 accepted names -;; of up to 30 characters; I can't pack that into a 64 bit cell, let alone a 56 -;; bit payload! But, for bootstrapping, we'll pack 11 upper alpha characters at -;; five bits each into a 56 bit payload. -(define symbol-tv #xf) - -;; A (compiled) function object -;; I'm pretty sure this is going to have to end up being a pointer into the heap. -(define function-tv #x17) - -;; A (compiled) special form -;; Also probably a pointer into the heap. -(define special-tv #x1f) - -;; A rational number? Future expansion. -(define rational-tv #x27) - -;; A bignum? Future expansion. -(define bignum-tv #x2f) - -;; (Part of) a string? Or a string could be a pointer into the heap? In any case, -;; future expansion. -(define string-tv #x37) - -;; These valid potential values remain unassigned: -;; i = 63 (111111, 0x3f); -;; i = 71 (1000111, 0x47); -;; i = 79 (1001111, 0x4f); -;; i = 87 (1010111, 0x57); -;; i = 95 (1011111, 0x5f); -;; i = 103 (1100111, 0x67); -;; i = 111 (1101111, 0x6f); -;; i = 119 (1110111, 0x77); - -;; A free cell: an unassigned cell which should be part of the freelist. -(define free-tv #x7f) - -;; Cons space objects: masks ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; the mask for the mark bit, which is the first bit in every object. -;; 32 bit objects in the cdr position don't need a mark bit, but tough. -;; The mark bit is for use by the mark-but-don't-sweep garbage collector. -(define mark-mask 1) - -;; the mask for the tag of a 32 bit object,;;after* it has been shifted left -;; one place to skip the mark bit. -(define tag-32-mask 7) - -;; the mask for the tag of a 64 bit object,;;after* it has been shifted left -;; one place to skip the mark bit. -(define tag-64-mask #xf) - -;; the mask for a full 64 bit object. -(define mask-64 #xffffffffffffffff) - -(define payload-64-mask (- mask-64 tag-64-mask)) - -(define mask-32 #xffffffff) - -(define payload-32-mask (- mask-32 tag-32-mask)) - -;; Cons space objects: functions on tags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; Return the tag of this object, assuming it to be a 32 bit object -;; (unsafe -- it may be a 64 bit object) -(define (tag-32 obj) (logand (ash obj -1) tag-32-mask)) - -;; Return the tag of this object, assuming it to be a 32 bit object -;; (unsafe -- verify that tag32(obj) == CELLTV first) -(define (tag-64 obj) (logand (ash obj -1) tag-64-mask)) - -;; Return the tag of an object -(define (tag obj) - (cond - ((= (tag-32 obj) obj64-tv) (tag-64 obj)) - (else (tag-32 obj)))) - -;; An object is a 32 bit object if its tag value is less than obj64-tv -(define (obj32? obj) (< (tag-32 obj) obj64-tv)) - -;; Cons space objects: type predicates ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; for our purposes, an object ia a cons cell if it has a 32 bit object -;; in its lower 32 bits and its value is greater than the maximum possible -;; value of a 32 bit object. -(define (bw-cons? obj) - (and (obj32? obj)(> obj mask-32))) - -(define (bw-error? obj) (= (tag obj) error-tv)) - -(define (bw-offset? obj) (= (tag obj) offset-tv)) - -;; Scheme already has a function called `integer?` which -;; I'm going to need. -(define (bw-integer? obj) (= (tag obj) integer-tv)) - -;; Scheme already has a function called `char?` -(define (bw-character? obj) (= (tag obj) char-tv)) - -;; Scheme already has a function called `symbol?` -(define (bw-symbol? obj) (= (tag obj) symbol-tv)) - -(define (bw-function? obj) (= (tag obj) function-tv)) - -(define (bw-special? obj) (= (tag obj) special-tv)) - -;; if the object is 32 bits, it has 3 bits tag (+ 1 bit mark => 28 bits -;; payload); otherwise, it's a 64 bit object with 7 bits tag + 1 bit mark -;; > 56 bits payload. -(define (bw-payload obj) (ash obj (if (obj32? obj) -4 -8))) - -(define (bw-car obj) (logand obj mask-32)) - -(define (bw-make-object tag payload) - (cond ((and (integer? tag) (<= tag free-tv) (integer? payload)) - (ash (+ tag (ash (logand payload payload-32-mask) 3) 1))) - ((> tag tag-64) - (ash (+ tag (ash (logand payload payload-64-mask) 7)) 1)) - (else make-object error-tv (to-error-payload "X1")))) - -(define (bw-make-error payload) - (cond ((string? payload)) - (else (bw-make-error "R1")))) - -(define (bw-make-integer payload) - (cond - ((and (integer? payload) - (> payload min-integer) - (< payload max-integer)) - (bw-make-object integer-tv payload)) - (else (bw-make-error "R6")))) \ No newline at end of file