diff --git a/README.md b/README.md index f1756f4..6c37626 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,62 @@ # grendel -Reimplementation of Beowulf in C, with compiler \ No newline at end of file +A reimplementation of [Beowulf](https://git.journeyman.cc/simon/beowulf) bootstrapped in C, with a compiler following, basically, [Abdulaziz Ghuloum's recipe](https://bernsteinbear.com/assets/img/11-ghuloum.pdf). + +## Memory model + +It seems I obsess with how things are represented in memory. Although most of the people who build Ghuloum-style compilers treat memory as something of an afterthought, I'm starting with it. + +### In the beginning was the Word + +My intention is that memory will be considered as an array of 64 bit words. + +Each word may be considered as + +1. a cons cell: two instances of object32, each having one mark bit, three tag bits and 28 payload bits; +2. a single object64, having one mark bit, seven tag bits, and 56 payload bits. + +Note that, for any word, the first four bits comprise the mark and (part or all of) the tag, whether the cell is an `object64` or a cons of two `object32`s; for this reason, all `object64`s will have all of the first three bits of the tag set. So: + +``` + 3 3 6 + 0 1 3 4 8 1 2 3 ++-+---+-----------------------------+-+---+----------------------------+ +|M|tag| payload... |M|tag| payload... | ++-+---+----+------------------------+-+---+----------------------------+ +|M|111 tag | payload... | ++-+--------+-----------------------------------------------------------+ +where `M` represents `mark` +``` + +I've tried to do this with C `structs` but I've failed to get the bit fields to pack properly so I'm just going to be a barbarian and use bit masks and bit shifts. + +### Tag! You're it! 
+ +Tags will be allocated as follows: + +| 3-bit value | 7-bit value | (Hex) | Interpretation | +| ----------- | ----------- | ----- | ------------------------------------------------------------ | +| 0 | 0 | 0x0 | a pointer; an offset into the vector of words. | +| 1 | 1 | 0x1 | a signed 28 bit integer. | +| 2 | 2 | 0x2 | a character; possibly just a byte, or possibly a 16 bit wchar. | +| 3 | 3 | 0x3 | unassigned (possibly a floating point number, later.) | +| 4 | 4 | 0x4 | unassigned | +| 5 | 5 | 0x5 | unassigned | +| 6 | 6 | 0x6 | unassigned | +| 7 | 7 | 0x7 | a cons cell | +| 7 | 15 | 0xf | a symbol cell *(this implies a symbol can have only up to seven, or if compressed to five bits per character, eleven characters)* | +| 7 | 23 | 0x17 | a pointer to a compiled function *(there's a problem here; it means we can only allocate a function in the lower 72,057,594,037,927,936 bytes of memory; I **think** that's not going to byte us on the bum, pun intended)*. | +| 7 | 31 | 0x1f | a pointer to a compiled special form *(same problem as above)*. | +| 7 | 39 | 0x27 | unassigned ? a ratio cell ? | +| 7 | 47 | 0x2f | unassigned ? a big number ? | +| 7 | 55 | 0x37 | unassigned ? a string ? | +| 7 | 63 | 0x3f | unassigned | +| 7 | 71 | 0x47 | unassigned | +| 7 | 79 | 0x4f | unassigned | +| 7 | 87 | 0x57 | unassigned | +| 7 | 95 | 0x5f | unassigned | +| 7 | 103 | 0x67 | unassigned | +| 7 | 111 | 0x6f | unassigned | +| 7 | 119 | 0x77 | unassigned | +| 7 | 127 | 0x7f | a free cell | + diff --git a/src/grendel.c b/src/grendel.c index bc36820..f6167fb 100644 --- a/src/grendel.c +++ b/src/grendel.c @@ -16,8 +16,30 @@ #include "memory.h" +void showbits( unsigned int x ) +{ + int i=0; + for (i = (sizeof(int) * 8) - 1; i >= 0; i--) + { + putchar(x & (1u << i) ? 
'1' : '0'); + } + printf("\n"); +} + int main(int argc, char **argv) { - printf( "Grendel: size of pointer: %d, size of cell %d\n", sizeof( struct pointer), sizeof( struct cell)); + printf( "Grendel:\n"); + int i; + + for ( i = 0; i < 8; i++) { + printf( "\ti = %d (%b, 0x%x);\n", i, i, i); + } + for ( ; i < 128; i++) { + if ((i & TAG32MASK) == TAG32MASK) { + printf( "\ti = %d (%b, 0x%x);\n", i, i, i); + } + } + + return 0; } diff --git a/src/memory.h b/src/memory.h index 5303f8e..c768e39 100644 --- a/src/memory.h +++ b/src/memory.h @@ -17,78 +17,114 @@ /* Tags for 32 bit objects, with 3 bits of tag an one mark bit */ /** - * This pointer object is an actual pointer -- an offset into consspace. + * @brief This pointer object is an actual pointer -- an offset into consspace. */ -#define OFFSETTV 0 +#define OFFSETTV (0) /** - * This pointer object is actually a 28 bit integer value. + * @brief This pointer object is actually a 28 bit integer value. */ -#define INTEGERTV 1 +#define INTEGERTV (1) +/** + * @brief A character; initially just a byte, but potentially a 16 bit wchar. + */ + #define CHARTV (2) +/** + * @brief A 16 bit floating point number. (future expansion) + * + * Yes, it could be 28 bit, but I think that would hurt my brain. + */ +#define FLOATTV (3) + /* - * Values 2..6 inclusive reserved further data types, including maybe + * Values 4..6 inclusive reserved for further data types, including maybe * implementing reals later. */ /** - * This is not actually a pointer at all but the first word of a cell. + * @brief This is not actually a pointer at all but the first word of a cell. */ -#define CELLTV 7 +#define CELLTV (7) /** - * Half of a cons cell. The mark bit is first, so that the ptag can be - * considered as part of the CTAG. + * @brief this cell is a symbol */ -struct pointer { - /* mark bit for mark and sweep garbage collector. Yes, this is normally - * thought of as part of the cell, but bear with me. 
*/ - unsigned int mark : 1; - /* pointer tag, interpretations as above */ - unsigned int ptag : 3; - /* the actual payload of the pointer object */ - union { - unsigned int offset : 28; - int value : 28; - } payload; -}; - -struct cons_payload { - struct pointer pointers[2]; -}; - -struct symbol_payload { - /* this is the same mark bit as the one in the `address` pointer of the - * cons_payload. */ - unsigned int mark : 1; - unsigned int tag : 7; - char symbol[7]; -}; - -/* Tags for 64 bit objects. Note that, as the 64 bit object may be made up - * of two 32 bit objects (pointers*, the 64 bit tag is stored in bits 4..7 - * inclusive. The first three bits are the first three bits of a 32 bit object, - * and thus for 64bit objects will always b 111.*/ - -/* this 64 bit object is a cons cell */ -#define CONSTV 7 - -/* this 64 bit object is a seven character atom */ -#define ATOMTV 8 - -/* There are potentially another 119 types of object we could store in a 64 - * bit object, but I can't think of any we need just now. */ - -struct cell { - union { - struct cons_payload cons; - struct symbol_payload symbol; - } payload; -}; - -#define CONSSPACESIZE 65536 +#define SYMBOLTV (0xf) /** - * The entire array of cells available to the system. 
+ * @brief this cell is a pointer to a compiled function */ -struct cell consspace[CONSSPACESIZE]; +#define FUNCTIONTV (0x17) + +/** + * @brief This cell is a pointer to a compiled special form + */ +#define SPECIALTV (0x1f) + +/** + * @brief this cell is a rational number (future expansion) + */ +#define RATIOTV (0x27) + +/** + * @brief this cell is a digit of a big number (future expansion) + */ +#define BIGNUMTV (0x2f) + +/** + * @brief this cell is part of a string (future expansion) + */ +#define STRINGTV (0x37) + +// The following possible values remain unassigned: +// i = 63 (111111, 0x3f); +// i = 71 (1000111, 0x47); +// i = 79 (1001111, 0x4f); +// i = 87 (1010111, 0x57); +// i = 95 (1011111, 0x5f); +// i = 103 (1100111, 0x67); +// i = 111 (1101111, 0x6f); +// i = 119 (1110111, 0x77); + +#define FREETV (0x7f) + +#define MARKMASK ((unsigned int)1) +#define TAG32MASK ((unsigned int)7) +#define TAG64MASK ((unsigned int)0x7f) + +#define MASK64 (0xffffffffffffffff) +#define MASK32 (0xffffffff) + +/** + * @brief Return the mark bit (the lowest bit) of this object. + */ +#define mark(obj) ((obj) & MARKMASK) + +/** + * @brief Return the 3 bit tag of this object (the bits just above the mark bit), assuming it to be a 32 bit object + * (unsafe -- it may be a 64 bit object) + */ +#define tag32(obj) (((obj) >> 1) & TAG32MASK) + +/** + * @brief Return the 7 bit tag of this object, assuming it to be a 64 bit object + * (unsafe -- verify that tag32(obj) == CELLTV first) + */ +#define tag64(obj) (((obj) >> 1) & TAG64MASK) + +/* 'address register' -- i.e., first pointer (the low 32 bits) */ +#define ar(obj) ((obj) & MASK32) + +/* 'decrement register' -- i.e., second pointer (the high 32 bits) */ +#define dr(obj) (((obj) >> 32) & MASK32) + +/** + * @brief Return the tag of this object, safely: the 7 bit tag if tag32 is CELLTV, otherwise the 3 bit tag. + */ +#define tag(obj) ((tag32(obj) == CELLTV) ? tag64(obj) : tag32(obj)) + +/** + * @brief return the payload of a pointer (safe): the bits above the mark and tag. + */ +#define payload(obj) ((tag32(obj) < CELLTV) ? (((obj) & MASK32) >> 4) : ((obj) >> 8)) #endif