OK, this is a lot of useful groundwork, but nothing really built yet.

This commit is contained in:
Simon Brooke 2026-04-05 15:40:12 +01:00
parent 7e9dc12766
commit 8e9ad73229
3 changed files with 177 additions and 60 deletions

View file

@ -1,3 +1,62 @@
# grendel # grendel
Reimplementation of Beowulf in C, with compiler A reimplementation of [Beowulf](https://git.journeyman.cc/simon/beowulf) bootstrapped in C, with a compiler following, basically, [Abdulaziz Ghuloum's recipe](https://bernsteinbear.com/assets/img/11-ghuloum.pdf).
## Memory model
It seems I obsess with how things are represented in memory. Although most of the people who build Ghuloum-style compilers treat memory as something of an afterthought, I'm starting with it.
### In the beginning was the Word
My intention is that memory will be considered as an array of 64 bit words.
Each word may be considered as
1. a cons cell: two instances of object32, each having one mark bit, three tag bits and 28 payload bits;
2. a single object64, having one mark bit, seven tag bits, and 56 payload bits.
Note that, for any word, the first four bits comprise the mark and (part or all of) the tag, whether the cell is an `object64` or a cons of two `object32`s; for this reason, all `object64`s will have all of the first three bits of the tag set. So:
```
                                   3 3                                6
 0 1 3 4    8                      1 2                                3
+-+---+-----------------------------+-+---+----------------------------+
|M|tag| payload...                  |M|tag| payload...                 |
+-+---+----+------------------------+-+---+----------------------------+
|M|111 tag | payload...                                                |
+-+--------+-----------------------------------------------------------+
where `M` represents `mark`
```
I've tried to do this with C `structs` but I've failed to get the bit fields to pack properly so I'm just going to be a barbarian and use bit masks and bit shifts.
### Tag! You're it!
Tags will be allocated as follows:
| 3-bit value | 7-bit value | (Hex) | Interpretation |
| ----------- | ----------- | ----- | ------------------------------------------------------------ |
| 0 | 0 | 0x0 | a pointer; an offset into the vector of words. |
| 1 | 1 | 0x1 | a signed 28 bit integer. |
| 2 | 2 | 0x2 | a character; possibly just a byte, or possibly a 16 bit wchar. |
| 3 | 3 | 0x3 | unassigned (possibly a floating point number, later.) |
| 4 | 4 | 0x4 | unassigned |
| 5 | 5 | 0x5 | unassigned |
| 6 | 6 | 0x6 | unassigned |
| 7 | 7 | 0x7 | a cons cell |
| 7 | 15 | 0xf | a symbol cell *(this implies a symbol can have only up to seven, or if compressed to five bits per character, eleven characters)* |
| 7 | 23 | 0x17 | a pointer to a compiled function *(there's a problem here; it means we can only allocate a function in the lower 72,057,594,037,927,936 bytes of memory; I *think* that's not going to byte us on the bum, pun intended)*. |
| 7 | 31 | 0x1f | a pointer to a compiled special form *(same problem as above)*. |
| 7 | 39 | 0x27 | unassigned ? a ratio cell ? |
| 7 | 47 | 0x2f | unassigned ? a big number ? |
| 7 | 55 | 0x37 | unassigned ? a string ? |
| 7 | 63 | 0x3f | unassigned |
| 7 | 71 | 0x47 | unassigned |
| 7 | 79 | 0x4f | unassigned |
| 7 | 87 | 0x57 | unassigned |
| 7 | 95 | 0x5f | unassigned |
| 7 | 103 | 0x67 | unassigned |
| 7 | 111 | 0x6f | unassigned |
| 7 | 119 | 0x77 | unassigned |
| 7 | 127 | 0x7f | a free cell |

View file

@ -16,8 +16,30 @@
#include "memory.h" #include "memory.h"
/**
 * @brief Write the binary representation of `x` to stdout, most significant
 * bit first, followed by a newline.
 *
 * One '0' or '1' character is emitted for each of the `sizeof(int) * 8` bit
 * positions.
 *
 * @param x the value whose bits are printed.
 */
void showbits( unsigned int x )
{
    /* Start with only the top bit set and walk it down to bit 0. */
    unsigned int probe = 1u << ((sizeof(int) * 8) - 1);

    while (probe != 0) {
        putchar((x & probe) != 0 ? '1' : '0');
        probe >>= 1;
    }
    printf("\n");
}
/*
 * Entry point: prints a "Grendel:" banner and then a list of tag values, each
 * shown in decimal, binary and hexadecimal.
 */
int main(int argc, char **argv) { int main(int argc, char **argv) {
printf( "Grendel: size of pointer: %d, size of cell %d\n", sizeof( struct pointer), sizeof( struct cell)); printf( "Grendel:\n");
int i;
/* Every value 0..7 is printed. */
/* NOTE(review): the `%b` conversion is only standard from C23 onwards; C
 * libraries that predate it may not support it -- confirm the target libc. */
for ( i = 0; i < 8; i++) {
printf( "\ti = %d (%b, 0x%x);\n", i, i, i);
}
/* `i` carries on from 8: of the values 8..127, only those whose bits selected
 * by TAG32MASK are all set are printed. */
for ( ; i < 128; i++) {
if ((i & TAG32MASK) == TAG32MASK) {
printf( "\ti = %d (%b, 0x%x);\n", i, i, i);
}
}
return 0; return 0;
} }

View file

@ -17,78 +17,114 @@
/* Tags for 32 bit objects, with 3 bits of tag and one mark bit */ /* Tags for 32 bit objects, with 3 bits of tag and one mark bit */
/** /**
* This pointer object is an actual pointer -- an offset into consspace. * @brief This pointer object is an actual pointer -- an offset into consspace.
*/ */
#define OFFSETTV 0 #define OFFSETTV (0)
/** /**
* This pointer object is actually a 28 bit integer value. * @brief This pointer object is actually a 28 bit integer value.
*/ */
#define INTEGERTV 1 #define INTEGERTV (1)
/**
* @brief A character; initially just a byte, but potentially a 16 bit wchar.
*/
#define CHARTV (2)
/**
* @brief A 16 bit floating point number. (future expansion)
*
* Yes, it could be 28 bit, but I think that would hurt my brain.
*/
#define FLOATTV (3)
/* /*
* Values 2..6 inclusive reserved for further data types, including maybe * Values 4..6 inclusive reserved for further data types, including maybe
* implementing reals later. * implementing reals later.
*/ */
/** /**
* This is not actually a pointer at all but the first word of a cell. * @brief This is not actually a pointer at all but the first word of a cell.
*/ */
#define CELLTV 7 #define CELLTV (7)
/** /**
* Half of a cons cell. The mark bit is first, so that the ptag can be * @brief this cell is a symbol
* considered as part of the CTAG.
*/ */
struct pointer { #define SYMBOLTV (0xf)
/* Mark bit for the mark and sweep garbage collector. Yes, this is normally
 * thought of as part of the cell, but bear with me. */
unsigned int mark : 1;
/* Pointer tag: three bits, interpreted according to the tag values defined
 * earlier in this header. */
unsigned int ptag : 3;
/* The actual payload of the pointer object: the same 28 bits, readable either
 * as an unsigned `offset` or as a signed `value`. */
/* NOTE(review): the union is a member object in its own right, so on typical
 * ABIs it cannot share a storage unit with `mark` and `ptag`; this struct is
 * therefore likely to occupy more than 32 bits -- confirm with sizeof. */
union {
unsigned int offset : 28;
int value : 28;
} payload;
};
/* Payload of a cons cell: two `struct pointer` objects held side by side. */
struct cons_payload {
/* the two pointer objects that make up the cell */
struct pointer pointers[2];
};
/* Payload of a symbol cell: a mark bit, a seven bit tag and seven characters. */
struct symbol_payload {
/* This is the same mark bit as the one in the `address` pointer of the
 * cons_payload. */
unsigned int mark : 1;
/* seven bit tag identifying the kind of cell */
unsigned int tag : 7;
/* the characters of the symbol; at most seven fit */
char symbol[7];
};
/* Tags for 64 bit objects. Note that, as the 64 bit object may be made up
* of two 32 bit objects (pointers*, the 64 bit tag is stored in bits 4..7
* inclusive. The first three bits are the first three bits of a 32 bit object,
* and thus for 64bit objects will always b 111.*/
/* this 64 bit object is a cons cell */
#define CONSTV 7
/* this 64 bit object is a seven character atom */
#define ATOMTV 8
/* There are potentially another 119 types of object we could store in a 64
* bit object, but I can't think of any we need just now. */
/* A cell: one piece of storage that can be read either as a cons payload or
 * as a symbol payload. */
struct cell {
union {
/* view of the cell as a pair of pointer objects */
struct cons_payload cons;
/* view of the cell as a tagged symbol */
struct symbol_payload symbol;
} payload;
};
#define CONSSPACESIZE 65536
/** /**
* The entire array of cells available to the system. * @brief this cell is a pointer to a compiled function
*/ */
struct cell consspace[CONSSPACESIZE]; #define FUNCTIONTV (0x17)
/**
* @brief This cell is a pointer to a compiled special form
*/
#define SPECIALTV (0x1f)
/**
* @brief this cell is a rational number (future expansion)
*/
#define RATIOTV (0x27)
/**
* @brief this cell is a digit of a big number (future expansion)
*/
#define BIGNUMTV (0x2f)
/**
* @brief this cell is part of a string (future expansion)
*/
#define STRINGTV (0x37)
// The following possible tag values remain unassigned:
// i = 63 (111111, 0x3f);
// i = 71 (1000111, 0x47);
// i = 79 (1001111, 0x4f);
// i = 87 (1010111, 0x57);
// i = 95 (1011111, 0x5f);
// i = 103 (1100111, 0x67);
// i = 111 (1101111, 0x6f);
// i = 119 (1110111, 0x77);
#define FREETV (0x7f)
/*
 * Bit layout used by the masks and accessors below. Bit 0 is the least
 * significant bit of the object, which is where MARKMASK places the mark:
 *
 *   32 bit object: bit 0 = mark, bits 1..3 = tag, bits 4..31 = payload
 *   64 bit object: bit 0 = mark, bits 1..7 = tag, bits 8..63 = payload
 *
 * The low three tag bits of every 64 bit object are all set, so the 3 bit tag
 * of a 64 bit object always reads as 7; that is how the two kinds of object
 * are told apart.
 *
 * All accessors convert their argument to `unsigned long long` before
 * shifting, so the shifts are well defined whatever integer type is passed.
 * `tag` and `payload` evaluate their argument more than once: do not pass an
 * expression with side effects.
 */
/** @brief Mask selecting the mark bit (bit 0) of an object. */
#define MARKMASK ((unsigned int)1)
/** @brief Mask for a 3 bit tag, after the mark bit has been shifted away. */
#define TAG32MASK ((unsigned int)7)
/**
 * @brief Mask for a 7 bit tag, after the mark bit has been shifted away.
 *
 * Seven bits are needed: the largest tag value, the free cell tag, is 0x7f.
 */
#define TAG64MASK ((unsigned int)0x7f)
/** @brief Mask selecting all 64 bits of a word. */
#define MASK64 (0xffffffffffffffff)
/** @brief Mask selecting the low 32 bits of a word. */
#define MASK32 (0xffffffff)
/**
 * @brief Return the mark bit of this object.
 */
#define mark(obj) ((unsigned int)((unsigned long long)(obj) & MARKMASK))
/**
 * @brief Return the tag of this object, assuming it to be a 32 bit object
 * (unsafe -- it may be a 64 bit object)
 */
#define tag32(obj) ((unsigned int)(((unsigned long long)(obj) >> 1) & TAG32MASK))
/**
 * @brief Return the tag of this object, assuming it to be a 64 bit object
 * (unsafe -- verify that tag32(obj) is 7 first)
 */
#define tag64(obj) ((unsigned int)(((unsigned long long)(obj) >> 1) & TAG64MASK))
/* 'address register' -- i.e., first pointer: the low 32 bits of the word */
#define ar(obj) ((unsigned long long)(obj) & MASK32)
/* 'decrement register' -- i.e., second pointer: the high 32 bits of the word */
#define dr(obj) (((unsigned long long)(obj) >> 32) & MASK32)
/**
 * @brief Return the tag of this object, safely: the 7 bit tag when all three
 * low tag bits are set, the 3 bit tag otherwise.
 */
#define tag(obj) ((tag32(obj) == TAG32MASK) ? tag64(obj) : tag32(obj))
/**
 * @brief Return the payload of an object (safe): the 28 bits above the tag
 * for a 32 bit object, the 56 bits above the tag for a 64 bit object.
 */
#define payload(obj) ((tag32(obj) < TAG32MASK) ? (((unsigned long long)(obj) & MASK32) >> 4) : ((unsigned long long)(obj) >> 8))
#endif #endif