OK, this is a lot of useful groundwork, but nothing really built yet.
This commit is contained in:
parent
7e9dc12766
commit
8e9ad73229
3 changed files with 177 additions and 60 deletions
61
README.md
61
README.md
|
|
@ -1,3 +1,62 @@
|
|||
# grendel
|
||||
|
||||
Reimplementation of Beowulf in C, with compiler
|
||||
A reimplementation of [Beowulf](https://git.journeyman.cc/simon/beowulf) bootstrapped in C, with a compiler following, basically, [Abdulaziz Ghuloum's recipe](https://bernsteinbear.com/assets/img/11-ghuloum.pdf).
|
||||
|
||||
## Memory model
|
||||
|
||||
It seems I obsess over how things are represented in memory. Although most of the people who build Ghuloum-style compilers treat memory as something of an afterthought, I'm starting with it.
|
||||
|
||||
### In the beginning was the Word
|
||||
|
||||
My intention is that memory will be considered as an array of 64 bit words.
|
||||
|
||||
Each word may be considered as
|
||||
|
||||
1. a cons cell: two instances of object32, each having one mark bit, three tag bits and 28 payload bits;
|
||||
2. a single object64, having one mark bit, seven tag bits, and 56 payload bits.
|
||||
|
||||
Note that, for any word, the first four bits comprise the mark and (part or all of) the tag, whether the cell is an `object64` or a cons of two `object32`s; for this reason, all `object64`s will have all of the first three bits of the tag set. So:
|
||||
|
||||
```
|
||||
3 3 6
|
||||
0 1 3 4 8 1 2 3
|
||||
+-+---+-----------------------------+-+---+----------------------------+
|
||||
|M|tag| payload... |M|tag| payload... |
|
||||
+-+---+----+------------------------+-+---+----------------------------+
|
||||
|M|111 tag | payload... |
|
||||
+-+--------+-----------------------------------------------------------+
|
||||
where `M` represents `mark`
|
||||
```
|
||||
|
||||
I've tried to do this with C `structs` but I've failed to get the bit fields to pack properly so I'm just going to be a barbarian and use bit masks and bit shifts.
|
||||
|
||||
### Tag! You're it!
|
||||
|
||||
Tags will be allocated as follows:
|
||||
|
||||
| 3-bit value | 7-bit value | (Hex) | Interpretation |
|
||||
| ----------- | ----------- | ----- | ------------------------------------------------------------ |
|
||||
| 0 | 0 | 0x0 | a pointer; an offset into the vector of words. |
|
||||
| 1 | 1 | 0x1 | a signed 28 bit integer. |
|
||||
| 2 | 2 | 0x2 | a character; possibly just a byte, or possibly a 16 bit wchar. |
|
||||
| 3 | 3 | 0x3 | unassigned (possibly a floating point number, later.) |
|
||||
| 4 | 4 | 0x4 | unassigned |
|
||||
| 5 | 5 | 0x5 | unassigned |
|
||||
| 6 | 6 | 0x6 | unassigned |
|
||||
| 7 | 7 | 0x7 | a cons cell |
|
||||
| 7 | 15 | 0xf | a symbol cell *(this implies a symbol can have only up to seven, or if compressed to five bits per character, eleven characters)* |
|
||||
| 7 | 23 | 0x17 | a pointer to a compiled function *(there's a problem here; it means we can only allocate a function in the lower 72,057,594,037,927,936 bytes of memory; I *think* that's not going to byte us on the bum, pun intended)*. |
|
||||
| 7 | 31 | 0x1f | a pointer to a compiled special form *(same problem as above)*. |
|
||||
| 7 | 39 | 0x27 | unassigned ? a ratio cell ? |
|
||||
| 7 | 47 | 0x2f | unassigned ? a big number ? |
|
||||
| 7 | 55 | 0x37 | unassigned ? a string ? |
|
||||
| 7 | 63 | | unassigned |
|
||||
| 7 | 71 | | unassigned |
|
||||
| 7 | 79 | | unassigned |
|
||||
| 7 | 87 | | unassigned |
|
||||
| 7 | 95 | | unassigned |
|
||||
| 7 | 103 | | unassigned |
|
||||
| 7 | 111 | | unassigned |
|
||||
| 7 | 119 | | unassigned |
|
||||
| 7 | 127 | 0x7f | a free cell |
|
||||
|
||||
|
|
|
|||
|
|
@ -16,8 +16,30 @@
|
|||
|
||||
#include "memory.h"
|
||||
|
||||
/**
 * @brief Print the bits of `x` to standard output, most significant bit
 * first, followed by a newline.
 *
 * @param x the value whose bits are to be shown.
 */
void showbits( unsigned int x )
{
    int bit = (int)(sizeof(int) * 8);

    /* count down from the top bit to bit 0 */
    while (bit-- > 0)
    {
        putchar(((x >> bit) & 1u) ? '1' : '0');
    }
    putchar('\n');
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
printf( "Grendel: size of pointer: %d, size of cell %d\n", sizeof( struct pointer), sizeof( struct cell));
|
||||
printf( "Grendel:\n");
|
||||
int i;
|
||||
|
||||
for ( i = 0; i < 8; i++) {
|
||||
printf( "\ti = %d (%b, 0x%x);\n", i, i, i);
|
||||
}
|
||||
for ( ; i < 128; i++) {
|
||||
if ((i & TAG32MASK) == TAG32MASK) {
|
||||
printf( "\ti = %d (%b, 0x%x);\n", i, i, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
152
src/memory.h
152
src/memory.h
|
|
@ -17,78 +17,114 @@
|
|||
/* Tags for 32 bit objects, with 3 bits of tag and one mark bit */
|
||||
|
||||
/**
|
||||
* This pointer object is an actual pointer -- an offset into consspace.
|
||||
* @brief This pointer object is an actual pointer -- an offset into consspace.
|
||||
*/
|
||||
#define OFFSETTV 0
|
||||
#define OFFSETTV (0)
|
||||
/**
|
||||
* This pointer object is actually a 28 bit integer value.
|
||||
* @brief This pointer object is actually a 28 bit integer value.
|
||||
*/
|
||||
#define INTEGERTV 1
|
||||
#define INTEGERTV (1)
|
||||
/**
|
||||
* @brief A character; initially just a byte, but potentially a 16 bit wchar.
|
||||
*/
|
||||
#define CHARTV (2)
|
||||
/**
|
||||
* @brief A 16 bit floating point number. (future expansion)
|
||||
*
|
||||
* Yes, it could be 28 bit, but I think that would hurt my brain.
|
||||
*/
|
||||
#define FLOATTV (3)
|
||||
|
||||
/*
|
||||
* Values 2..6 inclusive reserved further data types, including maybe
|
||||
* Values 4..6 inclusive are reserved for further data types, including maybe
|
||||
* implementing reals later.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This is not actually a pointer at all but the first word of a cell.
|
||||
* @brief This is not actually a pointer at all but the first word of a cell.
|
||||
*/
|
||||
#define CELLTV 7
|
||||
#define CELLTV (7)
|
||||
|
||||
/**
|
||||
* Half of a cons cell. The mark bit is first, so that the ptag can be
|
||||
* considered as part of the CTAG.
|
||||
* @brief this cell is a symbol
|
||||
*/
|
||||
struct pointer {
|
||||
/* mark bit for mark and sweep garbage collector. Yes, this is normally
|
||||
* thought of as part of the cell, but bear with me. */
|
||||
unsigned int mark : 1;
|
||||
/* pointer tag, interpretations as above */
|
||||
unsigned int ptag : 3;
|
||||
/* the actual payload of the pointer object */
|
||||
union {
|
||||
unsigned int offset : 28;
|
||||
int value : 28;
|
||||
} payload;
|
||||
};
|
||||
|
||||
struct cons_payload {
|
||||
struct pointer pointers[2];
|
||||
};
|
||||
|
||||
struct symbol_payload {
|
||||
/* this is the same mark bit as the one in the `address` pointer of the
|
||||
* cons_payload. */
|
||||
unsigned int mark : 1;
|
||||
unsigned int tag : 7;
|
||||
char symbol[7];
|
||||
};
|
||||
|
||||
/* Tags for 64 bit objects. Note that, as the 64 bit object may be made up
|
||||
* of two 32 bit objects (pointers), the 64 bit tag is stored in bits 4..7
|
||||
* inclusive. The first three bits are the first three bits of a 32 bit object,
|
||||
* and thus for 64 bit objects will always be 111. */
|
||||
|
||||
/* this 64 bit object is a cons cell */
|
||||
#define CONSTV 7
|
||||
|
||||
/* this 64 bit object is a seven character atom */
|
||||
#define ATOMTV 8
|
||||
|
||||
/* There are potentially another 119 types of object we could store in a 64
|
||||
* bit object, but I can't think of any we need just now. */
|
||||
|
||||
struct cell {
|
||||
union {
|
||||
struct cons_payload cons;
|
||||
struct symbol_payload symbol;
|
||||
} payload;
|
||||
};
|
||||
|
||||
#define CONSSPACESIZE 65536
|
||||
#define SYMBOLTV (0xf)
|
||||
|
||||
/**
|
||||
* The entire array of cells available to the system.
|
||||
* @brief this cell is a pointer to a compiled function
|
||||
*/
|
||||
struct cell consspace[CONSSPACESIZE];
|
||||
#define FUNCTIONTV (0x17)
|
||||
|
||||
/**
|
||||
* @brief This cell is a pointer to a compiled special form
|
||||
*/
|
||||
#define SPECIALTV (0x1f)
|
||||
|
||||
/**
|
||||
* @brief this cell is a rational number (future expansion)
|
||||
*/
|
||||
#define RATIOTV (0x27)
|
||||
|
||||
/**
|
||||
* @brief this cell is a digit of a big number (future expansion)
|
||||
*/
|
||||
#define BIGNUMTV (0x2f)
|
||||
|
||||
/**
|
||||
* @brief this cell is part of a string (future expansion)
|
||||
*/
|
||||
#define STRINGTV (0x37)
|
||||
|
||||
// The following possible tag values remain unassigned:
|
||||
// i = 63 (111111, 0x3f);
|
||||
// i = 71 (1000111, 0x47);
|
||||
// i = 79 (1001111, 0x4f);
|
||||
// i = 87 (1010111, 0x57);
|
||||
// i = 95 (1011111, 0x5f);
|
||||
// i = 103 (1100111, 0x67);
|
||||
// i = 111 (1101111, 0x6f);
|
||||
// i = 119 (1110111, 0x77);
|
||||
|
||||
#define FREETV (0x7f)
|
||||
|
||||
#define MARKMASK ((unsigned int)1)
|
||||
#define TAG32MASK ((unsigned int)7)
|
||||
/* 64 bit objects carry a seven bit tag (values run up to FREETV, 0x7f), so
 * the mask must cover seven bits, not four. */
#define TAG64MASK ((unsigned int)0x7f)
|
||||
|
||||
#define MASK64 (0xffffffffffffffff)
|
||||
#define MASK32 (0xffffffff)
|
||||
|
||||
/**
|
||||
* @brief Return the mark bit of this object.
|
||||
*/
|
||||
/* The argument is parenthesised so that an expression argument such as
 * `a | b` is masked as a whole rather than only its last operand. */
#define mark(obj) ((obj) & MARKMASK)
|
||||
|
||||
/**
|
||||
* @brief Return the tag of this object, assuming it to be a 32 bit object
|
||||
* (unsafe -- it may be a 64 bit object)
|
||||
*/
|
||||
/* The mark occupies bit 0 (see MARKMASK), so the three tag bits sit in bits
 * 1..3: shift right past the mark, then mask. */
#define tag32(obj) (((obj) >> 1) & TAG32MASK)
|
||||
|
||||
/**
|
||||
* @brief Return the tag of this object, assuming it to be a 64 bit object
|
||||
* (unsafe -- verify that tag32(obj) == CELLTV first)
|
||||
*/
|
||||
/* Shift right past the mark bit (bit 0), then mask with TAG64MASK. (The
 * mask was previously misnamed TAG64, which is not defined anywhere.) */
#define tag64(obj) (((obj) >> 1) & TAG64MASK)
|
||||
|
||||
/* 'address register' -- i.e., first pointer */
|
||||
/* The first pointer is the low 32 bits of the word. The argument is
 * parenthesised so that an expression argument is masked as a whole. */
#define ar(obj) ((obj) & MASK32)
|
||||
|
||||
/* 'decrement register' -- i.e., second pointer */
|
||||
/* The second pointer is the upper 32 bits of the word, so it must be shifted
 * DOWN into the low half before masking; shifting left and then masking
 * the low 32 bits always yielded zero. */
#define dr(obj) (((obj) >> 32) & MASK32)
|
||||
|
||||
/**
|
||||
* @brief Return the tag of this object, safely. (safe).
|
||||
*/
|
||||
/* Anything whose three bit tag is not 7 is a 32 bit object and keeps that
 * tag; otherwise the wider 64 bit tag is returned. */
#define tag(obj) ((tag32(obj) != 7) ? tag32(obj) : tag64(obj))
|
||||
|
||||
/**
|
||||
* @brief return the payload of a pointer (safe).
|
||||
*/
|
||||
/* The payload lies ABOVE the mark and tag bits, so those low bits are
 * shifted out to the right: 4 bits (mark + 3 bit tag) for a 32 bit object,
 * 8 bits (mark + 7 bit tag) for a 64 bit object. Shifting left, as this
 * previously did, kept the tag bits and discarded the top of the payload. */
#define payload(obj) ((tag32(obj) < CELLTV) ? (((obj) & MASK32) >> 4) : ((obj) >> 8))
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue