
mark-sweep uses all the metadata bits

Don't require that mark bytes be cleared; instead we have rotating
colors.  Beginnings of support for concurrent marking, pinning,
conservative roots, and generational collection.
commit 3a04078044
parent f97906421e
Author: Andy Wingo
Date:   2022-05-01 15:04:21 +02:00


@@ -30,6 +30,54 @@ STATIC_ASSERT_EQ(MEDIUM_OBJECT_THRESHOLD,
 STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD,
                  LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);
 
+// Each granule has one metadata byte stored in a side table, used for
+// mark bits but also for other per-object metadata. Already we were
+// using a byte instead of a bit to facilitate parallel marking.
+// (Parallel markers are allowed to race.) Turns out we can put a
+// pinned bit there too, for objects that can't be moved. Actually
+// there are two pinned bits: one that's managed by the collector, which
+// pins referents of conservative roots, and one for pins managed
+// externally (maybe because the mutator requested a pin.) Then there's
+// a "remembered" bit, indicating that the object should be scanned for
+// references to the nursery. If the remembered bit is set, the
+// corresponding remset byte should also be set in the slab (see below).
+//
+// Getting back to mark bits -- because we want to allow for
+// conservative roots, we need to know whether an address indicates an
+// object or not. That means that when an object is allocated, it has
+// to set a bit, somewhere. In our case we use the metadata byte, and
+// set the "young" bit. In future we could use this for generational
+// GC, with the sticky mark bit strategy.
+//
+// When an object becomes dead after a GC, it will still have a bit set
+// -- maybe the young bit, or maybe a survivor bit. The sweeper has to
+// clear these bits before the next collection. But, for concurrent
+// marking, we will also be marking "live" objects, updating their mark
+// bits. So there are four object states concurrently observable:
+// young, dead, survivor, and marked. (If we didn't have concurrent
+// marking we would still need the "marked" state, because marking
+// mutator roots before stopping is also a form of concurrent marking.)
+// Even though these states are mutually exclusive, we use separate bits
+// for them because we have the space. After each collection, the dead,
+// survivor, and marked states rotate by one bit.
+enum metadata_byte {
+  METADATA_BYTE_NONE = 0,
+  METADATA_BYTE_YOUNG = 1,
+  METADATA_BYTE_MARK_0 = 2,
+  METADATA_BYTE_MARK_1 = 4,
+  METADATA_BYTE_MARK_2 = 8,
+  METADATA_BYTE_END = 16,
+  METADATA_BYTE_PINNED = 32,
+  METADATA_BYTE_PERMAPINNED = 64,
+  METADATA_BYTE_REMEMBERED = 128
+};
+
+static uint8_t rotate_dead_survivor_marked(uint8_t mask) {
+  uint8_t all =
+    METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2;
+  return ((mask << 1) | (mask >> 2)) & all;
+}
+
 #define SLAB_SIZE (4 * 1024 * 1024)
 #define BLOCK_SIZE (64 * 1024)
 #define METADATA_BYTES_PER_BLOCK (BLOCK_SIZE / GRANULE_SIZE)
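
As an aside for readers following the commit: below is a minimal standalone sketch (not part of the change itself) showing how the three mark colors cycle under rotate_dead_survivor_marked. The enum values and the rotation function are copied from the hunk above; only main() is invented for illustration.

#include <assert.h>
#include <stdint.h>

enum metadata_byte {
  METADATA_BYTE_YOUNG = 1,
  METADATA_BYTE_MARK_0 = 2,
  METADATA_BYTE_MARK_1 = 4,
  METADATA_BYTE_MARK_2 = 8
};

static uint8_t rotate_dead_survivor_marked(uint8_t mask) {
  uint8_t all =
    METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2;
  return ((mask << 1) | (mask >> 2)) & all;
}

int main(void) {
  // Each color maps to the next, and MARK_2 wraps around to MARK_0, so
  // "dead", "survivor" and "marked" trade places after every collection
  // without any metadata byte having to be rewritten.
  assert(rotate_dead_survivor_marked(METADATA_BYTE_MARK_0)
         == METADATA_BYTE_MARK_1);
  assert(rotate_dead_survivor_marked(METADATA_BYTE_MARK_1)
         == METADATA_BYTE_MARK_2);
  assert(rotate_dead_survivor_marked(METADATA_BYTE_MARK_2)
         == METADATA_BYTE_MARK_0);
  // A mask holding several colors rotates each bit independently.
  assert(rotate_dead_survivor_marked(METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2)
         == (METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_2));
  return 0;
}
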
@@ -155,6 +203,8 @@ struct gcobj {
 };
 
 struct mark_space {
+  uint8_t sweep_live_mask;
+  uint8_t marked_mask;
   uintptr_t low_addr;
   size_t extent;
   size_t heap_size;
@@ -233,10 +283,13 @@ static inline uint8_t* mark_byte(struct mark_space *space, struct gcobj *obj) {
 
 static inline int mark_space_trace_object(struct mark_space *space,
                                           struct gcobj *obj) {
-  uint8_t *byte = mark_byte(space, obj);
-  if (*byte)
+  uint8_t *loc = object_metadata_byte(obj);
+  uint8_t byte = *loc;
+  if (byte & space->marked_mask)
     return 0;
-  *byte = 1;
+  uint8_t mask = METADATA_BYTE_YOUNG | METADATA_BYTE_MARK_0
+    | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2;
+  *loc = (byte & ~mask) | space->marked_mask;
   return 1;
 }
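
The new read-modify-write here matters because the metadata byte now carries more than a mark: the pin and remembered bits must survive marking. A hypothetical worked example (the young-and-pinned scenario is invented; the bit values are the ones from the enum above):

#include <assert.h>
#include <stdint.h>

int main(void) {
  enum {
    YOUNG = 1, MARK_0 = 2, MARK_1 = 4, MARK_2 = 8,
    END = 16, PINNED = 32, PERMAPINNED = 64, REMEMBERED = 128
  };
  uint8_t marked_mask = MARK_2;    // this cycle's "marked" color
  uint8_t byte = YOUNG | PINNED;   // freshly allocated, pinned object

  // As in the hunk above: clear only the young bit and the three mark
  // colors, then OR in the current mark color. PINNED is untouched.
  uint8_t mask = YOUNG | MARK_0 | MARK_1 | MARK_2;
  byte = (byte & ~mask) | marked_mask;

  assert(byte == (PINNED | MARK_2));
  return 0;
}

A plain *byte = 1, as before, would have silently dropped the pin.
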
@@ -501,6 +554,11 @@ static void reset_sweeper(struct mark_space *space) {
   space->next_block = (uintptr_t) &space->slabs[0].blocks;
 }
 
+static void rotate_mark_bytes(struct mark_space *space) {
+  space->sweep_live_mask = rotate_dead_survivor_marked(space->sweep_live_mask);
+  space->marked_mask = rotate_dead_survivor_marked(space->marked_mask);
+}
+
 static void collect(struct mutator *mut) {
   struct heap *heap = mutator_heap(mut);
   struct mark_space *space = heap_mark_space(heap);
@@ -516,6 +574,7 @@ static void collect(struct mutator *mut) {
   tracer_trace(heap);
   tracer_release(heap);
   reset_sweeper(space);
+  rotate_mark_bytes(space);
   heap->count++;
   large_object_space_finish_gc(lospace);
   heap_reset_stolen_pages(heap, lospace->live_pages_at_last_collection);
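
To make the rotation in collect() concrete, here is a standalone sketch (again not from the commit) of one full cycle, assuming the initial masks that mark_space_init assigns in the last hunk below (marked = MARK_2, sweep-live = YOUNG | MARK_1 | MARK_2):

#include <assert.h>
#include <stdint.h>

enum metadata_byte {
  METADATA_BYTE_YOUNG = 1,
  METADATA_BYTE_MARK_0 = 2,
  METADATA_BYTE_MARK_1 = 4,
  METADATA_BYTE_MARK_2 = 8
};

static uint8_t rotate_dead_survivor_marked(uint8_t mask) {
  uint8_t all =
    METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2;
  return ((mask << 1) | (mask >> 2)) & all;
}

int main(void) {
  // During the first GC, tracing stamps live objects with MARK_2.
  uint8_t marked_mask = METADATA_BYTE_MARK_2;
  uint8_t object_byte = marked_mask;       // a live object after tracing

  // collect() then calls rotate_mark_bytes(): MARK_0 becomes the new
  // "marked" color, and MARK_2 becomes the "survivor" color.
  marked_mask = rotate_dead_survivor_marked(marked_mask);
  assert(marked_mask == METADATA_BYTE_MARK_0);

  uint8_t sweep_live_mask =
      rotate_dead_survivor_marked(METADATA_BYTE_YOUNG | METADATA_BYTE_MARK_1
                                  | METADATA_BYTE_MARK_2);
  assert(sweep_live_mask == (METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_2));

  // The object's byte was never rewritten, yet against the rotated mask
  // it now reads as a survivor -- this is what lets the sweeper skip
  // the old clear-all-mark-bytes pass.
  assert(object_byte & sweep_live_mask);
  return 0;
}
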
@@ -602,24 +661,23 @@ static size_t live_object_granules(struct gcobj *obj) {
   return size_to_granules(bytes);
 }
 
-static size_t next_mark(const uint8_t *mark, size_t limit) {
-  size_t n = 0;
-  for (; (((uintptr_t)mark) & 7) && n < limit; n++)
-    if (mark[n])
-      return n;
-  uintptr_t *word_mark = (uintptr_t *)(mark + n);
-  for (;
-       n + sizeof(uintptr_t) * 4 <= limit;
-       n += sizeof(uintptr_t) * 4, word_mark += 4)
-    if (word_mark[0] | word_mark[1] | word_mark[2] | word_mark[3])
-      break;
-  for (;
-       n + sizeof(uintptr_t) <= limit;
-       n += sizeof(uintptr_t), word_mark += 1)
-    if (word_mark[0])
-      break;
-  for (; n < limit; n++)
-    if (mark[n])
+static size_t sweep_and_check_live(uint8_t *loc, uint8_t live_mask) {
+  uint8_t metadata = *loc;
+  // If the metadata byte is nonzero, that means either a young, dead,
+  // survived, or marked object. If it's live (young, survived, or
+  // marked), we found the next mark. Otherwise it's dead and we clear
+  // the byte.
+  if (metadata) {
+    if (metadata & live_mask)
+      return 1;
+    *loc = 0;
+  }
+  return 0;
+}
+
+static size_t next_mark(uint8_t *mark, size_t limit, uint8_t live_mask) {
+  for (size_t n = 0; n < limit; n++)
+    if (sweep_and_check_live(&mark[n], live_mask))
       return n;
   return limit;
 }
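
A small standalone check (not from the commit; the metadata array is invented) of the sweep-as-you-go behavior that replaces the old word-at-a-time scan: dead bytes are zeroed as next_mark walks past them, and the walk stops at the first live granule. Both functions are copied from the hunk above, minus the comment.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

enum { YOUNG = 1, MARK_1 = 4, MARK_2 = 8 };

static size_t sweep_and_check_live(uint8_t *loc, uint8_t live_mask) {
  uint8_t metadata = *loc;
  if (metadata) {
    if (metadata & live_mask)
      return 1;
    *loc = 0;   // dead object: clear its byte lazily, during sweep
  }
  return 0;
}

static size_t next_mark(uint8_t *mark, size_t limit, uint8_t live_mask) {
  for (size_t n = 0; n < limit; n++)
    if (sweep_and_check_live(&mark[n], live_mask))
      return n;
  return limit;
}

int main(void) {
  uint8_t live_mask = YOUNG | MARK_2;  // young or this cycle's survivors
  //                   free   dead   free   live
  uint8_t metadata[] = { 0,  MARK_1,  0,   MARK_2 };

  // Three free granules precede the first live object...
  assert(next_mark(metadata, 4, live_mask) == 3);
  // ...and the dead object's stale byte was cleared along the way.
  assert(metadata[1] == 0);
  return 0;
}
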
@@ -656,6 +714,7 @@ static int sweep(struct mutator *mut,
   ssize_t to_reclaim = 2 * MEDIUM_OBJECT_GRANULE_THRESHOLD;
   uintptr_t sweep = mut->sweep;
   uintptr_t limit = align_up(sweep, BLOCK_SIZE);
+  uint8_t live_mask = heap_mark_space(mutator_heap(mut))->sweep_live_mask;
 
   if (sweep == limit) {
     sweep = mark_space_next_block(heap_mark_space(mutator_heap(mut)));
@@ -678,7 +737,7 @@ static int sweep(struct mutator *mut,
         limit_granules = to_reclaim;
       }
     }
-    size_t free_granules = next_mark(mark, limit_granules);
+    size_t free_granules = next_mark(mark, limit_granules, live_mask);
     if (free_granules) {
       ASSERT(free_granules <= limit_granules);
       size_t free_bytes = free_granules * GRANULE_SIZE;
@@ -691,9 +750,8 @@ static int sweep(struct mutator *mut,
       if (free_granules == limit_granules)
         break;
     }
-    // Object survived collection; clear mark and continue sweeping.
-    ASSERT(*mark == 1);
-    *mark = 0;
+    // Object survived collection; skip over it and continue sweeping.
+    ASSERT((*mark) & live_mask);
     sweep += live_object_granules((struct gcobj *)sweep) * GRANULE_SIZE;
   }
 
@@ -702,15 +760,32 @@ static int sweep(struct mutator *mut,
 }
 
 // Another thread is triggering GC. Before we stop, finish clearing the
-// mark bytes for the mutator's block, and release the block.
+// dead mark bytes for the mutator's block, and release the block.
 static void finish_sweeping(struct mutator *mut) {
   uintptr_t sweep = mut->sweep;
+  uintptr_t limit = align_up(sweep, BLOCK_SIZE);
+  uint8_t live_mask = heap_mark_space(mutator_heap(mut))->sweep_live_mask;
   if (sweep) {
-    uintptr_t limit = align_up(sweep, BLOCK_SIZE);
     uint8_t* mark = object_metadata_byte((struct gcobj*)sweep);
     size_t limit_granules = (limit - sweep) >> GRANULE_SIZE_LOG_2;
-    memset(mark, 0, limit_granules);
-    mut->sweep = 0;
+    while (limit_granules) {
+      size_t free_granules = next_mark(mark, limit_granules, live_mask);
+      if (free_granules) {
+        ASSERT(free_granules <= limit_granules);
+        size_t free_bytes = free_granules * GRANULE_SIZE;
+        sweep += free_bytes;
+        mark += free_granules;
+        limit_granules -= free_granules;
+        if (limit_granules == 0)
+          break;
+      }
+      // Object survived collection; skip over it and continue sweeping.
+      ASSERT((*mark) & live_mask);
+      size_t live_granules = live_object_granules((struct gcobj *)sweep);
+      sweep += live_granules * GRANULE_SIZE;
+      limit_granules -= live_granules;
+      mark += live_granules;
+    }
   }
 }
@@ -920,6 +995,11 @@ static int mark_space_init(struct mark_space *space, struct heap *heap) {
   if (!slabs)
     return 0;
 
+  uint8_t dead = METADATA_BYTE_MARK_0;
+  uint8_t survived = METADATA_BYTE_MARK_1;
+  uint8_t marked = METADATA_BYTE_MARK_2;
+  space->marked_mask = marked;
+  space->sweep_live_mask = METADATA_BYTE_YOUNG | survived | marked;
   space->slabs = slabs;
   space->nslabs = nslabs;
   space->low_addr = (uintptr_t) slabs;
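
Finally, one consequence of seeding sweep_live_mask with the young bit, sketched as a hypothetical check: an object allocated since the last collection carries only METADATA_BYTE_YOUNG, and the sweeper must treat it as live, while an object still wearing the current "dead" color is reclaimable.

#include <assert.h>
#include <stdint.h>

int main(void) {
  enum { YOUNG = 1, MARK_0 = 2, MARK_1 = 4, MARK_2 = 8 };
  uint8_t survived = MARK_1, marked = MARK_2;
  uint8_t sweep_live_mask = YOUNG | survived | marked;

  uint8_t fresh_allocation = YOUNG;    // set when the object is allocated
  uint8_t last_cycle_survivor = survived;
  uint8_t stale_corpse = MARK_0;       // the current "dead" color

  assert(fresh_allocation & sweep_live_mask);      // kept
  assert(last_cycle_survivor & sweep_live_mask);   // kept
  assert(!(stale_corpse & sweep_live_mask));       // reclaimed
  return 0;
}
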