
Accelerate scanning of remembered set

Andy Wingo, 2022-08-03 21:25:18 +02:00
commit 0fe13e1cab (parent 47c07dd0eb)

whippet.h (113 changed lines)

@@ -1030,39 +1030,73 @@ heap_object_is_young(struct heap *heap, struct gcobj *obj) {
   return (*object_metadata_byte(obj)) & METADATA_BYTE_YOUNG;
 }
 
-static void mark_space_trace_generational_roots(struct mark_space *space,
-                                                struct heap *heap) {
+static inline uint64_t load_eight_aligned_bytes(uint8_t *mark) {
+  ASSERT(((uintptr_t)mark & 7) == 0);
+  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
+  uint64_t word;
+  memcpy(&word, aligned_mark, 8);
+#ifdef WORDS_BIGENDIAN
+  word = __builtin_bswap64(word);
+#endif
+  return word;
+}
+
+static inline size_t count_zero_bytes(uint64_t bytes) {
+  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
+}
+
+static uint64_t broadcast_byte(uint8_t byte) {
+  uint64_t result = byte;
+  return result * 0x0101010101010101ULL;
+}
+
+// Note that it's quite possible (and even likely) that any given remset
+// byte doesn't hold any roots, if all stores were to nursery objects.
+STATIC_ASSERT_EQ(GRANULES_PER_REMSET_BYTE % 8, 0);
+static void mark_space_trace_card(struct mark_space *space,
+                                  struct heap *heap, struct slab *slab,
+                                  size_t card) {
+  uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0];
+  size_t granule_base = card * GRANULES_PER_REMSET_BYTE;
+  for (size_t granule_in_remset = 0;
+       granule_in_remset < GRANULES_PER_REMSET_BYTE;
+       granule_in_remset += 8, granule_base += 8) {
+    uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base);
+    mark_bytes &= space->sweep_mask;
+    while (mark_bytes) {
+      size_t granule_offset = count_zero_bytes(mark_bytes);
+      mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8));
+      size_t granule = granule_base + granule_offset;
+      uintptr_t addr = first_addr_in_slab + granule * GRANULE_SIZE;
+      struct gcobj *obj = (struct gcobj*)addr;
+      ASSERT(object_metadata_byte(obj) == &slab->metadata[granule]);
+      tracer_enqueue_root(&heap->tracer, obj);
+    }
+  }
+}
+
+static void mark_space_trace_remembered_set(struct mark_space *space,
+                                            struct heap *heap) {
   ASSERT(!space->evacuating);
-  uint8_t live_tenured_mask = space->live_mask;
   for (size_t s = 0; s < space->nslabs; s++) {
     struct slab *slab = &space->slabs[s];
     uint8_t *remset = slab->remembered_set;
-    // TODO: Load 8 bytes at a time instead.
-    for (size_t card = 0; card < REMSET_BYTES_PER_SLAB; card++) {
-      if (remset[card]) {
-        remset[card] = 0;
-        size_t base = card * GRANULES_PER_REMSET_BYTE;
-        size_t limit = base + GRANULES_PER_REMSET_BYTE;
-        // We could accelerate this but GRANULES_PER_REMSET_BYTE is 16
-        // on 64-bit hosts, so maybe it's not so important.
-        for (size_t granule = base; granule < limit; granule++) {
-          if (slab->metadata[granule] & space->live_mask) {
-            struct block *block0 = &slab->blocks[0];
-            uintptr_t addr = ((uintptr_t)block0->data) + granule * GRANULE_SIZE;
-            struct gcobj *obj = (struct gcobj*)addr;
-            ASSERT(object_metadata_byte(obj) == &slab->metadata[granule]);
-            tracer_enqueue_root(&heap->tracer, obj);
-          }
-        }
-        // Note that it's quite possible (and even likely) that this
-        // remset byte doesn't cause any roots, if all stores were to
-        // nursery objects.
+    for (size_t card_base = 0;
+         card_base < REMSET_BYTES_PER_SLAB;
+         card_base += 8) {
+      uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base);
+      if (!remset_bytes) continue;
+      memset(remset + card_base, 0, 8);
+      while (remset_bytes) {
+        size_t card_offset = count_zero_bytes(remset_bytes);
+        remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8));
+        mark_space_trace_card(space, heap, slab, card_base + card_offset);
       }
     }
   }
 }
 
-static void mark_space_clear_generational_roots(struct mark_space *space) {
+static void mark_space_clear_remembered_set(struct mark_space *space) {
   if (!GC_GENERATIONAL) return;
   for (size_t slab = 0; slab < space->nslabs; slab++) {
     memset(space->slabs[slab].remembered_set, 0, REMSET_BYTES_PER_SLAB);
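The heart of the change is this word-at-a-time idiom: load eight metadata bytes as one uint64_t, mask all eight lanes at once, and use count-trailing-zeros to jump directly to each byte of interest, so an all-zero word (the common case for a remembered set) costs one load and one branch. Here is a minimal standalone sketch of the idiom, not part of the commit; the toy metadata array, the 0x40 "live" bit, and main() are invented for illustration.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Load 8 bytes at an 8-byte-aligned address as one little-endian word. */
static inline uint64_t load_eight_aligned_bytes(uint8_t *bytes) {
  assert(((uintptr_t)bytes & 7) == 0);
  uint64_t word;
  memcpy(&word, bytes, 8);
#ifdef WORDS_BIGENDIAN
  word = __builtin_bswap64(word);  /* normalize: byte 0 is the low lane */
#endif
  return word;
}

/* Index of the first nonzero byte, or 8 if the whole word is zero. */
static inline size_t count_zero_bytes(uint64_t bytes) {
  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
}

/* Splat one byte across all eight lanes: 0x40 -> 0x4040404040404040. */
static uint64_t broadcast_byte(uint8_t byte) {
  return byte * 0x0101010101010101ULL;
}

int main(void) {
  /* Toy stand-in for slab metadata: one status byte per granule. */
  uint8_t metadata[16] __attribute__((aligned(8))) = {0};
  metadata[3] = 0x40;  /* pretend granules 3 and 9 are marked live */
  metadata[9] = 0x40;

  uint64_t mask = broadcast_byte(0x40);
  for (size_t base = 0; base < sizeof(metadata); base += 8) {
    uint64_t word = load_eight_aligned_bytes(metadata + base) & mask;
    while (word) {
      size_t offset = count_zero_bytes(word);
      word &= ~(((uint64_t)0xff) << (offset * 8));  /* clear; find next */
      printf("marked granule at index %zu\n", base + offset);
    }
  }
  return 0;
}

Clearing each byte as it is found means one ctzll per marked object rather than one comparison per granule; mark_space_trace_remembered_set and mark_space_trace_card above apply exactly this loop to the remembered-set bytes and the mark bytes, respectively.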
@@ -1072,9 +1106,9 @@ static void mark_space_clear_generational_roots(struct mark_space *space) {
 static void trace_generational_roots(struct heap *heap) {
   // TODO: Add lospace nursery.
   if (atomic_load(&heap->gc_kind) & GC_KIND_FLAG_MINOR) {
-    mark_space_trace_generational_roots(heap_mark_space(heap), heap);
+    mark_space_trace_remembered_set(heap_mark_space(heap), heap);
   } else {
-    mark_space_clear_generational_roots(heap_mark_space(heap));
+    mark_space_clear_remembered_set(heap_mark_space(heap));
   }
 }
 
@@ -1137,11 +1171,6 @@ static void reset_sweeper(struct mark_space *space) {
   space->next_block = (uintptr_t) &space->slabs[0].blocks;
 }
 
-static uint64_t broadcast_byte(uint8_t byte) {
-  uint64_t result = byte;
-  return result * 0x0101010101010101ULL;
-}
-
 static void update_mark_patterns(struct mark_space *space,
                                  int advance_mark_mask) {
   uint8_t survivor_mask = space->marked_mask;
@@ -1480,21 +1509,6 @@ static int sweep_word(uintptr_t *loc, uintptr_t sweep_mask) {
   return 0;
 }
 
-static inline uint64_t load_mark_bytes(uint8_t *mark) {
-  ASSERT(((uintptr_t)mark & 7) == 0);
-  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
-  uint64_t word;
-  memcpy(&word, aligned_mark, 8);
-#ifdef WORDS_BIGENDIAN
-  word = __builtin_bswap64(word);
-#endif
-  return word;
-}
-
-static inline size_t count_zero_bytes(uint64_t bytes) {
-  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
-}
-
 static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   size_t n = 0;
   // If we have a hole, it is likely to be more than 8 granules long.
@@ -1502,7 +1516,7 @@ static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   // sweep pointer, then we load aligned mark words.
   size_t unaligned = ((uintptr_t) mark) & 7;
   if (unaligned) {
-    uint64_t bytes = load_mark_bytes(mark - unaligned) >> (unaligned * 8);
+    uint64_t bytes = load_eight_aligned_bytes(mark - unaligned) >> (unaligned * 8);
     bytes &= sweep_mask;
     if (bytes)
       return count_zero_bytes(bytes);
@@ -1510,7 +1524,7 @@ static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   }
 
   for(; n < limit; n += 8) {
-    uint64_t bytes = load_mark_bytes(mark + n);
+    uint64_t bytes = load_eight_aligned_bytes(mark + n);
     bytes &= sweep_mask;
     if (bytes)
       return n + count_zero_bytes(bytes);
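The next_mark hunks above also show how the idiom copes with a cursor that is not 8-byte aligned: round the pointer down, do the aligned load, and shift the bytes already behind the cursor out of the low end of the word. A sketch of just that step, reusing load_eight_aligned_bytes and count_zero_bytes from the earlier sketch (first_match is a hypothetical name, not from the patch):

/* Return the index of the first byte in mark[0..limit) that matches the
   broadcast mask; a result >= limit means no match was found. */
static size_t first_match(uint8_t *mark, size_t limit, uint64_t mask) {
  size_t n = 0;
  size_t unaligned = ((uintptr_t) mark) & 7;
  if (unaligned) {
    /* Align down, load, then shift out the bytes before the cursor. */
    uint64_t bytes = load_eight_aligned_bytes(mark - unaligned) >> (unaligned * 8);
    bytes &= mask;
    if (bytes)
      return count_zero_bytes(bytes);
    n += 8 - unaligned;
  }
  for (; n < limit; n += 8) {
    uint64_t bytes = load_eight_aligned_bytes(mark + n) & mask;
    if (bytes)
      return n + count_zero_bytes(bytes);
  }
  return limit;
}

The right shift is why load_eight_aligned_bytes byte-swaps on big-endian hosts: once the word is normalized to little-endian lane order, shifting right by one byte always discards the byte at the lowest address.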
@@ -2014,7 +2028,8 @@ static inline void print_start_gc_stats(struct heap *heap) {
 }
 
 static inline void print_end_gc_stats(struct heap *heap) {
-  printf("Completed %ld collections\n", heap->count);
+  printf("Completed %ld collections (%ld major)\n",
+         heap->count, heap->count - heap->minor_count);
   printf("Heap size with overhead is %zd (%zu slabs)\n",
          heap->size, heap_mark_space(heap)->nslabs);
 }