Mirror of https://git.savannah.gnu.org/git/guile.git
Accelerate scanning of remembered set
commit 0fe13e1cab (parent 47c07dd0eb)
1 changed file with 64 additions and 49 deletions
whippet.h | 113 (+64/-49)
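What changed, in brief: the old loop tested one remembered-set byte per iteration; the new code loads eight bytes at a time into a uint64_t, skips the whole group when it is zero, and otherwise peels off each nonzero byte with a count-trailing-zeros instruction. Below is a minimal standalone sketch of that scanning idiom, separate from the patch itself (illustrative names; assumes a little-endian host and the GCC/Clang __builtin_ctzll builtin):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Load 8 bytes as one word, low byte = lowest address
// (cf. load_eight_aligned_bytes in the patch).
static uint64_t load8(const uint8_t *p) {
  uint64_t word;
  memcpy(&word, p, 8);
  return word;  // a big-endian host would need a byte swap here
}

int main(void) {
  uint8_t cards[8] = {0, 0, 0x40, 0, 0x40, 0, 0, 0x40};
  uint64_t word = load8(cards);
  while (word) {
    size_t offset = __builtin_ctzll(word) / 8;    // index of lowest nonzero byte
    word &= ~(((uint64_t)0xff) << (offset * 8));  // clear that byte, keep scanning
    printf("dirty card at offset %zu\n", offset); // prints 2, then 4, then 7
  }
  return 0;
}

Since most remembered-set bytes are zero in practice, the common case collapses to one load and one compare per eight cards.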
@@ -1030,39 +1030,73 @@ heap_object_is_young(struct heap *heap, struct gcobj *obj) {
   return (*object_metadata_byte(obj)) & METADATA_BYTE_YOUNG;
 }
 
-static void mark_space_trace_generational_roots(struct mark_space *space,
-                                                struct heap *heap) {
+static inline uint64_t load_eight_aligned_bytes(uint8_t *mark) {
+  ASSERT(((uintptr_t)mark & 7) == 0);
+  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
+  uint64_t word;
+  memcpy(&word, aligned_mark, 8);
+#ifdef WORDS_BIGENDIAN
+  word = __builtin_bswap64(word);
+#endif
+  return word;
+}
+
+static inline size_t count_zero_bytes(uint64_t bytes) {
+  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
+}
+
+static uint64_t broadcast_byte(uint8_t byte) {
+  uint64_t result = byte;
+  return result * 0x0101010101010101ULL;
+}
+
+// Note that it's quite possible (and even likely) that any given remset
+// byte doesn't hold any roots, if all stores were to nursery objects.
+STATIC_ASSERT_EQ(GRANULES_PER_REMSET_BYTE % 8, 0);
+static void mark_space_trace_card(struct mark_space *space,
+                                  struct heap *heap, struct slab *slab,
+                                  size_t card) {
+  uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0];
+  size_t granule_base = card * GRANULES_PER_REMSET_BYTE;
+  for (size_t granule_in_remset = 0;
+       granule_in_remset < GRANULES_PER_REMSET_BYTE;
+       granule_in_remset += 8, granule_base += 8) {
+    uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base);
+    mark_bytes &= space->sweep_mask;
+    while (mark_bytes) {
+      size_t granule_offset = count_zero_bytes(mark_bytes);
+      mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8));
+      size_t granule = granule_base + granule_offset;
+      uintptr_t addr = first_addr_in_slab + granule * GRANULE_SIZE;
+      struct gcobj *obj = (struct gcobj*)addr;
+      ASSERT(object_metadata_byte(obj) == &slab->metadata[granule]);
+      tracer_enqueue_root(&heap->tracer, obj);
+    }
+  }
+}
+
+static void mark_space_trace_remembered_set(struct mark_space *space,
+                                            struct heap *heap) {
   ASSERT(!space->evacuating);
-  uint8_t live_tenured_mask = space->live_mask;
   for (size_t s = 0; s < space->nslabs; s++) {
     struct slab *slab = &space->slabs[s];
     uint8_t *remset = slab->remembered_set;
-    // TODO: Load 8 bytes at a time instead.
-    for (size_t card = 0; card < REMSET_BYTES_PER_SLAB; card++) {
-      if (remset[card]) {
-        remset[card] = 0;
-        size_t base = card * GRANULES_PER_REMSET_BYTE;
-        size_t limit = base + GRANULES_PER_REMSET_BYTE;
-        // We could accelerate this but GRANULES_PER_REMSET_BYTE is 16
-        // on 64-bit hosts, so maybe it's not so important.
-        for (size_t granule = base; granule < limit; granule++) {
-          if (slab->metadata[granule] & space->live_mask) {
-            struct block *block0 = &slab->blocks[0];
-            uintptr_t addr = ((uintptr_t)block0->data) + granule * GRANULE_SIZE;
-            struct gcobj *obj = (struct gcobj*)addr;
-            ASSERT(object_metadata_byte(obj) == &slab->metadata[granule]);
-            tracer_enqueue_root(&heap->tracer, obj);
-          }
-        }
-        // Note that it's quite possible (and even likely) that this
-        // remset byte doesn't cause any roots, if all stores were to
-        // nursery objects.
+    for (size_t card_base = 0;
+         card_base < REMSET_BYTES_PER_SLAB;
+         card_base += 8) {
+      uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base);
+      if (!remset_bytes) continue;
+      memset(remset + card_base, 0, 8);
+      while (remset_bytes) {
+        size_t card_offset = count_zero_bytes(remset_bytes);
+        remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8));
+        mark_space_trace_card(space, heap, slab, card_base + card_offset);
       }
     }
   }
 }
 
-static void mark_space_clear_generational_roots(struct mark_space *space) {
+static void mark_space_clear_remembered_set(struct mark_space *space) {
   if (!GC_GENERATIONAL) return;
   for (size_t slab = 0; slab < space->nslabs; slab++) {
     memset(space->slabs[slab].remembered_set, 0, REMSET_BYTES_PER_SLAB);
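An aside on the broadcast_byte helper the hunk above introduces: multiplying a byte by 0x0101010101010101 replicates it into all eight lanes of a 64-bit word, which is how a mask like the sweep mask ANDed against eight metadata bytes at once can be built. A quick standalone self-check of that arithmetic (not part of the patch):

#include <assert.h>
#include <stdint.h>

static uint64_t broadcast_byte(uint8_t byte) {
  uint64_t result = byte;
  return result * 0x0101010101010101ULL;  // one copy of the byte per lane
}

int main(void) {
  assert(broadcast_byte(0x03) == 0x0303030303030303ULL);
  assert(broadcast_byte(0xff) == 0xffffffffffffffffULL);
  return 0;
}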
@@ -1072,9 +1106,9 @@ static void mark_space_clear_generational_roots(struct mark_space *space) {
 static void trace_generational_roots(struct heap *heap) {
   // TODO: Add lospace nursery.
   if (atomic_load(&heap->gc_kind) & GC_KIND_FLAG_MINOR) {
-    mark_space_trace_generational_roots(heap_mark_space(heap), heap);
+    mark_space_trace_remembered_set(heap_mark_space(heap), heap);
   } else {
-    mark_space_clear_generational_roots(heap_mark_space(heap));
+    mark_space_clear_remembered_set(heap_mark_space(heap));
   }
 }
 
@@ -1137,11 +1171,6 @@ static void reset_sweeper(struct mark_space *space) {
   space->next_block = (uintptr_t) &space->slabs[0].blocks;
 }
 
-static uint64_t broadcast_byte(uint8_t byte) {
-  uint64_t result = byte;
-  return result * 0x0101010101010101ULL;
-}
-
 static void update_mark_patterns(struct mark_space *space,
                                  int advance_mark_mask) {
   uint8_t survivor_mask = space->marked_mask;
@@ -1480,21 +1509,6 @@ static int sweep_word(uintptr_t *loc, uintptr_t sweep_mask) {
   return 0;
 }
 
-static inline uint64_t load_mark_bytes(uint8_t *mark) {
-  ASSERT(((uintptr_t)mark & 7) == 0);
-  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
-  uint64_t word;
-  memcpy(&word, aligned_mark, 8);
-#ifdef WORDS_BIGENDIAN
-  word = __builtin_bswap64(word);
-#endif
-  return word;
-}
-
-static inline size_t count_zero_bytes(uint64_t bytes) {
-  return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
-}
-
 static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   size_t n = 0;
   // If we have a hole, it is likely to be more that 8 granules long.
@@ -1502,7 +1516,7 @@ static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   // sweep pointer, then we load aligned mark words.
   size_t unaligned = ((uintptr_t) mark) & 7;
   if (unaligned) {
-    uint64_t bytes = load_mark_bytes(mark - unaligned) >> (unaligned * 8);
+    uint64_t bytes = load_eight_aligned_bytes(mark - unaligned) >> (unaligned * 8);
     bytes &= sweep_mask;
     if (bytes)
       return count_zero_bytes(bytes);
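A note on the unaligned prologue in this hunk: rounding mark down to an 8-byte boundary and shifting the loaded word right by unaligned * 8 discards exactly the metadata bytes that sit below the sweep pointer. That only works because load_eight_aligned_bytes normalizes the word to little-endian byte order, so lower addresses occupy lower-order bits. A small standalone illustration with made-up values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
  // Eight aligned metadata bytes; suppose the sweep pointer is at index 3.
  uint8_t metadata[8] = {1, 1, 1, 0, 0, 1, 0, 0};
  size_t unaligned = 3;
  uint64_t word;
  memcpy(&word, metadata, 8);  // little-endian host assumed, as in the sketch above
  word >>= unaligned * 8;      // drop bytes 0..2: they precede the sweep pointer
  if (word)                    // first mark at or after the pointer: index 5
    printf("next mark at index %zu\n", unaligned + __builtin_ctzll(word) / 8);
  return 0;
}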
@@ -1510,7 +1524,7 @@ static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   }
 
   for(; n < limit; n += 8) {
-    uint64_t bytes = load_mark_bytes(mark + n);
+    uint64_t bytes = load_eight_aligned_bytes(mark + n);
     bytes &= sweep_mask;
     if (bytes)
       return n + count_zero_bytes(bytes);
@@ -2014,7 +2028,8 @@ static inline void print_start_gc_stats(struct heap *heap) {
 }
 
 static inline void print_end_gc_stats(struct heap *heap) {
-  printf("Completed %ld collections\n", heap->count);
+  printf("Completed %ld collections (%ld major)\n",
+         heap->count, heap->count - heap->minor_count);
   printf("Heap size with overhead is %zd (%zu slabs)\n",
          heap->size, heap_mark_space(heap)->nslabs);
 }