mirror of https://git.savannah.gnu.org/git/guile.git

nofl space: Rework treatment of mark bits to avoid masks

This will allow us to free up some metadata bits.
Andy Wingo  2025-03-05 17:01:51 +01:00
parent 4d271e7492
commit 29cf0f40d3
2 changed files with 100 additions and 44 deletions
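For readers skimming the diff below: the old code tested liveness by AND-ing a metadata byte against a mask that combined several mark patterns, while the new code treats the mark as a small enumerated field and compares it for equality against the space's current or survivor mark. Here is a standalone sketch of the new-style predicates; the constant values are hypothetical stand-ins, since the real definitions live elsewhere in the nofl space header and are not part of this diff.

    #include <assert.h>
    #include <stdint.h>

    /* Hypothetical stand-ins for the nofl metadata constants. */
    enum {
      METADATA_BYTE_YOUNG     = 1,
      METADATA_BYTE_MARK_0    = 2,
      METADATA_BYTE_MARK_1    = 3,
      METADATA_BYTE_MARK_2    = 4,
      METADATA_BYTE_MARK_MASK = 7,
    };

    /* New style: compare the mark field against a single mark value. */
    static inline int
    metadata_byte_has_mark(uint8_t byte, uint8_t mark) {
      return (byte & METADATA_BYTE_MARK_MASK) == mark;
    }

    /* Object starts are bytes that are either young or carry the given mark. */
    static inline int
    metadata_byte_is_young_or_has_mark(uint8_t byte, uint8_t mark) {
      return metadata_byte_has_mark(byte, METADATA_BYTE_YOUNG)
        || metadata_byte_has_mark(byte, mark);
    }

    int main(void) {
      uint8_t byte = METADATA_BYTE_MARK_1;     /* marked in some cycle */
      assert(metadata_byte_has_mark(byte, METADATA_BYTE_MARK_1));
      assert(!metadata_byte_has_mark(byte, METADATA_BYTE_MARK_2));
      assert(metadata_byte_is_young_or_has_mark(METADATA_BYTE_YOUNG,
                                                METADATA_BYTE_MARK_2));
      return 0;
    }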

Changed file 1 of 2:

@@ -150,8 +150,8 @@ struct nofl_block_stack {
 #define NOFL_PAGE_OUT_QUEUE_SIZE 4
 struct nofl_space {
-  uint8_t live_mask;
-  uint8_t marked_mask;
+  uint8_t current_mark;
+  uint8_t survivor_mark;
   uint8_t evacuating;
   struct extents *extents;
   size_t heap_size;
@@ -249,10 +249,17 @@ enum nofl_metadata_byte {
 };
 static uint8_t
-nofl_rotate_dead_survivor_marked(uint8_t mask) {
-  uint8_t all =
-    NOFL_METADATA_BYTE_MARK_0 | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2;
-  return ((mask << 1) | (mask >> 2)) & all;
+nofl_advance_current_mark(uint8_t mark) {
+  switch (mark) {
+    case NOFL_METADATA_BYTE_MARK_0:
+      return NOFL_METADATA_BYTE_MARK_1;
+    case NOFL_METADATA_BYTE_MARK_1:
+      return NOFL_METADATA_BYTE_MARK_2;
+    case NOFL_METADATA_BYTE_MARK_2:
+      return NOFL_METADATA_BYTE_MARK_0;
+    default:
+      GC_CRASH();
+  }
 }
 static struct gc_lock
@@ -702,12 +709,23 @@ nofl_allocator_finish_hole(struct nofl_allocator *alloc) {
   }
 }
+static inline int
+nofl_metadata_byte_has_mark(uint8_t byte, uint8_t marked) {
+  return (byte & NOFL_METADATA_BYTE_MARK_MASK) == marked;
+}
+static inline int
+nofl_metadata_byte_is_young_or_has_mark(uint8_t byte, uint8_t marked) {
+  return (nofl_metadata_byte_has_mark(byte, NOFL_METADATA_BYTE_YOUNG)
+          || nofl_metadata_byte_has_mark(byte, marked));
+}
 // Sweep some heap to reclaim free space, advancing alloc->alloc and
 // alloc->sweep. Return the size of the hole in granules, or 0 if we
 // reached the end of the block.
 static size_t
 nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc,
-                                  uint8_t live_mask) {
+                                  uint8_t survivor_mark) {
   GC_ASSERT(nofl_allocator_has_block(alloc));
   GC_ASSERT_EQ(alloc->alloc, alloc->sweep);
   uintptr_t sweep = alloc->sweep;
@@ -724,7 +742,8 @@ nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc,
   // right after a hole, which can point to either the end of the
   // block or to a live object. Assume that a live object is more
   // common.
-  while (limit_granules && (metadata[0] & live_mask)) {
+  while (limit_granules &&
+         nofl_metadata_byte_has_mark(metadata[0], survivor_mark)) {
     // Object survived collection; skip over it and continue sweeping.
     size_t object_granules = nofl_space_live_object_granules(metadata);
     sweep += object_granules * NOFL_GRANULE_SIZE;
@@ -737,8 +756,9 @@ nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc,
     return 0;
   }
-  size_t hole_granules = scan_for_byte_with_bits(metadata, limit_granules,
-                                                 live_mask);
+  size_t hole_granules = scan_for_byte_with_tag(metadata, limit_granules,
+                                                NOFL_METADATA_BYTE_MARK_MASK,
+                                                survivor_mark);
   size_t free_bytes = hole_granules * NOFL_GRANULE_SIZE;
   GC_ASSERT(hole_granules);
   GC_ASSERT(hole_granules <= limit_granules);
@@ -758,10 +778,10 @@ nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc,
 static void
 nofl_allocator_finish_sweeping_in_block(struct nofl_allocator *alloc,
-                                        uint8_t live_mask) {
+                                        uint8_t survivor_mark) {
   do {
     nofl_allocator_finish_hole(alloc);
-  } while (nofl_allocator_next_hole_in_block(alloc, live_mask));
+  } while (nofl_allocator_next_hole_in_block(alloc, survivor_mark));
 }
 static void
@@ -775,7 +795,7 @@ nofl_allocator_release_block(struct nofl_allocator *alloc,
   } else if (space->evacuating) {
     nofl_allocator_release_full_evacuation_target(alloc, space);
   } else {
-    nofl_allocator_finish_sweeping_in_block(alloc, space->live_mask);
+    nofl_allocator_finish_sweeping_in_block(alloc, space->survivor_mark);
     nofl_allocator_release_full_block(alloc, space);
   }
 }
@@ -805,7 +825,7 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
   // Sweep current block for a hole.
   if (nofl_allocator_has_block(alloc)) {
     size_t granules =
-      nofl_allocator_next_hole_in_block(alloc, space->live_mask);
+      nofl_allocator_next_hole_in_block(alloc, space->survivor_mark);
     if (granules)
       return granules;
     else
@@ -823,7 +843,7 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
     alloc->block.summary->holes_with_fragmentation = 0;
     alloc->block.summary->fragmentation_granules = 0;
     size_t granules =
-      nofl_allocator_next_hole_in_block(alloc, space->live_mask);
+      nofl_allocator_next_hole_in_block(alloc, space->survivor_mark);
     if (granules)
       return granules;
     nofl_allocator_release_full_block(alloc, space);
@@ -1130,16 +1150,6 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
   }
 }
-static void
-nofl_space_update_mark_patterns(struct nofl_space *space,
-                                int advance_mark_mask) {
-  uint8_t survivor_mask = space->marked_mask;
-  uint8_t next_marked_mask = nofl_rotate_dead_survivor_marked(survivor_mask);
-  if (advance_mark_mask)
-    space->marked_mask = next_marked_mask;
-  space->live_mask = survivor_mask | next_marked_mask;
-}
 static void
 nofl_space_clear_block_marks(struct nofl_space *space) {
   for (size_t s = 0; s < space->nslabs; s++) {
@@ -1152,7 +1162,7 @@ static void
 nofl_space_prepare_gc(struct nofl_space *space, enum gc_collection_kind kind) {
   int is_minor = kind == GC_COLLECTION_MINOR;
   if (!is_minor) {
-    nofl_space_update_mark_patterns(space, 1);
+    space->current_mark = nofl_advance_current_mark(space->current_mark);
    nofl_space_clear_block_marks(space);
   }
 }
@@ -1209,7 +1219,7 @@ nofl_space_promote_blocks(struct nofl_space *space) {
     block.summary->holes_with_fragmentation = 0;
     block.summary->fragmentation_granules = 0;
     struct nofl_allocator alloc = { block.addr, block.addr, block };
-    nofl_allocator_finish_sweeping_in_block(&alloc, space->live_mask);
+    nofl_allocator_finish_sweeping_in_block(&alloc, space->current_mark);
     atomic_fetch_add(&space->old_generation_granules,
                      NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules);
     nofl_block_list_push(&space->old, block);
@@ -1238,7 +1248,7 @@ nofl_space_verify_sweepable_blocks(struct nofl_space *space,
     uintptr_t limit = addr + NOFL_BLOCK_SIZE;
     uint8_t *meta = nofl_metadata_byte_for_addr(b.addr);
     while (addr < limit) {
-      if (meta[0] & space->live_mask) {
+      if (nofl_metadata_byte_has_mark(meta[0], space->current_mark)) {
         struct gc_ref obj = gc_ref(addr);
         size_t obj_bytes;
         gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes);
@@ -1275,8 +1285,7 @@ nofl_space_verify_swept_blocks(struct nofl_space *space,
     uint8_t *meta = nofl_metadata_byte_for_addr(addr);
     while (addr < limit) {
       if (meta[0]) {
-        GC_ASSERT(meta[0] & space->marked_mask);
-        GC_ASSERT_EQ(meta[0] & ~(space->marked_mask | NOFL_METADATA_BYTE_END), 0);
+        GC_ASSERT(nofl_metadata_byte_has_mark(meta[0], space->current_mark));
         struct gc_ref obj = gc_ref(addr);
         size_t obj_bytes;
         gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes);
@@ -1381,7 +1390,7 @@ nofl_space_finish_gc(struct nofl_space *space,
   gc_lock_release(&lock);
   nofl_space_promote_blocks(space);
   nofl_space_reset_statistics(space);
-  nofl_space_update_mark_patterns(space, 0);
+  space->survivor_mark = space->current_mark;
   if (GC_DEBUG)
     nofl_space_verify_before_restart(space);
 }
@@ -1426,7 +1435,7 @@ nofl_space_set_mark_relaxed(struct nofl_space *space, uint8_t *metadata,
                             uint8_t byte) {
   uint8_t mask = NOFL_METADATA_BYTE_MARK_MASK;
   atomic_store_explicit(metadata,
-                        (byte & ~mask) | space->marked_mask,
+                        (byte & ~mask) | space->current_mark,
                         memory_order_relaxed);
   return 1;
 }
@@ -1435,7 +1444,7 @@ static inline int
 nofl_space_set_mark(struct nofl_space *space, uint8_t *metadata, uint8_t byte) {
   uint8_t mask = NOFL_METADATA_BYTE_MARK_MASK;
   atomic_store_explicit(metadata,
-                        (byte & ~mask) | space->marked_mask,
+                        (byte & ~mask) | space->current_mark,
                         memory_order_release);
   return 1;
 }
@@ -1515,7 +1524,7 @@ nofl_space_evacuate(struct nofl_space *space, uint8_t *metadata, uint8_t byte,
     // First check again if someone else tried to evacuate this object and ended
     // up marking in place instead.
     byte = atomic_load_explicit(metadata, memory_order_acquire);
-    if (byte & space->marked_mask) {
+    if (nofl_metadata_byte_has_mark(byte, space->current_mark)) {
       // Indeed, already marked in place.
       gc_atomic_forward_abort(&fwd);
       return 0;
@@ -1581,7 +1590,7 @@ nofl_space_evacuate_or_mark_object(struct nofl_space *space,
                                    struct nofl_allocator *evacuate) {
   uint8_t *metadata = nofl_metadata_byte_for_object(old_ref);
   uint8_t byte = *metadata;
-  if (byte & space->marked_mask)
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
     return 0;
   if (nofl_space_should_evacuate(space, byte, old_ref))
@@ -1626,7 +1635,7 @@ nofl_space_forward_or_mark_if_traced(struct nofl_space *space,
                                      struct gc_ref ref) {
   uint8_t *metadata = nofl_metadata_byte_for_object(ref);
   uint8_t byte = *metadata;
-  if (byte & space->marked_mask)
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
     return 1;
   if (!nofl_space_should_evacuate(space, byte, ref))
@@ -1663,13 +1672,12 @@ nofl_space_mark_conservative_ref(struct nofl_space *space,
   uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed);
   // Already marked object? Nothing to do.
-  if (byte & space->marked_mask)
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
     return gc_ref_null();
   // Addr is the not start of an unmarked object? Search backwards if
   // we have interior pointers, otherwise not an object.
-  uint8_t object_start_mask = space->live_mask | NOFL_METADATA_BYTE_YOUNG;
-  if (!(byte & object_start_mask)) {
+  if (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark)) {
    if (!possibly_interior)
      return gc_ref_null();
@@ -1685,9 +1693,12 @@ nofl_space_mark_conservative_ref(struct nofl_space *space,
       // Ran into the end of some other allocation? Not an object, then.
       if (byte & NOFL_METADATA_BYTE_END)
         return gc_ref_null();
+      // Object already marked? Nothing to do.
+      if (nofl_metadata_byte_has_mark(byte, space->current_mark))
+        return gc_ref_null();
       // Continue until we find object start.
-    } while (!(byte & object_start_mask));
+    } while (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark));
     // Found object start, and object is unmarked; adjust addr.
     addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE;
@@ -1842,8 +1853,7 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic,
   if (!slabs)
     return 0;
-  space->marked_mask = NOFL_METADATA_BYTE_MARK_0;
-  nofl_space_update_mark_patterns(space, 0);
+  space->current_mark = space->survivor_mark = NOFL_METADATA_BYTE_MARK_0;
   space->extents = extents_allocate(10);
   nofl_space_add_slabs(space, slabs, nslabs);
   pthread_mutex_init(&space->lock, NULL);
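Taken together, the changes in this file replace the pair of bitmasks (live_mask, marked_mask) with a pair of mark values: nofl_space_prepare_gc advances current_mark at the start of a non-minor collection, and nofl_space_finish_gc copies it into survivor_mark, so while a collection is in flight the sweeper still recognizes objects stamped with the previous cycle's mark. A minimal sketch of that lifecycle follows; the mark values and helper names are hypothetical stand-ins for the nofl functions shown in the diff.

    #include <assert.h>
    #include <stdint.h>

    enum { MARK_0 = 2, MARK_1 = 3, MARK_2 = 4 };  /* hypothetical values */

    /* Mirrors nofl_advance_current_mark: cycle through the three marks. */
    static uint8_t advance_mark(uint8_t mark) {
      switch (mark) {
        case MARK_0: return MARK_1;
        case MARK_1: return MARK_2;
        case MARK_2: return MARK_0;
        default: assert(0); return 0;
      }
    }

    struct space { uint8_t current_mark, survivor_mark; };

    /* On a non-minor collection, prepare_gc advances the current mark... */
    static void prepare_major_gc(struct space *s) {
      s->current_mark = advance_mark(s->current_mark);
    }

    /* ...and finish_gc makes it the survivor mark for the next cycle. */
    static void finish_gc(struct space *s) {
      s->survivor_mark = s->current_mark;
    }

    int main(void) {
      struct space s = { MARK_0, MARK_0 };   /* as in nofl_space_init */
      prepare_major_gc(&s);
      /* During the collection, newly marked objects get MARK_1 while the
         sweeper still treats MARK_0 objects as survivors of the last cycle. */
      assert(s.current_mark == MARK_1 && s.survivor_mark == MARK_0);
      finish_gc(&s);
      assert(s.current_mark == s.survivor_mark);
      return 0;
    }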

Changed file 2 of 2:

@@ -31,7 +31,7 @@ match_bytes_against_bits(uint64_t bytes, uint8_t mask) {
   return bytes & broadcast_byte(mask);
 }
-static size_t
+static inline size_t
 scan_for_byte_with_bits(uint8_t *ptr, size_t limit, uint8_t mask) {
   size_t n = 0;
   size_t unaligned = ((uintptr_t) ptr) & 7;
@@ -53,6 +53,52 @@ scan_for_byte_with_bits(uint8_t *ptr, size_t limit, uint8_t mask) {
   return limit;
 }
+static inline uint64_t
+match_bytes_against_tag(uint64_t bytes, uint8_t mask, uint8_t tag) {
+  // Precondition: tag within mask.
+  GC_ASSERT_EQ(tag & mask, tag);
+  // Precondition: high bit of mask byte is empty, so that we can add without
+  // overflow.
+  GC_ASSERT_EQ(mask & 0x7f, mask);
+  // Precondition: mask is low bits of byte.
+  GC_ASSERT(mask);
+  GC_ASSERT_EQ(mask & (mask + 1), 0);
+  uint64_t vmask = broadcast_byte(mask);
+  uint64_t vtest = broadcast_byte(mask + 1);
+  uint64_t vtag = broadcast_byte(tag);
+  bytes &= vmask;
+  uint64_t m = (bytes ^ vtag) + vmask;
+  return (m & vtest) ^ vtest;
+}
+static inline size_t
+scan_for_byte_with_tag(uint8_t *ptr, size_t limit, uint8_t mask, uint8_t tag) {
+  // The way we handle unaligned reads by padding high bytes with zeroes assumes
+  // that all-zeroes is not a matching byte.
+  GC_ASSERT(tag);
+  size_t n = 0;
+  size_t unaligned = ((uintptr_t) ptr) & 7;
+  if (unaligned) {
+    uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8);
+    uint64_t match = match_bytes_against_tag(bytes, mask, tag);
+    if (match)
+      return count_zero_bytes(match);
+    n += 8 - unaligned;
+  }
+  for(; n < limit; n += 8) {
+    uint64_t bytes = load_eight_aligned_bytes(ptr + n);
+    uint64_t match = match_bytes_against_tag(bytes, mask, tag);
+    if (match)
+      return n + count_zero_bytes(match);
+  }
+  return limit;
+}
 static inline uint64_t
 match_bytes_against_2_tags(uint64_t bytes, uint8_t mask, uint8_t tag1,
                            uint8_t tag2)
@@ -78,7 +124,7 @@ match_bytes_against_2_tags(uint64_t bytes, uint8_t mask, uint8_t tag1,
   return ((m1 & m2) & vtest) ^ vtest;
 }
-static size_t
+static inline size_t
 scan_for_byte_with_tags(uint8_t *ptr, size_t limit, uint8_t mask,
                         uint8_t tag1, uint8_t tag2) {
   // The way we handle unaligned reads by padding high bytes with zeroes assumes
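As a worked example of the byte-matching trick in match_bytes_against_tag added above: with a contiguous low-bit mask, XOR-ing each masked byte with the tag yields zero exactly for matching bytes, and adding the mask then carries into the next bit (mask + 1) only for non-matching bytes; masking with that test bit and flipping it leaves the test bit set precisely where a byte matched. The sketch below re-derives the helper so it can be run standalone; broadcast_byte and count_zero_bytes are not shown in this diff, so the multiply and count-trailing-zeros versions here are assumptions about their actual definitions.

    #include <assert.h>
    #include <stdint.h>

    /* Assumed implementation: replicate a byte into all eight lanes. */
    static inline uint64_t broadcast_byte(uint8_t b) {
      return (uint64_t)b * 0x0101010101010101ULL;
    }

    /* Assumed implementation: number of trailing all-zero bytes. */
    static inline size_t count_zero_bytes(uint64_t m) {
      return (size_t)__builtin_ctzll(m) / 8;
    }

    /* Same logic as the match_bytes_against_tag added in this commit. */
    static inline uint64_t
    match_bytes_against_tag(uint64_t bytes, uint8_t mask, uint8_t tag) {
      uint64_t vmask = broadcast_byte(mask);
      uint64_t vtest = broadcast_byte(mask + 1);
      uint64_t vtag  = broadcast_byte(tag);
      bytes &= vmask;
      uint64_t m = (bytes ^ vtag) + vmask;  /* carries into the test bit iff byte != tag */
      return (m & vtest) ^ vtest;           /* test bit left set iff byte == tag */
    }

    int main(void) {
      /* Eight metadata bytes, least-significant byte first: bytes 1, 3 and 6
         carry mark value 2 in their low three bits. */
      uint8_t meta[8] = { 0x01, 0x02, 0x05, 0x02, 0x00, 0x07, 0x02, 0x03 };
      uint64_t bytes = 0;
      for (int i = 0; i < 8; i++)
        bytes |= (uint64_t)meta[i] << (8 * i);

      uint64_t match = match_bytes_against_tag(bytes, /*mask=*/7, /*tag=*/2);
      /* The test bit (8) appears in exactly the matching byte lanes... */
      assert(match == ((0x08ULL << 8) | (0x08ULL << 24) | (0x08ULL << 48)));
      /* ...and the first match is one granule past the scan pointer. */
      assert(count_zero_bytes(match) == 1);
      return 0;
    }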