From d22eb889482f3022eaeaab17bd67123d0e2f9990 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 7 Mar 2025 12:54:29 +0100 Subject: [PATCH] nofl space / mmc supports untagged allocations --- api/mmc-attrs.h | 33 +++++++++++- src/gc-trace.h | 12 +++++ src/large-object-space.h | 28 +++++++++-- src/mmc.c | 106 ++++++++++++++++++++++++++------------- src/nofl-space.h | 79 +++++++++++++++++++++++------ src/pcc.c | 2 +- src/semi.c | 2 +- 7 files changed, 203 insertions(+), 59 deletions(-) diff --git a/api/mmc-attrs.h b/api/mmc-attrs.h index 3241677dd..9371f8abe 100644 --- a/api/mmc-attrs.h +++ b/api/mmc-attrs.h @@ -31,10 +31,39 @@ static inline size_t gc_allocator_alloc_table_alignment(void) { return 4 * 1024 * 1024; } static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) { - return 1; + uint8_t young = 1; + uint8_t trace_precisely = 0; + uint8_t trace_none = 8; + uint8_t trace_conservatively = 16; + uint8_t pinned = 16; + if (GC_CONSERVATIVE_TRACE) { + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + return young | trace_conservatively; + case GC_ALLOCATION_TAGGED_POINTERLESS: + return young | trace_none; + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return young | trace_none; + default: + GC_CRASH(); + }; + } else { + switch (kind) { + case GC_ALLOCATION_TAGGED: + return young | trace_precisely; + case GC_ALLOCATION_TAGGED_POINTERLESS: + return young | trace_none; + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return young | trace_none | pinned; + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + default: + GC_CRASH(); + }; + } } static inline uint8_t gc_allocator_alloc_table_end_pattern(void) { - return 16; + return 32; } static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) { diff --git a/src/gc-trace.h b/src/gc-trace.h index b9e4691e8..cc1dd2808 100644 --- a/src/gc-trace.h +++ b/src/gc-trace.h @@ -28,6 +28,18 @@ static inline int gc_has_conservative_roots(void) { gc_has_global_conservative_roots(); } +enum gc_trace_kind { + GC_TRACE_PRECISELY, + GC_TRACE_NONE, + GC_TRACE_CONSERVATIVELY, + GC_TRACE_EPHEMERON, +}; + +struct gc_trace_plan { + enum gc_trace_kind kind; + size_t size; // For conservative tracing. 
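+               // Other trace kinds leave this zero / ignore it.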
+}; + static inline int gc_conservative_ref_might_be_a_heap_object(struct gc_conservative_ref ref, int possibly_interior) { diff --git a/src/large-object-space.h b/src/large-object-space.h index 887cdef0e..cdd798343 100644 --- a/src/large-object-space.h +++ b/src/large-object-space.h @@ -11,6 +11,7 @@ #include "gc-assert.h" #include "gc-ref.h" #include "gc-conservative-ref.h" +#include "gc-trace.h" #include "address-map.h" #include "address-set.h" #include "background-thread.h" @@ -35,6 +36,7 @@ struct large_object { struct large_object_node; struct large_object_live_data { uint8_t mark; + enum gc_trace_kind trace; }; struct large_object_dead_data { uint8_t age; @@ -166,14 +168,27 @@ large_object_space_start_gc(struct large_object_space *space, int is_minor_gc) { } } -static inline size_t -large_object_space_object_size(struct large_object_space *space, - struct gc_ref ref) { +static inline struct gc_trace_plan +large_object_space_object_trace_plan(struct large_object_space *space, + struct gc_ref ref) { uintptr_t node_bits = address_map_lookup(&space->object_map, gc_ref_value(ref), 0); GC_ASSERT(node_bits); struct large_object_node *node = (struct large_object_node*) node_bits; - return node->key.size; + switch (node->value.live.trace) { + case GC_TRACE_PRECISELY: + return (struct gc_trace_plan){ GC_TRACE_PRECISELY, }; + case GC_TRACE_NONE: + return (struct gc_trace_plan){ GC_TRACE_NONE, }; +#if GC_CONSERVATIVE_TRACE + case GC_TRACE_CONSERVATIVELY: { + return (struct gc_trace_plan){ GC_TRACE_CONSERVATIVELY, node->key.size }; + } + // No large ephemerons. +#endif + default: + GC_CRASH(); + } } static uint8_t* @@ -402,7 +417,8 @@ large_object_space_mark_conservative_ref(struct large_object_space *space, } static void* -large_object_space_alloc(struct large_object_space *space, size_t npages) { +large_object_space_alloc(struct large_object_space *space, size_t npages, + enum gc_trace_kind trace) { void *ret = NULL; pthread_mutex_lock(&space->lock); @@ -422,6 +438,7 @@ large_object_space_alloc(struct large_object_space *space, size_t npages) { node->value.is_live = 1; memset(&node->value.live, 0, sizeof(node->value.live)); node->value.live.mark = LARGE_OBJECT_NURSERY; + node->value.live.trace = trace; // If the hole is actually too big, trim its tail. if (node->key.size > size) { @@ -458,6 +475,7 @@ large_object_space_alloc(struct large_object_space *space, size_t npages) { struct large_object_data v = {0,}; v.is_live = 1; v.live.mark = LARGE_OBJECT_NURSERY; + v.live.trace = trace; pthread_mutex_lock(&space->object_tree_lock); struct large_object_node *node = diff --git a/src/mmc.c b/src/mmc.c index d5716558f..081d7b83a 100644 --- a/src/mmc.c +++ b/src/mmc.c @@ -332,37 +332,41 @@ trace_conservative_edges(uintptr_t low, uintptr_t high, int possibly_interior, possibly_interior); } -static inline void -trace_one_conservatively(struct gc_ref ref, struct gc_heap *heap, - struct gc_trace_worker *worker) { - size_t bytes; +static inline struct gc_trace_plan +trace_plan(struct gc_heap *heap, struct gc_ref ref) { if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) { - // Generally speaking we trace conservatively and don't allow much - // in the way of incremental precise marking on a - // conservative-by-default heap. But, we make an exception for - // ephemerons. 
- if (GC_UNLIKELY(nofl_is_ephemeron(ref))) { - gc_trace_ephemeron(gc_ref_heap_object(ref), tracer_visit, heap, - worker); - return; - } - bytes = nofl_space_object_size(heap_nofl_space(heap), ref); + return nofl_space_object_trace_plan(heap_nofl_space(heap), ref); } else { - bytes = large_object_space_object_size(heap_large_object_space(heap), ref); + return large_object_space_object_trace_plan(heap_large_object_space(heap), + ref); } - // Intraheap edges are not interior. - int possibly_interior = 0; - trace_conservative_edges(gc_ref_value(ref), gc_ref_value(ref) + bytes, - possibly_interior, heap, worker); } static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, struct gc_trace_worker *worker) { - if (gc_has_conservative_intraheap_edges()) - trace_one_conservatively(ref, heap, worker); - else - gc_trace_object(ref, tracer_visit, heap, worker, NULL); + struct gc_trace_plan plan = trace_plan(heap, ref); + switch (plan.kind) { + case GC_TRACE_PRECISELY: + gc_trace_object(ref, tracer_visit, heap, worker, NULL); + break; + case GC_TRACE_NONE: + break; + case GC_TRACE_CONSERVATIVELY: { + // Intraheap edges are not interior. + uintptr_t addr = gc_ref_value(ref); + int possibly_interior = 0; + trace_conservative_edges(addr, addr + plan.size, possibly_interior, + heap, worker); + break; + } + case GC_TRACE_EPHEMERON: + gc_trace_ephemeron(gc_ref_heap_object(ref), tracer_visit, heap, + worker); + break; + default: + GC_CRASH(); + } } static inline void @@ -860,8 +864,36 @@ gc_safepoint_slow(struct gc_mutator *mut) { heap_unlock(heap); } +static enum gc_trace_kind +compute_trace_kind(enum gc_allocation_kind kind) { + if (GC_CONSERVATIVE_TRACE) { + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + return GC_TRACE_CONSERVATIVELY; + case GC_ALLOCATION_TAGGED_POINTERLESS: + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return GC_TRACE_NONE; + default: + GC_CRASH(); + }; + } else { + switch (kind) { + case GC_ALLOCATION_TAGGED: + return GC_TRACE_PRECISELY; + case GC_ALLOCATION_TAGGED_POINTERLESS: + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return GC_TRACE_NONE; + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + default: + GC_CRASH(); + }; + } +} + static void* -allocate_large(struct gc_mutator *mut, size_t size) { +allocate_large(struct gc_mutator *mut, size_t size, + enum gc_trace_kind kind) { struct gc_heap *heap = mutator_heap(mut); struct nofl_space *nofl_space = heap_nofl_space(heap); struct large_object_space *lospace = heap_large_object_space(heap); @@ -875,7 +907,7 @@ allocate_large(struct gc_mutator *mut, size_t size) { trigger_collection(mut, GC_COLLECTION_COMPACTING, 0); atomic_fetch_add(&heap->large_object_pages, npages); - void *ret = large_object_space_alloc(lospace, npages); + void *ret = large_object_space_alloc(lospace, npages, kind); if (!ret) { perror("weird: we have the space but mmap didn't work"); @@ -893,17 +925,10 @@ collect_for_small_allocation(void *mut) { void* gc_allocate_slow(struct gc_mutator *mut, size_t size, enum gc_allocation_kind kind) { - if (GC_UNLIKELY(kind != GC_ALLOCATION_TAGGED - && kind != GC_ALLOCATION_TAGGED_POINTERLESS)) { - fprintf(stderr, "mmc collector cannot make allocations of kind %d\n", - (int)kind); - GC_CRASH(); - } - GC_ASSERT(size > 0); // allocating 0 bytes would be silly if (size > gc_allocator_large_threshold()) - return allocate_large(mut, size); + return allocate_large(mut, size, compute_trace_kind(kind)); return gc_ref_heap_object(nofl_allocate(&mut->allocator, 
heap_nofl_space(mutator_heap(mut)), @@ -1121,7 +1146,20 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), offsetof(struct nofl_allocator, sweep)); GC_ASSERT_EQ(gc_allocator_alloc_table_alignment(), NOFL_SLAB_SIZE); - GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(), NOFL_METADATA_BYTE_YOUNG); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_TAGGED), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_PRECISELY); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_TAGGED_POINTERLESS), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_NONE); + if (GC_CONSERVATIVE_TRACE) { + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_UNTAGGED_CONSERVATIVE), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_UNTAGGED_POINTERLESS), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_NONE); + } else { + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_UNTAGGED_POINTERLESS), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_NONE | + NOFL_METADATA_BYTE_PINNED); + } GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), NOFL_METADATA_BYTE_END); if (GC_GENERATIONAL) { GC_ASSERT_EQ(gc_write_barrier_field_table_alignment(), NOFL_SLAB_SIZE); diff --git a/src/nofl-space.h b/src/nofl-space.h index 2ad64dc4f..9a7f29304 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -183,6 +183,11 @@ struct nofl_allocator { struct nofl_block_ref block; }; +#if GC_CONSERVATIVE_TRACE && GC_CONCURRENT_TRACE +// There are just not enough bits in the mark table. +#error Unsupported configuration +#endif + // Each granule has one mark byte stored in a side table. A granule's // mark state is a whole byte instead of a bit to facilitate parallel // marking. (Parallel markers are allowed to race.) 
We also use this @@ -236,32 +241,32 @@ enum nofl_metadata_byte { NOFL_METADATA_BYTE_YOUNG = 1, NOFL_METADATA_BYTE_MARK_0 = 2, NOFL_METADATA_BYTE_MARK_1 = 3, -#if GC_CONCURRENT_TRACE NOFL_METADATA_BYTE_MARK_2 = 4, NOFL_METADATA_BYTE_MARK_MASK = 7, - /* NOFL_METADATA_BYTE_UNUSED_0 = 8, */ -#else - NOFL_METADATA_BYTE_MARK_MASK = 3, - /* NOFL_METADATA_BYTE_UNUSED_0 = 4, */ - /* NOFL_METADATA_BYTE_UNUSED_1 = 8, */ -#endif - NOFL_METADATA_BYTE_END = 16, - NOFL_METADATA_BYTE_PINNED = 32, + NOFL_METADATA_BYTE_TRACE_PRECISELY = 0, + NOFL_METADATA_BYTE_TRACE_NONE = 8, + NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY = 16, + NOFL_METADATA_BYTE_TRACE_EPHEMERON = 24, + NOFL_METADATA_BYTE_TRACE_KIND_MASK = 0|8|16|24, + NOFL_METADATA_BYTE_PINNED = 16, + NOFL_METADATA_BYTE_END = 32, NOFL_METADATA_BYTE_LOGGED_0 = 64, NOFL_METADATA_BYTE_LOGGED_1 = 128, - NOFL_METADATA_BYTE_EPHEMERON = NOFL_METADATA_BYTE_PINNED, }; +STATIC_ASSERT_EQ(0, + NOFL_METADATA_BYTE_TRACE_PRECISELY&NOFL_METADATA_BYTE_PINNED); +STATIC_ASSERT_EQ(0, + NOFL_METADATA_BYTE_TRACE_NONE&NOFL_METADATA_BYTE_PINNED); + static uint8_t nofl_advance_current_mark(uint8_t mark) { switch (mark) { case NOFL_METADATA_BYTE_MARK_0: return NOFL_METADATA_BYTE_MARK_1; case NOFL_METADATA_BYTE_MARK_1: -#if GC_CONCURRENT_TRACE return NOFL_METADATA_BYTE_MARK_2; case NOFL_METADATA_BYTE_MARK_2: -#endif return NOFL_METADATA_BYTE_MARK_0; default: GC_CRASH(); @@ -925,14 +930,16 @@ nofl_finish_sweeping(struct nofl_allocator *alloc, static inline int nofl_is_ephemeron(struct gc_ref ref) { uint8_t meta = *nofl_metadata_byte_for_addr(gc_ref_value(ref)); - return meta & NOFL_METADATA_BYTE_EPHEMERON; + uint8_t kind = meta & NOFL_METADATA_BYTE_TRACE_KIND_MASK; + return kind == NOFL_METADATA_BYTE_TRACE_EPHEMERON; } static void nofl_space_set_ephemeron_flag(struct gc_ref ref) { if (gc_has_conservative_intraheap_edges()) { uint8_t *metadata = nofl_metadata_byte_for_addr(gc_ref_value(ref)); - *metadata |= NOFL_METADATA_BYTE_EPHEMERON; + uint8_t byte = *metadata & ~NOFL_METADATA_BYTE_TRACE_KIND_MASK; + *metadata = byte | NOFL_METADATA_BYTE_TRACE_EPHEMERON; } } @@ -1465,8 +1472,8 @@ nofl_space_set_nonempty_mark(struct nofl_space *space, uint8_t *metadata, static inline void nofl_space_pin_object(struct nofl_space *space, struct gc_ref ref) { - // For the heap-conservative configuration, all objects are pinned, - // and we re-use the pinned bit to identify ephemerons. + // For the heap-conservative configuration, all objects are pinned, and we use + // the pinned bit instead to identify an object's trace kind. 
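+  // (NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY and _TRACE_EPHEMERON both
+  // overlap NOFL_METADATA_BYTE_PINNED; the static asserts above only
+  // require the precise and pointerless trace kinds to avoid that bit.)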
if (gc_has_conservative_intraheap_edges()) return; uint8_t *metadata = nofl_metadata_byte_for_object(ref); @@ -1721,6 +1728,46 @@ nofl_space_object_size(struct nofl_space *space, struct gc_ref ref) { return granules * NOFL_GRANULE_SIZE; } +static inline enum gc_trace_kind +nofl_metadata_byte_trace_kind(uint8_t byte) +{ + switch (byte & NOFL_METADATA_BYTE_TRACE_KIND_MASK) { + case NOFL_METADATA_BYTE_TRACE_PRECISELY: + return GC_TRACE_PRECISELY; + case NOFL_METADATA_BYTE_TRACE_NONE: + return GC_TRACE_NONE; +#if GC_CONSERVATIVE_TRACE + case NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY: + return GC_TRACE_CONSERVATIVELY; + case NOFL_METADATA_BYTE_TRACE_EPHEMERON: + return GC_TRACE_EPHEMERON; +#endif + default: + GC_CRASH(); + } +} +static inline struct gc_trace_plan +nofl_space_object_trace_plan(struct nofl_space *space, struct gc_ref ref) { + uint8_t *loc = nofl_metadata_byte_for_object(ref); + uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); + enum gc_trace_kind kind = nofl_metadata_byte_trace_kind(byte); + switch (kind) { + case GC_TRACE_PRECISELY: + case GC_TRACE_NONE: + return (struct gc_trace_plan){ kind, }; +#if GC_CONSERVATIVE_TRACE + case GC_TRACE_CONSERVATIVELY: { + size_t granules = nofl_space_live_object_granules(loc); + return (struct gc_trace_plan){ kind, granules * NOFL_GRANULE_SIZE }; + } + case GC_TRACE_EPHEMERON: + return (struct gc_trace_plan){ kind, }; +#endif + default: + GC_CRASH(); + } +} + static struct nofl_slab* nofl_allocate_slabs(size_t nslabs) { return gc_platform_acquire_memory(nslabs * NOFL_SLAB_SIZE, NOFL_SLAB_SIZE); diff --git a/src/pcc.c b/src/pcc.c index e4c4e37e3..b605eb762 100644 --- a/src/pcc.c +++ b/src/pcc.c @@ -964,7 +964,7 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) { trigger_collection(mut, GC_COLLECTION_COMPACTING); atomic_fetch_add(&heap->large_object_pages, npages); - void *ret = large_object_space_alloc(space, npages); + void *ret = large_object_space_alloc(space, npages, GC_TRACE_PRECISELY); if (!ret) { perror("weird: we have the space but mmap didn't work"); diff --git a/src/semi.c b/src/semi.c index b1abee836..0626e02b0 100644 --- a/src/semi.c +++ b/src/semi.c @@ -495,7 +495,7 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) { while (!semi_space_steal_pages(semi_space, npages)) collect_for_large_alloc(mut, npages); - void *ret = large_object_space_alloc(space, npages); + void *ret = large_object_space_alloc(space, npages, GC_TRACE_PRECISELY); if (!ret) { perror("weird: we have the space but mmap didn't work");
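
For illustration, a minimal embedder-side sketch — not part of this patch — of how the new
allocation kinds fit together. It assumes the public allocation entry point has the same shape
as gc_allocate_slow() above, i.e. gc_allocate(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) returning void*, and that "gc-api.h" is the embedder-facing
header; both are assumptions about the surrounding API rather than anything introduced by
this diff.

    /* Sketch only; assumes gc_allocate(mut, size, kind) mirrors
     * gc_allocate_slow() above and is declared in gc-api.h. */
    #include <stddef.h>
    #include <string.h>
    #include "gc-api.h"

    /* An ordinary tagged object: traced precisely on a precise build, or
     * conservatively when GC_CONSERVATIVE_TRACE, exactly as before. */
    static void*
    embedder_alloc_tagged(struct gc_mutator *mut, size_t size) {
      return gc_allocate(mut, size, GC_ALLOCATION_TAGGED);
    }

    /* A raw, pointer-free byte buffer: the collector records a
     * GC_TRACE_NONE plan for it and never scans its contents.  On a
     * precise build, gc_allocator_alloc_table_begin_pattern() above also
     * sets the pinned bit for this kind, so the buffer is not subject to
     * relocation. */
    static void*
    embedder_alloc_bytes(struct gc_mutator *mut, size_t size) {
      void *buf = gc_allocate(mut, size, GC_ALLOCATION_UNTAGGED_POINTERLESS);
      memset(buf, 0, size);  /* contents are opaque to the collector */
      return buf;
    }

On a GC_CONSERVATIVE_TRACE build, GC_ALLOCATION_UNTAGGED_CONSERVATIVE is also accepted and
is traced like a tagged object; on a precise build both compute_trace_kind() and
gc_allocator_alloc_table_begin_pattern() reject it with GC_CRASH().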