From 9f437485ec1f49ee4369f9aeaf4dd324d71c4b26 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 9 Sep 2024 15:02:12 +0200 Subject: [PATCH] MMC marks roots in parallel during the pause, not while stopping Following the analysis in https://wingolog.org/archives/2024/09/06/on-taking-advantage-of-ragged-stops, we simplify MMC by traversing roots only during the pause. This lets us use gc_tracer parallel root-tracing. --- src/mmc.c | 551 +++++++++++------------------------------- src/nofl-space.h | 52 ++-- src/parallel-tracer.h | 114 ++++++--- src/root.h | 39 ++- src/serial-tracer.h | 35 +-- src/tracer.h | 13 +- 6 files changed, 312 insertions(+), 492 deletions(-) diff --git a/src/mmc.c b/src/mmc.c index 4c3cb9dc8..e6335dced 100644 --- a/src/mmc.c +++ b/src/mmc.c @@ -40,7 +40,6 @@ struct gc_heap { pthread_cond_t mutator_cond; size_t size; int collecting; - int mark_while_stopping; int check_pending_ephemerons; struct gc_pending_ephemerons *pending_ephemerons; struct gc_finalizer_state *finalizer_state; @@ -50,10 +49,9 @@ struct gc_heap { size_t paused_mutator_count; size_t inactive_mutator_count; struct gc_heap_roots *roots; - struct gc_mutator *mutator_trace_list; + struct gc_mutator *mutators; long count; uint8_t last_collection_was_minor; - struct gc_mutator *inactive_mutators; struct gc_tracer tracer; double fragmentation_low_threshold; double fragmentation_high_threshold; @@ -71,24 +69,14 @@ struct gc_heap { #define MUTATOR_EVENT(mut, event, ...) \ (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__) -struct gc_mutator_mark_buf { - size_t size; - size_t capacity; - struct gc_ref *objects; -}; - struct gc_mutator { struct nofl_allocator allocator; struct gc_heap *heap; struct gc_stack stack; struct gc_mutator_roots *roots; - struct gc_mutator_mark_buf mark_buf; void *event_listener_data; - // Three uses for this in-object linked-list pointer: - // - inactive (blocked in syscall) mutators - // - grey objects when stopping active mutators for mark-in-place - // - untraced mutators when stopping active mutators for evacuation struct gc_mutator *next; + struct gc_mutator *prev; }; struct gc_trace_worker_data { @@ -126,9 +114,6 @@ gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, nofl_allocator_finish(&data.allocator, heap_nofl_space(heap)); } -static void collect(struct gc_mutator *mut, - enum gc_collection_kind requested_kind) GC_NEVER_INLINE; - static inline int do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref, struct gc_trace_worker *worker) { @@ -180,34 +165,6 @@ gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { GC_CRASH(); } -static inline struct gc_ref -do_trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, - int possibly_interior) { - if (!gc_conservative_ref_might_be_a_heap_object(ref, possibly_interior)) - return gc_ref_null(); - - struct nofl_space *nofl_space = heap_nofl_space(heap); - if (GC_LIKELY(nofl_space_contains_conservative_ref(nofl_space, ref))) - return nofl_space_mark_conservative_ref(nofl_space, ref, possibly_interior); - - struct large_object_space *lospace = heap_large_object_space(heap); - return large_object_space_mark_conservative_ref(lospace, ref, - possibly_interior); -} - -static inline struct gc_ref -trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, - int possibly_interior) { - struct gc_ref ret = do_trace_conservative_ref(heap, ref, possibly_interior); - - if (gc_ref_is_heap_object(ret) && - 
GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons, - memory_order_relaxed))) - gc_resolve_pending_ephemerons(ret, heap); - - return ret; -} - static int mutators_are_stopping(struct gc_heap *heap) { return atomic_load_explicit(&heap->collecting, memory_order_relaxed); @@ -241,6 +198,13 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { pthread_cond_wait(&heap->mutator_cond, &heap->lock); if (heap->mutator_count == 1) heap->multithreaded = 1; + mut->next = mut->prev = NULL; + struct gc_mutator *tail = heap->mutators; + if (tail) { + mut->next = tail; + tail->prev = mut; + } + heap->mutators = mut; heap->mutator_count++; heap_unlock(heap); } @@ -252,6 +216,12 @@ remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { mut->heap = NULL; heap_lock(heap); heap->mutator_count--; + if (mut->next) + mut->next->prev = mut->prev; + if (mut->prev) + mut->prev->next = mut->next; + else + heap->mutators = mut->next; // We have no roots. If there is a GC stop currently in progress, // maybe tell the controller it can continue. if (mutators_are_stopping(heap) && all_mutators_stopped(heap)) @@ -285,72 +255,6 @@ heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { nofl_space_reacquire_memory(heap_nofl_space(heap), bytes); } -static void -mutator_mark_buf_grow(struct gc_mutator_mark_buf *buf) { - size_t old_capacity = buf->capacity; - size_t old_bytes = old_capacity * sizeof(struct gc_ref); - - size_t new_bytes = old_bytes ? old_bytes * 2 : getpagesize(); - size_t new_capacity = new_bytes / sizeof(struct gc_ref); - - void *mem = mmap(NULL, new_bytes, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - perror("allocating mutator mark buffer failed"); - GC_CRASH(); - } - if (old_bytes) { - memcpy(mem, buf->objects, old_bytes); - munmap(buf->objects, old_bytes); - } - buf->objects = mem; - buf->capacity = new_capacity; -} - -static void -mutator_mark_buf_push(struct gc_mutator_mark_buf *buf, struct gc_ref ref) { - if (GC_UNLIKELY(buf->size == buf->capacity)) - mutator_mark_buf_grow(buf); - buf->objects[buf->size++] = ref; -} - -static void -mutator_mark_buf_release(struct gc_mutator_mark_buf *buf) { - size_t bytes = buf->size * sizeof(struct gc_ref); - if (bytes >= getpagesize()) - madvise(buf->objects, align_up(bytes, getpagesize()), MADV_DONTNEED); - buf->size = 0; -} - -static void -mutator_mark_buf_destroy(struct gc_mutator_mark_buf *buf) { - size_t bytes = buf->capacity * sizeof(struct gc_ref); - if (bytes) - munmap(buf->objects, bytes); -} - -static void -enqueue_mutator_for_tracing(struct gc_mutator *mut) { - struct gc_heap *heap = mutator_heap(mut); - GC_ASSERT(mut->next == NULL); - struct gc_mutator *next = - atomic_load_explicit(&heap->mutator_trace_list, memory_order_acquire); - do { - mut->next = next; - } while (!atomic_compare_exchange_weak(&heap->mutator_trace_list, - &next, mut)); -} - -static int -heap_should_mark_while_stopping(struct gc_heap *heap) { - return atomic_load_explicit(&heap->mark_while_stopping, memory_order_acquire); -} - -static int -mutator_should_mark_while_stopping(struct gc_mutator *mut) { - return heap_should_mark_while_stopping(mutator_heap(mut)); -} - void gc_mutator_set_roots(struct gc_mutator *mut, struct gc_mutator_roots *roots) { mut->roots = roots; @@ -373,68 +277,42 @@ tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { gc_trace_worker_enqueue(worker, gc_edge_ref(edge)); } -static void -trace_and_enqueue_locally(struct gc_edge edge, struct gc_heap *heap, - 
void *data) { - struct gc_mutator *mut = data; - if (trace_edge(heap, edge, NULL)) - mutator_mark_buf_push(&mut->mark_buf, gc_edge_ref(edge)); +static inline struct gc_ref +do_trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, + int possibly_interior) { + if (!gc_conservative_ref_might_be_a_heap_object(ref, possibly_interior)) + return gc_ref_null(); + + struct nofl_space *nofl_space = heap_nofl_space(heap); + if (GC_LIKELY(nofl_space_contains_conservative_ref(nofl_space, ref))) + return nofl_space_mark_conservative_ref(nofl_space, ref, possibly_interior); + + struct large_object_space *lospace = heap_large_object_space(heap); + return large_object_space_mark_conservative_ref(lospace, ref, + possibly_interior); +} + +static inline struct gc_ref +trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, + int possibly_interior) { + struct gc_ref ret = do_trace_conservative_ref(heap, ref, possibly_interior); + + if (gc_ref_is_heap_object(ret) && + GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons, + memory_order_relaxed))) + gc_resolve_pending_ephemerons(ret, heap); + + return ret; } static inline void -do_trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data, - int possibly_interior) { - struct gc_mutator *mut = data; - struct gc_ref object = trace_conservative_ref(heap, ref, possibly_interior); - if (gc_ref_is_heap_object(object)) - mutator_mark_buf_push(&mut->mark_buf, object); -} - -static void -trace_possibly_interior_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { - return do_trace_conservative_ref_and_enqueue_locally(ref, heap, data, 1); -} - -static void -trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { - return do_trace_conservative_ref_and_enqueue_locally(ref, heap, data, 0); -} - -static void -trace_and_enqueue_globally(struct gc_edge edge, struct gc_heap *heap, - void *unused) { - if (trace_edge(heap, edge, NULL)) - gc_tracer_enqueue_root(&heap->tracer, gc_edge_ref(edge)); -} - -static inline void -do_trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data, - int possibly_interior) { - struct gc_ref object = trace_conservative_ref(heap, ref, possibly_interior); - if (gc_ref_is_heap_object(object)) - gc_tracer_enqueue_root(&heap->tracer, object); -} - -static void -trace_possibly_interior_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { - return do_trace_conservative_ref_and_enqueue_globally(ref, heap, data, 1); -} - -static void -trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { - return do_trace_conservative_ref_and_enqueue_globally(ref, heap, data, 0); +tracer_trace_conservative_ref(struct gc_conservative_ref ref, + struct gc_heap *heap, + struct gc_trace_worker *worker, + int possibly_interior) { + struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior); + if (gc_ref_is_heap_object(resolved)) + gc_trace_worker_enqueue(worker, resolved); } static inline struct gc_conservative_ref @@ -446,26 +324,13 @@ load_conservative_ref(uintptr_t addr) { } static inline void -trace_conservative_edges(uintptr_t low, - uintptr_t high, - void (*trace)(struct gc_conservative_ref, - struct gc_heap *, void *), - struct gc_heap *heap, - void *data) { 
+trace_conservative_edges(uintptr_t low, uintptr_t high, int possibly_interior, + struct gc_heap *heap, struct gc_trace_worker *worker) { GC_ASSERT(low == align_down(low, sizeof(uintptr_t))); GC_ASSERT(high == align_down(high, sizeof(uintptr_t))); for (uintptr_t addr = low; addr < high; addr += sizeof(uintptr_t)) - trace(load_conservative_ref(addr), heap, data); -} - -static inline void -tracer_trace_conservative_ref(struct gc_conservative_ref ref, - struct gc_heap *heap, void *data) { - struct gc_trace_worker *worker = data; - int possibly_interior = 0; - struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior); - if (gc_ref_is_heap_object(resolved)) - gc_trace_worker_enqueue(worker, resolved); + tracer_trace_conservative_ref(load_conservative_ref(addr), heap, worker, + possibly_interior); } static inline void @@ -486,10 +351,10 @@ trace_one_conservatively(struct gc_ref ref, struct gc_heap *heap, } else { bytes = large_object_space_object_size(heap_large_object_space(heap), ref); } - trace_conservative_edges(gc_ref_value(ref), - gc_ref_value(ref) + bytes, - tracer_trace_conservative_ref, heap, - worker); + // Intraheap edges are not interior. + int possibly_interior = 0; + trace_conservative_edges(gc_ref_value(ref), gc_ref_value(ref) + bytes, + possibly_interior, heap, worker); } static inline void @@ -511,6 +376,14 @@ trace_root(struct gc_root root, struct gc_heap *heap, case GC_ROOT_KIND_MUTATOR: gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker); break; + case GC_ROOT_KIND_CONSERVATIVE_EDGES: + trace_conservative_edges(root.range.lo_addr, root.range.hi_addr, 0, + heap, worker); + break; + case GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES: + trace_conservative_edges(root.range.lo_addr, root.range.hi_addr, 1, + heap, worker); + break; case GC_ROOT_KIND_RESOLVED_EPHEMERONS: gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit, heap, worker); @@ -518,101 +391,52 @@ trace_root(struct gc_root root, struct gc_heap *heap, case GC_ROOT_KIND_EDGE: tracer_visit(root.edge, heap, worker); break; + case GC_ROOT_KIND_REMEMBERED_OBJECT: + trace_one(root.ref, heap, worker); + break; + case GC_ROOT_KIND_REMEMBERED_SLAB: + nofl_space_trace_remembered_slab(heap_nofl_space(heap), root.idx, + trace_one, heap, worker); + break; default: GC_CRASH(); } } static void -visit_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) { - gc_tracer_add_root(&heap->tracer, gc_root_edge(edge)); +enqueue_conservative_roots(uintptr_t low, uintptr_t high, + struct gc_heap *heap, void *data) { + int *possibly_interior = data; + gc_tracer_add_root(&heap->tracer, + gc_root_conservative_edges(low, high, *possibly_interior)); } static void -mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low, - uintptr_t high, - struct gc_heap *heap, - void *data) { - trace_conservative_edges(low, high, - gc_mutator_conservative_roots_may_be_interior() - ? 
trace_possibly_interior_conservative_ref_and_enqueue_globally - : trace_conservative_ref_and_enqueue_globally, - heap, data); +enqueue_mutator_conservative_roots(struct gc_heap *heap) { + if (gc_has_mutator_conservative_roots()) { + int possibly_interior = gc_mutator_conservative_roots_may_be_interior(); + for (struct gc_mutator *mut = heap->mutators; + mut; + mut = mut->next) + gc_stack_visit(&mut->stack, enqueue_conservative_roots, heap, + &possibly_interior); + } } static void -mark_and_globally_enqueue_heap_conservative_roots(uintptr_t low, - uintptr_t high, - struct gc_heap *heap, - void *data) { - trace_conservative_edges(low, high, - trace_conservative_ref_and_enqueue_globally, - heap, data); +enqueue_global_conservative_roots(struct gc_heap *heap) { + if (gc_has_global_conservative_roots()) { + int possibly_interior = 0; + gc_platform_visit_global_conservative_roots + (enqueue_conservative_roots, heap, &possibly_interior); + } } static void -mark_and_locally_enqueue_mutator_conservative_roots(uintptr_t low, - uintptr_t high, - struct gc_heap *heap, - void *data) { - trace_conservative_edges(low, high, - gc_mutator_conservative_roots_may_be_interior() - ? trace_possibly_interior_conservative_ref_and_enqueue_locally - : trace_conservative_ref_and_enqueue_locally, - heap, data); -} - -static inline void -trace_mutator_conservative_roots(struct gc_mutator *mut, - void (*trace_range)(uintptr_t low, - uintptr_t high, - struct gc_heap *heap, - void *data), - struct gc_heap *heap, - void *data) { - if (gc_has_mutator_conservative_roots()) - gc_stack_visit(&mut->stack, trace_range, heap, data); -} - -// Mark the roots of a mutator that is stopping for GC. We can't -// enqueue them directly, so we send them to the controller in a buffer. -static void -trace_stopping_mutator_roots(struct gc_mutator *mut) { - GC_ASSERT(mutator_should_mark_while_stopping(mut)); - struct gc_heap *heap = mutator_heap(mut); - trace_mutator_conservative_roots(mut, - mark_and_locally_enqueue_mutator_conservative_roots, - heap, mut); - gc_trace_mutator_roots(mut->roots, trace_and_enqueue_locally, heap, mut); -} - -static void -trace_mutator_conservative_roots_with_lock(struct gc_mutator *mut) { - trace_mutator_conservative_roots(mut, - mark_and_globally_enqueue_mutator_conservative_roots, - mutator_heap(mut), - NULL); -} - -static void -trace_mutator_roots_with_lock(struct gc_mutator *mut) { - trace_mutator_conservative_roots_with_lock(mut); - gc_trace_mutator_roots(mut->roots, trace_and_enqueue_globally, - mutator_heap(mut), NULL); -} - -static void -trace_mutator_roots_with_lock_before_stop(struct gc_mutator *mut) { - gc_stack_capture_hot(&mut->stack); - if (mutator_should_mark_while_stopping(mut)) - trace_mutator_roots_with_lock(mut); - else - enqueue_mutator_for_tracing(mut); -} - -static void -release_stopping_mutator_roots(struct gc_mutator *mut) { - mutator_mark_buf_release(&mut->mark_buf); +enqueue_pinned_roots(struct gc_heap *heap) { + GC_ASSERT(!heap_nofl_space(heap)->evacuating); + enqueue_mutator_conservative_roots(heap); + enqueue_global_conservative_roots(heap); } static void @@ -622,56 +446,6 @@ wait_for_mutators_to_stop(struct gc_heap *heap) { pthread_cond_wait(&heap->collector_cond, &heap->lock); } -static void -trace_mutator_conservative_roots_after_stop(struct gc_heap *heap) { - int active_mutators_already_marked = heap_should_mark_while_stopping(heap); - if (!active_mutators_already_marked) - for (struct gc_mutator *mut = atomic_load(&heap->mutator_trace_list); - mut; - mut = mut->next) - 
trace_mutator_conservative_roots_with_lock(mut); - - for (struct gc_mutator *mut = heap->inactive_mutators; - mut; - mut = mut->next) - trace_mutator_conservative_roots_with_lock(mut); -} - -static void -trace_mutator_roots_after_stop(struct gc_heap *heap) { - struct gc_mutator *mut = atomic_load(&heap->mutator_trace_list); - int active_mutators_already_marked = heap_should_mark_while_stopping(heap); - while (mut) { - // Also collect any already-marked grey objects and put them on the - // global trace queue. - if (active_mutators_already_marked) - gc_tracer_enqueue_roots(&heap->tracer, mut->mark_buf.objects, - mut->mark_buf.size); - else - trace_mutator_roots_with_lock(mut); - // Also unlink mutator_trace_list chain. - struct gc_mutator *next = mut->next; - mut->next = NULL; - mut = next; - } - atomic_store(&heap->mutator_trace_list, NULL); - - for (struct gc_mutator *mut = heap->inactive_mutators; mut; mut = mut->next) - trace_mutator_roots_with_lock(mut); -} - -static void -trace_global_conservative_roots(struct gc_heap *heap) { - if (gc_has_global_conservative_roots()) - gc_platform_visit_global_conservative_roots - (mark_and_globally_enqueue_heap_conservative_roots, heap, NULL); -} - -static void -enqueue_generational_root(struct gc_ref ref, struct gc_heap *heap) { - gc_tracer_enqueue_root(&heap->tracer, ref); -} - void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size, struct gc_edge edge, struct gc_ref new_val) { @@ -679,22 +453,6 @@ gc_write_barrier_extern(struct gc_ref obj, size_t obj_size, gc_object_set_remembered(obj); } -static void -trace_generational_roots(struct gc_heap *heap) { - // TODO: Add lospace nursery. - if (atomic_load(&heap->gc_kind) == GC_COLLECTION_MINOR) { - nofl_space_trace_remembered_set(heap_nofl_space(heap), - enqueue_generational_root, - heap); - large_object_space_trace_remembered_set(heap_large_object_space(heap), - enqueue_generational_root, - heap); - } else { - nofl_space_clear_remembered_set(heap_nofl_space(heap)); - large_object_space_clear_remembered_set(heap_large_object_space(heap)); - } -} - static enum gc_collection_kind pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) GC_NEVER_INLINE; @@ -733,11 +491,6 @@ pause_mutator_for_collection_with_lock(struct gc_mutator *mut) { MUTATOR_EVENT(mut, mutator_stopping); nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); gc_stack_capture_hot(&mut->stack); - if (mutator_should_mark_while_stopping(mut)) - // No need to collect results in mark buf; we can enqueue roots directly. 
- trace_mutator_roots_with_lock(mut); - else - enqueue_mutator_for_tracing(mut); return pause_mutator_for_collection(heap, mut); } @@ -749,13 +502,9 @@ pause_mutator_for_collection_without_lock(struct gc_mutator *mut) { MUTATOR_EVENT(mut, mutator_stopping); nofl_finish_sweeping(&mut->allocator, heap_nofl_space(heap)); gc_stack_capture_hot(&mut->stack); - if (mutator_should_mark_while_stopping(mut)) - trace_stopping_mutator_roots(mut); - enqueue_mutator_for_tracing(mut); heap_lock(heap); pause_mutator_for_collection(heap, mut); heap_unlock(heap); - release_stopping_mutator_roots(mut); } static inline void @@ -838,7 +587,6 @@ determine_collection_kind(struct gc_heap *heap, struct nofl_space *nofl_space = heap_nofl_space(heap); enum gc_collection_kind previous_gc_kind = atomic_load(&heap->gc_kind); enum gc_collection_kind gc_kind; - int mark_while_stopping = 1; double yield = heap_last_gc_yield(heap); double fragmentation = heap_fragmentation(heap); ssize_t pending = atomic_load_explicit(&nofl_space->pending_unavailable_bytes, @@ -856,17 +604,6 @@ determine_collection_kind(struct gc_heap *heap, // free blocks, and we decided not to expand the heap. Let's do an // evacuating major collection to maximize the free block yield. gc_kind = GC_COLLECTION_COMPACTING; - - // Generally speaking, we allow mutators to mark their own stacks - // before pausing. This is a limited form of concurrent marking, as - // other mutators might be running, not having received the signal - // to stop yet. In a compacting collection, this results in pinned - // roots, because we haven't started evacuating yet and instead mark - // in place. However as in this case we are trying to reclaim free - // blocks, try to avoid any pinning caused by the ragged-stop - // marking. Of course if the mutator has conservative roots we will - // have pinning anyway and might as well allow ragged stops. - mark_while_stopping = gc_has_conservative_roots(); } else if (previous_gc_kind == GC_COLLECTION_COMPACTING && fragmentation >= heap->fragmentation_low_threshold) { DEBUG("continuing evacuation due to fragmentation %.2f%% > %.2f%%\n", @@ -912,7 +649,6 @@ determine_collection_kind(struct gc_heap *heap, gc_kind == GC_COLLECTION_COMPACTING) { DEBUG("welp. 
conservative heap scanning, no evacuation for you\n"); gc_kind = GC_COLLECTION_MAJOR; - mark_while_stopping = 1; } // If this is the first in a series of minor collections, reset the @@ -927,34 +663,49 @@ determine_collection_kind(struct gc_heap *heap, yield * 100., clamped * 100.); } - atomic_store_explicit(&heap->mark_while_stopping, mark_while_stopping, - memory_order_release); - atomic_store(&heap->gc_kind, gc_kind); return gc_kind; } static void -trace_conservative_roots_after_stop(struct gc_heap *heap) { - GC_ASSERT(!heap_nofl_space(heap)->evacuating); - if (gc_has_mutator_conservative_roots()) - trace_mutator_conservative_roots_after_stop(heap); - if (gc_has_global_conservative_roots()) - trace_global_conservative_roots(heap); +enqueue_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) { + gc_tracer_add_root(&heap->tracer, gc_root_edge(edge)); } static void -trace_pinned_roots_after_stop(struct gc_heap *heap) { - GC_ASSERT(!heap_nofl_space(heap)->evacuating); - trace_conservative_roots_after_stop(heap); +enqueue_remembered_object(struct gc_ref ref, struct gc_heap *heap) { + gc_tracer_add_root(&heap->tracer, gc_root_remembered_object(ref)); } static void -trace_roots_after_stop(struct gc_heap *heap) { - trace_mutator_roots_after_stop(heap); - gc_trace_heap_roots(heap->roots, trace_and_enqueue_globally, heap, NULL); - gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL); - trace_generational_roots(heap); +enqueue_generational_roots(struct gc_heap *heap, + enum gc_collection_kind gc_kind) { + // TODO: Add lospace nursery. + if (gc_kind == GC_COLLECTION_MINOR) { + for (size_t i = 0; i < heap_nofl_space(heap)->nslabs; i++) + gc_tracer_add_root(&heap->tracer, gc_root_remembered_slab(i)); + large_object_space_trace_remembered_set(heap_large_object_space(heap), + enqueue_remembered_object, + heap); + } else { + nofl_space_clear_remembered_set(heap_nofl_space(heap)); + large_object_space_clear_remembered_set(heap_large_object_space(heap)); + } +} + +static void +enqueue_relocatable_roots(struct gc_heap *heap, + enum gc_collection_kind gc_kind) { + for (struct gc_mutator *mut = heap->mutators; + mut; + mut = mut->next) { + if (mut->roots) + gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut)); + } + if (heap->roots) + gc_tracer_add_root(&heap->tracer, gc_root_heap(heap)); + gc_visit_finalizer_roots(heap->finalizer_state, enqueue_root_edge, heap, NULL); + enqueue_generational_roots(heap, gc_kind); } static void @@ -970,15 +721,6 @@ resolve_ephemerons_eagerly(struct gc_heap *heap) { gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); } -static int -enqueue_resolved_ephemerons(struct gc_heap *heap) { - struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); - if (!resolved) - return 0; - gc_trace_resolved_ephemerons(resolved, trace_and_enqueue_globally, heap, NULL); - return 1; -} - static void trace_resolved_ephemerons(struct gc_heap *heap) { for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); @@ -995,7 +737,7 @@ resolve_finalizers(struct gc_heap *heap) { priority < gc_finalizer_priority_count(); priority++) { if (gc_resolve_finalizers(heap->finalizer_state, priority, - visit_root_edge, heap, NULL)) { + enqueue_root_edge, heap, NULL)) { gc_tracer_trace(&heap->tracer); trace_resolved_ephemerons(heap); } @@ -1008,6 +750,8 @@ sweep_ephemerons(struct gc_heap *heap) { return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1); } +static void collect(struct gc_mutator *mut, + enum gc_collection_kind 
requested_kind) GC_NEVER_INLINE; static void collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { struct gc_heap *heap = mutator_heap(mut); @@ -1020,6 +764,13 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { } MUTATOR_EVENT(mut, mutator_cause_gc); DEBUG("start collect #%ld:\n", heap->count); + HEAP_EVENT(heap, requesting_stop); + request_mutators_to_stop(heap); + gc_stack_capture_hot(&mut->stack); + nofl_finish_sweeping(&mut->allocator, nofl_space); + HEAP_EVENT(heap, waiting_for_stop); + wait_for_mutators_to_stop(heap); + HEAP_EVENT(heap, mutators_stopped); enum gc_collection_kind gc_kind = determine_collection_kind(heap, requested_kind); int is_minor = gc_kind == GC_COLLECTION_MINOR; @@ -1029,22 +780,17 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { gc_extern_space_start_gc(exspace, is_minor); resolve_ephemerons_lazily(heap); gc_tracer_prepare(&heap->tracer); - HEAP_EVENT(heap, requesting_stop); - request_mutators_to_stop(heap); - trace_mutator_roots_with_lock_before_stop(mut); - nofl_finish_sweeping(&mut->allocator, nofl_space); - HEAP_EVENT(heap, waiting_for_stop); - wait_for_mutators_to_stop(heap); - HEAP_EVENT(heap, mutators_stopped); double yield = heap_last_gc_yield(heap); double fragmentation = heap_fragmentation(heap); HEAP_EVENT(heap, live_data_size, heap->size * (1 - yield)); DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation); detect_out_of_memory(heap); - trace_pinned_roots_after_stop(heap); - nofl_space_start_gc(nofl_space, gc_kind); - trace_roots_after_stop(heap); + enqueue_pinned_roots(heap); + if (gc_kind == GC_COLLECTION_COMPACTING) + gc_tracer_trace_roots(&heap->tracer); HEAP_EVENT(heap, roots_traced); + enqueue_relocatable_roots(heap, gc_kind); + nofl_space_start_gc(nofl_space, gc_kind); gc_tracer_trace(&heap->tracer); HEAP_EVENT(heap, heap_traced); resolve_ephemerons_eagerly(heap); @@ -1334,7 +1080,6 @@ gc_init_for_thread(struct gc_stack_addr *stack_base, void gc_finish_for_thread(struct gc_mutator *mut) { remove_mutator(mutator_heap(mut), mut); - mutator_mark_buf_destroy(&mut->mark_buf); free(mut); } @@ -1343,8 +1088,6 @@ deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { GC_ASSERT(mut->next == NULL); nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); heap_lock(heap); - mut->next = heap->inactive_mutators; - heap->inactive_mutators = mut; heap->inactive_mutator_count++; gc_stack_capture_hot(&mut->stack); if (all_mutators_stopped(heap)) @@ -1357,18 +1100,12 @@ reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { heap_lock(heap); while (mutators_are_stopping(heap)) pthread_cond_wait(&heap->mutator_cond, &heap->lock); - struct gc_mutator **prev = &heap->inactive_mutators; - while (*prev != mut) - prev = &(*prev)->next; - *prev = mut->next; - mut->next = NULL; heap->inactive_mutator_count--; heap_unlock(heap); } -void* gc_call_without_gc(struct gc_mutator *mut, - void* (*f)(void*), - void *data) { +void* +gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*), void *data) { struct gc_heap *heap = mutator_heap(mut); deactivate_mutator(heap, mut); void *ret = f(data); diff --git a/src/nofl-space.h b/src/nofl-space.h index 037dff0d2..5a217cdb0 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -808,14 +808,19 @@ nofl_space_set_ephemeron_flag(struct gc_ref ref) { } } +struct gc_trace_worker; + // Note that it's quite possible (and even likely) that any given remset // byte doesn't hold any roots, if all stores were to nursery 
objects. STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0); static void nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab, size_t card, - void (*enqueue)(struct gc_ref, struct gc_heap*), - struct gc_heap *heap) { + void (*trace_object)(struct gc_ref, + struct gc_heap*, + struct gc_trace_worker*), + struct gc_heap *heap, + struct gc_trace_worker *worker) { uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0]; size_t granule_base = card * NOFL_GRANULES_PER_REMSET_BYTE; for (size_t granule_in_remset = 0; @@ -829,32 +834,33 @@ nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab, size_t granule = granule_base + granule_offset; uintptr_t addr = first_addr_in_slab + granule * NOFL_GRANULE_SIZE; GC_ASSERT(nofl_metadata_byte_for_addr(addr) == &slab->metadata[granule]); - enqueue(gc_ref(addr), heap); + trace_object(gc_ref(addr), heap, worker); } } } static void -nofl_space_trace_remembered_set(struct nofl_space *space, - void (*enqueue)(struct gc_ref, - struct gc_heap*), - struct gc_heap *heap) { - GC_ASSERT(!space->evacuating); - for (size_t s = 0; s < space->nslabs; s++) { - struct nofl_slab *slab = space->slabs[s]; - uint8_t *remset = slab->remembered_set; - for (size_t card_base = 0; - card_base < NOFL_REMSET_BYTES_PER_SLAB; - card_base += 8) { - uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base); - if (!remset_bytes) continue; - memset(remset + card_base, 0, 8); - while (remset_bytes) { - size_t card_offset = count_zero_bytes(remset_bytes); - remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8)); - nofl_space_trace_card(space, slab, card_base + card_offset, - enqueue, heap); - } +nofl_space_trace_remembered_slab(struct nofl_space *space, + size_t slab_idx, + void (*trace_object)(struct gc_ref, + struct gc_heap*, + struct gc_trace_worker*), + struct gc_heap *heap, + struct gc_trace_worker *worker) { + GC_ASSERT(slab_idx < space->nslabs); + struct nofl_slab *slab = space->slabs[slab_idx]; + uint8_t *remset = slab->remembered_set; + for (size_t card_base = 0; + card_base < NOFL_REMSET_BYTES_PER_SLAB; + card_base += 8) { + uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base); + if (!remset_bytes) continue; + memset(remset + card_base, 0, 8); + while (remset_bytes) { + size_t card_offset = count_zero_bytes(remset_bytes); + remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8)); + nofl_space_trace_card(space, slab, card_base + card_offset, + trace_object, heap, worker); } } } diff --git a/src/parallel-tracer.h b/src/parallel-tracer.h index 888c0fad6..20d66730f 100644 --- a/src/parallel-tracer.h +++ b/src/parallel-tracer.h @@ -57,6 +57,7 @@ struct gc_tracer { long epoch; pthread_mutex_t lock; pthread_cond_t cond; + int trace_roots_only; struct root_worklist roots; struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT]; }; @@ -112,6 +113,7 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, tracer->heap = heap; atomic_init(&tracer->active_tracers, 0); tracer->epoch = 0; + tracer->trace_roots_only = 0; pthread_mutex_init(&tracer->lock, NULL); pthread_cond_init(&tracer->cond, NULL); root_worklist_init(&tracer->roots); @@ -299,32 +301,52 @@ trace_with_data(struct gc_tracer *tracer, atomic_fetch_add_explicit(&tracer->active_tracers, 1, memory_order_acq_rel); worker->data = data; - size_t n = 0; DEBUG("tracer #%zu: running trace loop\n", worker->id); - do { - struct gc_root root = root_worklist_pop(&tracer->roots); - if (root.kind == GC_ROOT_KIND_NONE) - break; - trace_root(root, heap, worker); - } 
while (1); - - do { - while (1) { - struct gc_ref ref; - if (!local_worklist_empty(&worker->local)) { - ref = local_worklist_pop(&worker->local); - } else { - ref = trace_worker_steal(worker); - if (!gc_ref_is_heap_object(ref)) - break; - } - trace_one(ref, heap, worker); + { + DEBUG("tracer #%zu: tracing roots\n", worker->id); + size_t n = 0; + do { + struct gc_root root = root_worklist_pop(&tracer->roots); + if (root.kind == GC_ROOT_KIND_NONE) + break; + trace_root(root, heap, worker); n++; - } - } while (trace_worker_should_continue(worker)); + } while (1); - DEBUG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n); + DEBUG("tracer #%zu: done tracing roots, %zu roots traced\n", worker->id, n); + } + + if (tracer->trace_roots_only) { + // Unlike the full trace where work is generated during the trace, a + // roots-only trace consumes work monotonically; any object enqueued as a + // result of marking roots isn't ours to deal with. However we do need to + // synchronize with remote workers to ensure they have completed their + // work items. + if (worker->id == 0) { + for (size_t i = 1; i < tracer->worker_count; i++) + pthread_mutex_lock(&tracer->workers[i].lock); + } + } else { + DEBUG("tracer #%zu: tracing objects\n", worker->id); + size_t n = 0; + do { + while (1) { + struct gc_ref ref; + if (!local_worklist_empty(&worker->local)) { + ref = local_worklist_pop(&worker->local); + } else { + ref = trace_worker_steal(worker); + if (!gc_ref_is_heap_object(ref)) + break; + } + trace_one(ref, heap, worker); + n++; + } + } while (trace_worker_should_continue(worker)); + + DEBUG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n); + } worker->data = NULL; atomic_fetch_sub_explicit(&tracer->active_tracers, 1, memory_order_acq_rel); @@ -336,17 +358,28 @@ trace_worker_trace(struct gc_trace_worker *worker) { worker->heap, worker); } -static inline void -gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref ref) { - struct shared_worklist *worker0_deque = &tracer->workers[0].shared; - shared_worklist_push(worker0_deque, ref); -} +static inline int +gc_tracer_should_parallelize(struct gc_tracer *tracer) { + if (root_worklist_size(&tracer->roots) > 1) + return 1; -static inline void -gc_tracer_enqueue_roots(struct gc_tracer *tracer, struct gc_ref *objv, - size_t count) { - struct shared_worklist *worker0_deque = &tracer->workers[0].shared; - shared_worklist_push_many(worker0_deque, objv, count); + if (tracer->trace_roots_only) + return 0; + + size_t nonempty_worklists = 0; + ssize_t parallel_threshold = + LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT; + for (size_t i = 0; i < tracer->worker_count; i++) { + ssize_t size = shared_worklist_size(&tracer->workers[i].shared); + if (!size) + continue; + nonempty_worklists++; + if (nonempty_worklists > 1) + return 1; + if (size >= parallel_threshold) + return 1; + } + return 0; } static inline void @@ -356,10 +389,7 @@ gc_tracer_trace(struct gc_tracer *tracer) { for (int i = 1; i < tracer->worker_count; i++) pthread_mutex_unlock(&tracer->workers[i].lock); - ssize_t parallel_threshold = - LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT; - if (root_worklist_size(&tracer->roots) > 1 || - shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) { + if (gc_tracer_should_parallelize(tracer)) { DEBUG("waking workers\n"); tracer_unpark_all_workers(tracer); } else { @@ -372,4 +402,16 @@ gc_tracer_trace(struct gc_tracer *tracer) { DEBUG("trace finished\n"); } +static inline void +gc_tracer_trace_roots(struct 
gc_tracer *tracer) { + DEBUG("starting roots-only trace\n"); + + tracer->trace_roots_only = 1; + gc_tracer_trace(tracer); + tracer->trace_roots_only = 0; + + GC_ASSERT_EQ(atomic_load(&tracer->active_tracers), 0); + DEBUG("roots-only trace finished\n"); +} + #endif // PARALLEL_TRACER_H diff --git a/src/root.h b/src/root.h index 5228dcb4f..46e019b06 100644 --- a/src/root.h +++ b/src/root.h @@ -2,6 +2,7 @@ #define ROOT_H #include "gc-edge.h" +#include "extents.h" struct gc_ephemeron; struct gc_heap; @@ -11,8 +12,12 @@ enum gc_root_kind { GC_ROOT_KIND_NONE, GC_ROOT_KIND_HEAP, GC_ROOT_KIND_MUTATOR, + GC_ROOT_KIND_CONSERVATIVE_EDGES, + GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES, GC_ROOT_KIND_RESOLVED_EPHEMERONS, GC_ROOT_KIND_EDGE, + GC_ROOT_KIND_REMEMBERED_OBJECT, + GC_ROOT_KIND_REMEMBERED_SLAB, }; struct gc_root { @@ -21,22 +26,38 @@ struct gc_root { struct gc_heap *heap; struct gc_mutator *mutator; struct gc_ephemeron *resolved_ephemerons; + struct extent_range range; struct gc_edge edge; + struct gc_ref ref; + size_t idx; }; }; -static inline struct gc_root gc_root_heap(struct gc_heap* heap) { +static inline struct gc_root +gc_root_heap(struct gc_heap* heap) { struct gc_root ret = { GC_ROOT_KIND_HEAP }; ret.heap = heap; return ret; } -static inline struct gc_root gc_root_mutator(struct gc_mutator* mutator) { +static inline struct gc_root +gc_root_mutator(struct gc_mutator* mutator) { struct gc_root ret = { GC_ROOT_KIND_MUTATOR }; ret.mutator = mutator; return ret; } +static inline struct gc_root +gc_root_conservative_edges(uintptr_t lo_addr, uintptr_t hi_addr, + int possibly_interior) { + enum gc_root_kind kind = possibly_interior + ? GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES + : GC_ROOT_KIND_CONSERVATIVE_EDGES; + struct gc_root ret = { kind }; + ret.range = (struct extent_range) {lo_addr, hi_addr}; + return ret; +} + static inline struct gc_root gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) { struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS }; @@ -51,4 +72,18 @@ gc_root_edge(struct gc_edge edge) { return ret; } +static inline struct gc_root +gc_root_remembered_object(struct gc_ref ref) { + struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_OBJECT }; + ret.ref = ref; + return ret; +} + +static inline struct gc_root +gc_root_remembered_slab(size_t idx) { + struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_SLAB }; + ret.idx = idx; + return ret; +} + #endif // ROOT_H diff --git a/src/serial-tracer.h b/src/serial-tracer.h index 96ab7e563..b9575fddb 100644 --- a/src/serial-tracer.h +++ b/src/serial-tracer.h @@ -12,6 +12,7 @@ struct gc_tracer { struct gc_heap *heap; + int trace_roots_only; struct root_worklist roots; struct simple_worklist worklist; }; @@ -30,6 +31,7 @@ static int gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, size_t parallelism) { tracer->heap = heap; + tracer->trace_roots_only = 0; root_worklist_init(&tracer->roots); return simple_worklist_init(&tracer->worklist); } @@ -43,19 +45,11 @@ gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) { root_worklist_push(&tracer->roots, root); } -static inline void -gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref obj) { - simple_worklist_push(&tracer->worklist, obj); -} -static inline void -gc_tracer_enqueue_roots(struct gc_tracer *tracer, struct gc_ref *objs, - size_t count) { - simple_worklist_push_many(&tracer->worklist, objs, count); -} static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) { - 
gc_tracer_enqueue_root(worker->tracer, ref); + simple_worklist_push(&worker->tracer->worklist, ref); } + static inline void tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap, struct gc_trace_worker *worker, @@ -68,12 +62,14 @@ tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap, trace_root(root, heap, worker); } while (1); root_worklist_reset(&tracer->roots); - do { - struct gc_ref obj = simple_worklist_pop(&tracer->worklist); - if (!gc_ref_is_heap_object(obj)) - break; - trace_one(obj, heap, worker); - } while (1); + if (!tracer->trace_roots_only) { + do { + struct gc_ref obj = simple_worklist_pop(&tracer->worklist); + if (!gc_ref_is_heap_object(obj)) + break; + trace_one(obj, heap, worker); + } while (1); + } } static inline void gc_tracer_trace(struct gc_tracer *tracer) { @@ -82,4 +78,11 @@ gc_tracer_trace(struct gc_tracer *tracer) { &worker); } +static inline void +gc_tracer_trace_roots(struct gc_tracer *tracer) { + tracer->trace_roots_only = 1; + gc_tracer_trace(tracer); + tracer->trace_roots_only = 0; +} + #endif // SERIAL_TRACER_H diff --git a/src/tracer.h b/src/tracer.h index ec6a140b1..c563a7018 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -47,11 +47,8 @@ static void gc_tracer_prepare(struct gc_tracer *tracer); static void gc_tracer_release(struct gc_tracer *tracer); // Add root objects to the trace. Call before tracer_trace. -static inline void gc_tracer_enqueue_root(struct gc_tracer *tracer, - struct gc_ref obj); -static inline void gc_tracer_enqueue_roots(struct gc_tracer *tracer, - struct gc_ref *objs, - size_t count); +static inline void gc_tracer_add_root(struct gc_tracer *tracer, + struct gc_root root); // Given that an object has been shaded grey, enqueue for tracing. static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, @@ -59,10 +56,10 @@ static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, static inline struct gc_trace_worker_data* gc_trace_worker_data(struct gc_trace_worker *worker) GC_ALWAYS_INLINE; -static inline void gc_tracer_add_root(struct gc_tracer *tracer, - struct gc_root root); +// Just trace roots. +static inline void gc_tracer_trace_roots(struct gc_tracer *tracer); -// Run the full trace. +// Run the full trace, including roots. static inline void gc_tracer_trace(struct gc_tracer *tracer); #endif // TRACER_H
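
For readers skimming the diff, the shape of the change is: roots stop being pushed eagerly onto a tracer deque (the removed gc_tracer_enqueue_root / gc_tracer_enqueue_roots and the mutator mark buffers) and instead become first-class work items, struct gc_root descriptors registered with gc_tracer_add_root during the pause and consumed by the tracer workers. Compacting collections additionally run a roots-only pass (gc_tracer_trace_roots) so that conservatively-referenced objects are marked in place, and therefore pinned, before nofl_space_start_gc chooses evacuation candidates. The standalone program below is a minimal sketch of that shape only; the toy_* names are invented for illustration and are not part of the patch, and a real conservative scan would of course test heap membership rather than print.

/* Toy model of "roots as work items" plus a roots-only pass.
 * Only the flow -- enqueue root descriptors, optionally consume just the
 * roots, then run the full trace -- mirrors the patch above. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum toy_root_kind { TOY_ROOT_NONE, TOY_ROOT_EDGE, TOY_ROOT_RANGE };

struct toy_root {
  enum toy_root_kind kind;
  union {
    intptr_t *edge;                      /* like GC_ROOT_KIND_EDGE */
    struct { uintptr_t lo, hi; } range;  /* like ..._CONSERVATIVE_EDGES */
  };
};

struct toy_tracer {
  int trace_roots_only;        /* same idea as tracer->trace_roots_only */
  struct toy_root roots[16];   /* stand-in for the root worklist */
  size_t nroots;
  intptr_t *objects[64];       /* stand-in for the object worklist */
  size_t nobjects;
};

static void toy_add_root(struct toy_tracer *t, struct toy_root r) {
  t->roots[t->nroots++] = r;
}

static void toy_trace_root(struct toy_tracer *t, struct toy_root r) {
  switch (r.kind) {
  case TOY_ROOT_EDGE:
    /* Visiting an edge root greys its target: push it for later tracing. */
    t->objects[t->nobjects++] = r.edge;
    break;
  case TOY_ROOT_RANGE:
    /* A real tracer would load each word in [lo,hi) and test whether it
       could refer to a heap object; here we just log the range. */
    printf("scan [%#zx,%#zx)\n", (size_t)r.range.lo, (size_t)r.range.hi);
    break;
  default:
    break;
  }
}

static void toy_trace(struct toy_tracer *t) {
  /* Roots are always consumed first. */
  for (size_t i = 0; i < t->nroots; i++)
    toy_trace_root(t, t->roots[i]);
  t->nroots = 0;
  if (t->trace_roots_only)
    return;  /* roots-only pass: leave grey objects for the full trace */
  while (t->nobjects)
    printf("trace object %p\n", (void *)t->objects[--t->nobjects]);
}

static void toy_trace_roots_only(struct toy_tracer *t) {
  /* Mirrors gc_tracer_trace_roots(): flip the flag around a normal trace. */
  t->trace_roots_only = 1;
  toy_trace(t);
  t->trace_roots_only = 0;
}

int main(void) {
  static intptr_t slot;
  struct toy_tracer t = {0};
  toy_add_root(&t, (struct toy_root){ .kind = TOY_ROOT_RANGE,
                                      .range = { 0x1000, 0x2000 } });
  toy_trace_roots_only(&t);  /* e.g. pin conservative roots before evacuation */
  toy_add_root(&t, (struct toy_root){ .kind = TOY_ROOT_EDGE, .edge = &slot });
  toy_trace(&t);             /* full trace: roots, then the object graph */
  return 0;
}

Both real tracers in the patch implement gc_tracer_trace_roots the same way: set trace_roots_only around an ordinary gc_tracer_trace, so the pass consumes root work items (greying objects as a side effect) without draining the object worklist, which the subsequent full gc_tracer_trace then picks up.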