From 5084730471a15b6776c548f9e6779e61b9638705 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 10 Jul 2024 15:46:11 +0200 Subject: [PATCH] Add parallel root-tracing phase --- benchmarks/simple-gc-embedder.h | 8 ++-- src/gc-ephemeron-internal.h | 16 ++++--- src/gc-ephemeron.c | 22 +++++----- src/parallel-tracer.h | 51 +++++++++++++++++----- src/root-worklist.h | 76 +++++++++++++++++++++++++++++++++ src/root.h | 43 +++++++++++++++++++ src/semi.c | 7 ++- src/serial-tracer.h | 20 +++++++++ src/tracer.h | 8 ++++ src/whippet.c | 14 +++++- 10 files changed, 231 insertions(+), 34 deletions(-) create mode 100644 src/root-worklist.h create mode 100644 src/root.h diff --git a/benchmarks/simple-gc-embedder.h b/benchmarks/simple-gc-embedder.h index 5f77a7cc9..23fc54a5d 100644 --- a/benchmarks/simple-gc-embedder.h +++ b/benchmarks/simple-gc-embedder.h @@ -172,11 +172,11 @@ static inline size_t gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) { GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED); switch (tag_live_alloc_kind(fwd->data)) { -#define SCAN_OBJECT(name, Name, NAME) \ +#define OBJECT_SIZE(name, Name, NAME) \ case ALLOC_KIND_##NAME: \ - return name##_size(gc_ref_heap_object(ref)); \ - FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT) -#undef SCAN_OBJECT + return name##_size(gc_ref_heap_object(fwd->ref)); + FOR_EACH_HEAP_OBJECT_KIND(OBJECT_SIZE) +#undef OBJECT_SIZE default: GC_CRASH(); } diff --git a/src/gc-ephemeron-internal.h b/src/gc-ephemeron-internal.h index 8894bbd8f..3d34cf188 100644 --- a/src/gc-ephemeron-internal.h +++ b/src/gc-ephemeron-internal.h @@ -32,12 +32,16 @@ gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state, struct gc_heap *heap, size_t shard, size_t nshards); -GC_INTERNAL int -gc_pop_resolved_ephemerons(struct gc_heap *heap, - void (*visit)(struct gc_edge edge, - struct gc_heap *heap, - void *visit_data), - void *trace_data); +GC_INTERNAL struct gc_ephemeron* +gc_pop_resolved_ephemerons(struct gc_heap *heap); + +GC_INTERNAL void +gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data); GC_INTERNAL void gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state, diff --git a/src/gc-ephemeron.c b/src/gc-ephemeron.c index 8a42c1a84..90f82e8ca 100644 --- a/src/gc-ephemeron.c +++ b/src/gc-ephemeron.c @@ -521,23 +521,25 @@ gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state, } } -int -gc_pop_resolved_ephemerons(struct gc_heap *heap, - void (*visit)(struct gc_edge edge, - struct gc_heap *heap, - void *visit_data), - void *trace_data) { +struct gc_ephemeron* +gc_pop_resolved_ephemerons(struct gc_heap *heap) { struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap); - struct gc_ephemeron *resolved = atomic_exchange(&state->resolved, NULL); - if (!resolved) - return 0; + return atomic_exchange(&state->resolved, NULL); +} + +void +gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data) { for (; resolved; resolved = resolved->resolved) { visit(gc_ephemeron_value_edge(resolved), heap, trace_data); // RESOLVED -> TRACED. atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED, memory_order_release); } - return 1; } void diff --git a/src/parallel-tracer.h b/src/parallel-tracer.h index 0e93ca0dd..dea87cc9f 100644 --- a/src/parallel-tracer.h +++ b/src/parallel-tracer.h @@ -10,10 +10,17 @@ #include "debug.h" #include "gc-inline.h" #include "local-worklist.h" +#include "root-worklist.h" #include "shared-worklist.h" #include "spin.h" #include "tracer.h" +#ifdef VERBOSE_LOGGING +#define LOG(...) fprintf (stderr, "LOG: " __VA_ARGS__) +#else +#define LOG(...) do { } while (0) +#endif + enum trace_worker_state { TRACE_WORKER_STOPPED, TRACE_WORKER_IDLE, @@ -36,6 +43,11 @@ struct gc_trace_worker { struct gc_trace_worker_data *data; }; +static inline struct gc_trace_worker_data* +gc_trace_worker_data(struct gc_trace_worker *worker) { + return worker->data; +} + #define TRACE_WORKERS_MAX_COUNT 8 struct gc_tracer { @@ -45,6 +57,7 @@ struct gc_tracer { long epoch; pthread_mutex_t lock; pthread_cond_t cond; + struct root_worklist roots; struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT]; }; @@ -101,6 +114,7 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, tracer->epoch = 0; pthread_mutex_init(&tracer->lock, NULL); pthread_cond_init(&tracer->cond, NULL); + root_worklist_init(&tracer->roots); size_t desired_worker_count = parallelism; ASSERT(desired_worker_count); if (desired_worker_count > TRACE_WORKERS_MAX_COUNT) @@ -121,13 +135,18 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, static void gc_tracer_prepare(struct gc_tracer *tracer) { for (size_t i = 0; i < tracer->worker_count; i++) - tracer->workers[i].steal_id = 0; + tracer->workers[i].steal_id = (i + 1) % tracer->worker_count; } static void gc_tracer_release(struct gc_tracer *tracer) { for (size_t i = 0; i < tracer->worker_count; i++) shared_worklist_release(&tracer->workers[i].shared); } +static inline void +gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) { + root_worklist_push(&tracer->roots, root); +} + static inline void tracer_unpark_all_workers(struct gc_tracer *tracer) { long old_epoch = @@ -161,6 +180,7 @@ tracer_share(struct gc_trace_worker *worker) { static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) { + ASSERT(gc_ref_is_heap_object(ref)); if (local_worklist_full(&worker->local)) tracer_share(worker); local_worklist_push(&worker->local, ref); @@ -182,33 +202,33 @@ static struct gc_ref trace_worker_steal_from_any(struct gc_trace_worker *worker, struct gc_tracer *tracer) { for (size_t i = 0; i < tracer->worker_count; i++) { - DEBUG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id); + LOG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id); struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id); if (gc_ref_is_heap_object(obj)) { - DEBUG("tracer #%zu: stealing got %p\n", worker->id, + LOG("tracer #%zu: stealing got %p\n", worker->id, gc_ref_heap_object(obj)); return obj; } worker->steal_id = (worker->steal_id + 1) % tracer->worker_count; } - DEBUG("tracer #%zu: failed to steal\n", worker->id); + LOG("tracer #%zu: failed to steal\n", worker->id); return gc_ref_null(); } static int trace_worker_can_steal_from_any(struct gc_trace_worker *worker, struct gc_tracer *tracer) { - DEBUG("tracer #%zu: checking if any worker has tasks\n", worker->id); + LOG("tracer #%zu: checking if any worker has tasks\n", worker->id); for (size_t i = 0; i < tracer->worker_count; i++) { int res = tracer_can_steal_from_worker(tracer, worker->steal_id); if (res) { - DEBUG("tracer #%zu: worker #%zu has tasks!\n", worker->id, + LOG("tracer #%zu: worker #%zu has tasks!\n", worker->id, worker->steal_id); return 1; } worker->steal_id = (worker->steal_id + 1) % tracer->worker_count; } - DEBUG("tracer #%zu: nothing to steal\n", worker->id); + LOG("tracer #%zu: nothing to steal\n", worker->id); return 0; } @@ -241,7 +261,7 @@ trace_worker_should_continue(struct gc_trace_worker *worker) { return !done; } // spin - DEBUG("checking for termination: spinning #%zu\n", spin_count); + LOG("checking for termination: spinning #%zu\n", spin_count); yield_for_spin(spin_count); } } @@ -254,13 +274,13 @@ trace_worker_steal(struct gc_trace_worker *worker) { // overflowed. In that case avoid contention by trying to pop // something from the worker's own queue. { - DEBUG("tracer #%zu: trying to pop worker's own deque\n", worker->id); + LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id); struct gc_ref obj = shared_worklist_try_pop(&worker->shared); if (gc_ref_is_heap_object(obj)) return obj; } - DEBUG("tracer #%zu: trying to steal\n", worker->id); + LOG("tracer #%zu: trying to steal\n", worker->id); struct gc_ref obj = trace_worker_steal_from_any(worker, tracer); if (gc_ref_is_heap_object(obj)) return obj; @@ -279,6 +299,13 @@ trace_with_data(struct gc_tracer *tracer, size_t n = 0; DEBUG("tracer #%zu: running trace loop\n", worker->id); + do { + struct gc_root root = root_worklist_pop(&tracer->roots); + if (root.kind == GC_ROOT_KIND_NONE) + break; + trace_root(root, heap, worker); + } while (1); + do { while (1) { struct gc_ref ref; @@ -325,7 +352,8 @@ gc_tracer_trace(struct gc_tracer *tracer) { ssize_t parallel_threshold = LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT; - if (shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) { + if (root_worklist_size(&tracer->roots) > 1 || + shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) { DEBUG("waking workers\n"); tracer_unpark_all_workers(tracer); } else { @@ -333,6 +361,7 @@ gc_tracer_trace(struct gc_tracer *tracer) { } trace_worker_trace(&tracer->workers[0]); + root_worklist_reset(&tracer->roots); DEBUG("trace finished\n"); } diff --git a/src/root-worklist.h b/src/root-worklist.h new file mode 100644 index 000000000..45ede8595 --- /dev/null +++ b/src/root-worklist.h @@ -0,0 +1,76 @@ +#ifndef ROOT_WORKLIST_H +#define ROOT_WORKLIST_H + +#include +#include +#include + +#include "assert.h" +#include "debug.h" +#include "gc-inline.h" +#include "gc-ref.h" +#include "root.h" + +// A single-producer, multiple-consumer worklist that has two phases: +// one in which roots are added by the producer, then one in which roots +// are consumed from the worklist. Roots are never added once the +// consumer phase starts. +struct root_worklist { + size_t size; + size_t read; + size_t write; + struct gc_root *buf; +}; + +void +root_worklist_alloc(struct root_worklist *q) { + q->buf = realloc(q->buf, q->size * sizeof(struct gc_root)); + if (!q->buf) { + perror("Failed to grow root worklist"); + GC_CRASH(); + } +} + +static void +root_worklist_init(struct root_worklist *q) { + q->size = 16; + q->read = 0; + q->write = 0; + q->buf = NULL; + root_worklist_alloc(q); +} + +static inline void +root_worklist_push(struct root_worklist *q, struct gc_root root) { + if (UNLIKELY(q->write == q->size)) { + q->size *= 2; + root_worklist_alloc(q); + } + q->buf[q->write++] = root; +} + +// Not atomic. +static inline size_t +root_worklist_size(struct root_worklist *q) { + return q->write - q->read; +} + +static inline struct gc_root +root_worklist_pop(struct root_worklist *q) { + size_t idx = atomic_fetch_add(&q->read, 1); + if (idx < q->write) + return q->buf[idx]; + return (struct gc_root){ GC_ROOT_KIND_NONE, }; +} + +static void +root_worklist_reset(struct root_worklist *q) { + q->read = q->write = 0; +} + +static void +root_worklist_destroy(struct root_worklist *q) { + free(q->buf); +} + +#endif // ROOT_WORKLIST_H diff --git a/src/root.h b/src/root.h new file mode 100644 index 000000000..a6a91f987 --- /dev/null +++ b/src/root.h @@ -0,0 +1,43 @@ +#ifndef ROOT_H +#define ROOT_H + +struct gc_ephemeron; +struct gc_heap; +struct gc_mutator; + +enum gc_root_kind { + GC_ROOT_KIND_NONE, + GC_ROOT_KIND_HEAP, + GC_ROOT_KIND_MUTATOR, + GC_ROOT_KIND_RESOLVED_EPHEMERONS +}; + +struct gc_root { + enum gc_root_kind kind; + union { + struct gc_heap *heap; + struct gc_mutator *mutator; + struct gc_ephemeron *resolved_ephemerons; + }; +}; + +static inline struct gc_root gc_root_heap(struct gc_heap* heap) { + struct gc_root ret = { GC_ROOT_KIND_HEAP }; + ret.heap = heap; + return ret; +} + +static inline struct gc_root gc_root_mutator(struct gc_mutator* mutator) { + struct gc_root ret = { GC_ROOT_KIND_MUTATOR }; + ret.mutator = mutator; + return ret; +} + +static inline struct gc_root +gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) { + struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS }; + ret.resolved_ephemerons = resolved; + return ret; +} + +#endif // ROOT_H diff --git a/src/semi.c b/src/semi.c index fdcd03792..af9134bd7 100644 --- a/src/semi.c +++ b/src/semi.c @@ -380,9 +380,14 @@ static void collect(struct gc_mutator *mut, size_t for_alloc) { HEAP_EVENT(heap, heap_traced); gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); heap->check_pending_ephemerons = 1; - while (gc_pop_resolved_ephemerons(heap, trace, NULL)) + do { + struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); + if (!resolved) + break; + gc_trace_resolved_ephemerons(resolved, trace, heap, NULL); while(grey < semi->hp) grey = scan(heap, gc_ref(grey)); + } while (1); HEAP_EVENT(heap, ephemerons_traced); large_object_space_finish_gc(large, 0); gc_extern_space_finish_gc(heap->extern_space, 0); diff --git a/src/serial-tracer.h b/src/serial-tracer.h index 9e128be91..96ab7e563 100644 --- a/src/serial-tracer.h +++ b/src/serial-tracer.h @@ -7,10 +7,12 @@ #include "assert.h" #include "debug.h" #include "simple-worklist.h" +#include "root-worklist.h" #include "tracer.h" struct gc_tracer { struct gc_heap *heap; + struct root_worklist roots; struct simple_worklist worklist; }; @@ -19,10 +21,16 @@ struct gc_trace_worker { struct gc_trace_worker_data *data; }; +static inline struct gc_trace_worker_data* +gc_trace_worker_data(struct gc_trace_worker *worker) { + return worker->data; +} + static int gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, size_t parallelism) { tracer->heap = heap; + root_worklist_init(&tracer->roots); return simple_worklist_init(&tracer->worklist); } static void gc_tracer_prepare(struct gc_tracer *tracer) {} @@ -30,6 +38,11 @@ static void gc_tracer_release(struct gc_tracer *tracer) { simple_worklist_release(&tracer->worklist); } +static inline void +gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) { + root_worklist_push(&tracer->roots, root); +} + static inline void gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref obj) { simple_worklist_push(&tracer->worklist, obj); @@ -48,6 +61,13 @@ tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap, struct gc_trace_worker *worker, struct gc_trace_worker_data *data) { worker->data = data; + do { + struct gc_root root = root_worklist_pop(&tracer->roots); + if (root.kind == GC_ROOT_KIND_NONE) + break; + trace_root(root, heap, worker); + } while (1); + root_worklist_reset(&tracer->roots); do { struct gc_ref obj = simple_worklist_pop(&tracer->worklist); if (!gc_ref_is_heap_object(obj)) diff --git a/src/tracer.h b/src/tracer.h index 64f6dcdc6..ec6a140b1 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -3,6 +3,7 @@ #include "gc-ref.h" #include "gc-edge.h" +#include "root.h" struct gc_heap; @@ -19,6 +20,8 @@ struct gc_trace_worker_data; // Visit all fields in an object. static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, struct gc_trace_worker *worker) GC_ALWAYS_INLINE; +static inline void trace_root(struct gc_root root, struct gc_heap *heap, + struct gc_trace_worker *worker) GC_ALWAYS_INLINE; static void gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, @@ -53,6 +56,11 @@ static inline void gc_tracer_enqueue_roots(struct gc_tracer *tracer, // Given that an object has been shaded grey, enqueue for tracing. static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) GC_ALWAYS_INLINE; +static inline struct gc_trace_worker_data* +gc_trace_worker_data(struct gc_trace_worker *worker) GC_ALWAYS_INLINE; + +static inline void gc_tracer_add_root(struct gc_tracer *tracer, + struct gc_root root); // Run the full trace. static inline void gc_tracer_trace(struct gc_tracer *tracer); diff --git a/src/whippet.c b/src/whippet.c index 9674560c2..3e17f5422 100644 --- a/src/whippet.c +++ b/src/whippet.c @@ -1228,6 +1228,13 @@ static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, gc_trace_object(ref, tracer_visit, heap, worker, NULL); } +static inline void trace_root(struct gc_root root, + struct gc_heap *heap, + struct gc_trace_worker *worker) { + // We don't use parallel root tracing yet. + GC_CRASH(); +} + static void mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low, uintptr_t high, @@ -1876,8 +1883,11 @@ static void resolve_ephemerons_eagerly(struct gc_heap *heap) { } static int enqueue_resolved_ephemerons(struct gc_heap *heap) { - return gc_pop_resolved_ephemerons(heap, trace_and_enqueue_globally, - NULL); + struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); + if (!resolved) + return 0; + gc_trace_resolved_ephemerons(resolved, trace_and_enqueue_globally, heap, NULL); + return 1; } static void sweep_ephemerons(struct gc_heap *heap) {