1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Add parallel root-tracing phase

This commit is contained in:
Andy Wingo 2024-07-10 15:46:11 +02:00
parent ff1e1b1d44
commit 5084730471
10 changed files with 231 additions and 34 deletions

View file

@ -172,11 +172,11 @@ static inline size_t
gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) { gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED); GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
switch (tag_live_alloc_kind(fwd->data)) { switch (tag_live_alloc_kind(fwd->data)) {
#define SCAN_OBJECT(name, Name, NAME) \ #define OBJECT_SIZE(name, Name, NAME) \
case ALLOC_KIND_##NAME: \ case ALLOC_KIND_##NAME: \
return name##_size(gc_ref_heap_object(ref)); \ return name##_size(gc_ref_heap_object(fwd->ref));
FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT) FOR_EACH_HEAP_OBJECT_KIND(OBJECT_SIZE)
#undef SCAN_OBJECT #undef OBJECT_SIZE
default: default:
GC_CRASH(); GC_CRASH();
} }

View file

@ -32,11 +32,15 @@ gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
struct gc_heap *heap, size_t shard, struct gc_heap *heap, size_t shard,
size_t nshards); size_t nshards);
GC_INTERNAL int GC_INTERNAL struct gc_ephemeron*
gc_pop_resolved_ephemerons(struct gc_heap *heap, gc_pop_resolved_ephemerons(struct gc_heap *heap);
GC_INTERNAL void
gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
void (*visit)(struct gc_edge edge, void (*visit)(struct gc_edge edge,
struct gc_heap *heap, struct gc_heap *heap,
void *visit_data), void *visit_data),
struct gc_heap *heap,
void *trace_data); void *trace_data);
GC_INTERNAL void GC_INTERNAL void

View file

@ -521,23 +521,25 @@ gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
} }
} }
int struct gc_ephemeron*
gc_pop_resolved_ephemerons(struct gc_heap *heap, gc_pop_resolved_ephemerons(struct gc_heap *heap) {
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
return atomic_exchange(&state->resolved, NULL);
}
void
gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
void (*visit)(struct gc_edge edge, void (*visit)(struct gc_edge edge,
struct gc_heap *heap, struct gc_heap *heap,
void *visit_data), void *visit_data),
struct gc_heap *heap,
void *trace_data) { void *trace_data) {
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
struct gc_ephemeron *resolved = atomic_exchange(&state->resolved, NULL);
if (!resolved)
return 0;
for (; resolved; resolved = resolved->resolved) { for (; resolved; resolved = resolved->resolved) {
visit(gc_ephemeron_value_edge(resolved), heap, trace_data); visit(gc_ephemeron_value_edge(resolved), heap, trace_data);
// RESOLVED -> TRACED. // RESOLVED -> TRACED.
atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED, atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED,
memory_order_release); memory_order_release);
} }
return 1;
} }
void void

View file

@ -10,10 +10,17 @@
#include "debug.h" #include "debug.h"
#include "gc-inline.h" #include "gc-inline.h"
#include "local-worklist.h" #include "local-worklist.h"
#include "root-worklist.h"
#include "shared-worklist.h" #include "shared-worklist.h"
#include "spin.h" #include "spin.h"
#include "tracer.h" #include "tracer.h"
#ifdef VERBOSE_LOGGING
#define LOG(...) fprintf (stderr, "LOG: " __VA_ARGS__)
#else
#define LOG(...) do { } while (0)
#endif
enum trace_worker_state { enum trace_worker_state {
TRACE_WORKER_STOPPED, TRACE_WORKER_STOPPED,
TRACE_WORKER_IDLE, TRACE_WORKER_IDLE,
@ -36,6 +43,11 @@ struct gc_trace_worker {
struct gc_trace_worker_data *data; struct gc_trace_worker_data *data;
}; };
static inline struct gc_trace_worker_data*
gc_trace_worker_data(struct gc_trace_worker *worker) {
return worker->data;
}
#define TRACE_WORKERS_MAX_COUNT 8 #define TRACE_WORKERS_MAX_COUNT 8
struct gc_tracer { struct gc_tracer {
@ -45,6 +57,7 @@ struct gc_tracer {
long epoch; long epoch;
pthread_mutex_t lock; pthread_mutex_t lock;
pthread_cond_t cond; pthread_cond_t cond;
struct root_worklist roots;
struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT]; struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT];
}; };
@ -101,6 +114,7 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
tracer->epoch = 0; tracer->epoch = 0;
pthread_mutex_init(&tracer->lock, NULL); pthread_mutex_init(&tracer->lock, NULL);
pthread_cond_init(&tracer->cond, NULL); pthread_cond_init(&tracer->cond, NULL);
root_worklist_init(&tracer->roots);
size_t desired_worker_count = parallelism; size_t desired_worker_count = parallelism;
ASSERT(desired_worker_count); ASSERT(desired_worker_count);
if (desired_worker_count > TRACE_WORKERS_MAX_COUNT) if (desired_worker_count > TRACE_WORKERS_MAX_COUNT)
@ -121,13 +135,18 @@ gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
static void gc_tracer_prepare(struct gc_tracer *tracer) { static void gc_tracer_prepare(struct gc_tracer *tracer) {
for (size_t i = 0; i < tracer->worker_count; i++) for (size_t i = 0; i < tracer->worker_count; i++)
tracer->workers[i].steal_id = 0; tracer->workers[i].steal_id = (i + 1) % tracer->worker_count;
} }
static void gc_tracer_release(struct gc_tracer *tracer) { static void gc_tracer_release(struct gc_tracer *tracer) {
for (size_t i = 0; i < tracer->worker_count; i++) for (size_t i = 0; i < tracer->worker_count; i++)
shared_worklist_release(&tracer->workers[i].shared); shared_worklist_release(&tracer->workers[i].shared);
} }
static inline void
gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
root_worklist_push(&tracer->roots, root);
}
static inline void static inline void
tracer_unpark_all_workers(struct gc_tracer *tracer) { tracer_unpark_all_workers(struct gc_tracer *tracer) {
long old_epoch = long old_epoch =
@ -161,6 +180,7 @@ tracer_share(struct gc_trace_worker *worker) {
static inline void static inline void
gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) { gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
ASSERT(gc_ref_is_heap_object(ref));
if (local_worklist_full(&worker->local)) if (local_worklist_full(&worker->local))
tracer_share(worker); tracer_share(worker);
local_worklist_push(&worker->local, ref); local_worklist_push(&worker->local, ref);
@ -182,33 +202,33 @@ static struct gc_ref
trace_worker_steal_from_any(struct gc_trace_worker *worker, trace_worker_steal_from_any(struct gc_trace_worker *worker,
struct gc_tracer *tracer) { struct gc_tracer *tracer) {
for (size_t i = 0; i < tracer->worker_count; i++) { for (size_t i = 0; i < tracer->worker_count; i++) {
DEBUG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id); LOG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id);
struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id); struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id);
if (gc_ref_is_heap_object(obj)) { if (gc_ref_is_heap_object(obj)) {
DEBUG("tracer #%zu: stealing got %p\n", worker->id, LOG("tracer #%zu: stealing got %p\n", worker->id,
gc_ref_heap_object(obj)); gc_ref_heap_object(obj));
return obj; return obj;
} }
worker->steal_id = (worker->steal_id + 1) % tracer->worker_count; worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
} }
DEBUG("tracer #%zu: failed to steal\n", worker->id); LOG("tracer #%zu: failed to steal\n", worker->id);
return gc_ref_null(); return gc_ref_null();
} }
static int static int
trace_worker_can_steal_from_any(struct gc_trace_worker *worker, trace_worker_can_steal_from_any(struct gc_trace_worker *worker,
struct gc_tracer *tracer) { struct gc_tracer *tracer) {
DEBUG("tracer #%zu: checking if any worker has tasks\n", worker->id); LOG("tracer #%zu: checking if any worker has tasks\n", worker->id);
for (size_t i = 0; i < tracer->worker_count; i++) { for (size_t i = 0; i < tracer->worker_count; i++) {
int res = tracer_can_steal_from_worker(tracer, worker->steal_id); int res = tracer_can_steal_from_worker(tracer, worker->steal_id);
if (res) { if (res) {
DEBUG("tracer #%zu: worker #%zu has tasks!\n", worker->id, LOG("tracer #%zu: worker #%zu has tasks!\n", worker->id,
worker->steal_id); worker->steal_id);
return 1; return 1;
} }
worker->steal_id = (worker->steal_id + 1) % tracer->worker_count; worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
} }
DEBUG("tracer #%zu: nothing to steal\n", worker->id); LOG("tracer #%zu: nothing to steal\n", worker->id);
return 0; return 0;
} }
@ -241,7 +261,7 @@ trace_worker_should_continue(struct gc_trace_worker *worker) {
return !done; return !done;
} }
// spin // spin
DEBUG("checking for termination: spinning #%zu\n", spin_count); LOG("checking for termination: spinning #%zu\n", spin_count);
yield_for_spin(spin_count); yield_for_spin(spin_count);
} }
} }
@ -254,13 +274,13 @@ trace_worker_steal(struct gc_trace_worker *worker) {
// overflowed. In that case avoid contention by trying to pop // overflowed. In that case avoid contention by trying to pop
// something from the worker's own queue. // something from the worker's own queue.
{ {
DEBUG("tracer #%zu: trying to pop worker's own deque\n", worker->id); LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
struct gc_ref obj = shared_worklist_try_pop(&worker->shared); struct gc_ref obj = shared_worklist_try_pop(&worker->shared);
if (gc_ref_is_heap_object(obj)) if (gc_ref_is_heap_object(obj))
return obj; return obj;
} }
DEBUG("tracer #%zu: trying to steal\n", worker->id); LOG("tracer #%zu: trying to steal\n", worker->id);
struct gc_ref obj = trace_worker_steal_from_any(worker, tracer); struct gc_ref obj = trace_worker_steal_from_any(worker, tracer);
if (gc_ref_is_heap_object(obj)) if (gc_ref_is_heap_object(obj))
return obj; return obj;
@ -279,6 +299,13 @@ trace_with_data(struct gc_tracer *tracer,
size_t n = 0; size_t n = 0;
DEBUG("tracer #%zu: running trace loop\n", worker->id); DEBUG("tracer #%zu: running trace loop\n", worker->id);
do {
struct gc_root root = root_worklist_pop(&tracer->roots);
if (root.kind == GC_ROOT_KIND_NONE)
break;
trace_root(root, heap, worker);
} while (1);
do { do {
while (1) { while (1) {
struct gc_ref ref; struct gc_ref ref;
@ -325,7 +352,8 @@ gc_tracer_trace(struct gc_tracer *tracer) {
ssize_t parallel_threshold = ssize_t parallel_threshold =
LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT; LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
if (shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) { if (root_worklist_size(&tracer->roots) > 1 ||
shared_worklist_size(&tracer->workers[0].shared) >= parallel_threshold) {
DEBUG("waking workers\n"); DEBUG("waking workers\n");
tracer_unpark_all_workers(tracer); tracer_unpark_all_workers(tracer);
} else { } else {
@ -333,6 +361,7 @@ gc_tracer_trace(struct gc_tracer *tracer) {
} }
trace_worker_trace(&tracer->workers[0]); trace_worker_trace(&tracer->workers[0]);
root_worklist_reset(&tracer->roots);
DEBUG("trace finished\n"); DEBUG("trace finished\n");
} }

76
src/root-worklist.h Normal file
View file

@ -0,0 +1,76 @@
#ifndef ROOT_WORKLIST_H
#define ROOT_WORKLIST_H
#include <stdatomic.h>
#include <sys/mman.h>
#include <unistd.h>
#include "assert.h"
#include "debug.h"
#include "gc-inline.h"
#include "gc-ref.h"
#include "root.h"
// A single-producer, multiple-consumer worklist that has two phases:
// one in which roots are added by the producer, then one in which roots
// are consumed from the worklist. Roots are never added once the
// consumer phase starts.
struct root_worklist {
size_t size;
size_t read;
size_t write;
struct gc_root *buf;
};
void
root_worklist_alloc(struct root_worklist *q) {
q->buf = realloc(q->buf, q->size * sizeof(struct gc_root));
if (!q->buf) {
perror("Failed to grow root worklist");
GC_CRASH();
}
}
static void
root_worklist_init(struct root_worklist *q) {
q->size = 16;
q->read = 0;
q->write = 0;
q->buf = NULL;
root_worklist_alloc(q);
}
static inline void
root_worklist_push(struct root_worklist *q, struct gc_root root) {
if (UNLIKELY(q->write == q->size)) {
q->size *= 2;
root_worklist_alloc(q);
}
q->buf[q->write++] = root;
}
// Not atomic.
static inline size_t
root_worklist_size(struct root_worklist *q) {
return q->write - q->read;
}
static inline struct gc_root
root_worklist_pop(struct root_worklist *q) {
size_t idx = atomic_fetch_add(&q->read, 1);
if (idx < q->write)
return q->buf[idx];
return (struct gc_root){ GC_ROOT_KIND_NONE, };
}
static void
root_worklist_reset(struct root_worklist *q) {
q->read = q->write = 0;
}
static void
root_worklist_destroy(struct root_worklist *q) {
free(q->buf);
}
#endif // ROOT_WORKLIST_H

43
src/root.h Normal file
View file

@ -0,0 +1,43 @@
#ifndef ROOT_H
#define ROOT_H
struct gc_ephemeron;
struct gc_heap;
struct gc_mutator;
enum gc_root_kind {
GC_ROOT_KIND_NONE,
GC_ROOT_KIND_HEAP,
GC_ROOT_KIND_MUTATOR,
GC_ROOT_KIND_RESOLVED_EPHEMERONS
};
struct gc_root {
enum gc_root_kind kind;
union {
struct gc_heap *heap;
struct gc_mutator *mutator;
struct gc_ephemeron *resolved_ephemerons;
};
};
static inline struct gc_root gc_root_heap(struct gc_heap* heap) {
struct gc_root ret = { GC_ROOT_KIND_HEAP };
ret.heap = heap;
return ret;
}
static inline struct gc_root gc_root_mutator(struct gc_mutator* mutator) {
struct gc_root ret = { GC_ROOT_KIND_MUTATOR };
ret.mutator = mutator;
return ret;
}
static inline struct gc_root
gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) {
struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS };
ret.resolved_ephemerons = resolved;
return ret;
}
#endif // ROOT_H

View file

@ -380,9 +380,14 @@ static void collect(struct gc_mutator *mut, size_t for_alloc) {
HEAP_EVENT(heap, heap_traced); HEAP_EVENT(heap, heap_traced);
gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
heap->check_pending_ephemerons = 1; heap->check_pending_ephemerons = 1;
while (gc_pop_resolved_ephemerons(heap, trace, NULL)) do {
struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
if (!resolved)
break;
gc_trace_resolved_ephemerons(resolved, trace, heap, NULL);
while(grey < semi->hp) while(grey < semi->hp)
grey = scan(heap, gc_ref(grey)); grey = scan(heap, gc_ref(grey));
} while (1);
HEAP_EVENT(heap, ephemerons_traced); HEAP_EVENT(heap, ephemerons_traced);
large_object_space_finish_gc(large, 0); large_object_space_finish_gc(large, 0);
gc_extern_space_finish_gc(heap->extern_space, 0); gc_extern_space_finish_gc(heap->extern_space, 0);

View file

@ -7,10 +7,12 @@
#include "assert.h" #include "assert.h"
#include "debug.h" #include "debug.h"
#include "simple-worklist.h" #include "simple-worklist.h"
#include "root-worklist.h"
#include "tracer.h" #include "tracer.h"
struct gc_tracer { struct gc_tracer {
struct gc_heap *heap; struct gc_heap *heap;
struct root_worklist roots;
struct simple_worklist worklist; struct simple_worklist worklist;
}; };
@ -19,10 +21,16 @@ struct gc_trace_worker {
struct gc_trace_worker_data *data; struct gc_trace_worker_data *data;
}; };
static inline struct gc_trace_worker_data*
gc_trace_worker_data(struct gc_trace_worker *worker) {
return worker->data;
}
static int static int
gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
size_t parallelism) { size_t parallelism) {
tracer->heap = heap; tracer->heap = heap;
root_worklist_init(&tracer->roots);
return simple_worklist_init(&tracer->worklist); return simple_worklist_init(&tracer->worklist);
} }
static void gc_tracer_prepare(struct gc_tracer *tracer) {} static void gc_tracer_prepare(struct gc_tracer *tracer) {}
@ -30,6 +38,11 @@ static void gc_tracer_release(struct gc_tracer *tracer) {
simple_worklist_release(&tracer->worklist); simple_worklist_release(&tracer->worklist);
} }
static inline void
gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
root_worklist_push(&tracer->roots, root);
}
static inline void static inline void
gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref obj) { gc_tracer_enqueue_root(struct gc_tracer *tracer, struct gc_ref obj) {
simple_worklist_push(&tracer->worklist, obj); simple_worklist_push(&tracer->worklist, obj);
@ -48,6 +61,13 @@ tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap,
struct gc_trace_worker *worker, struct gc_trace_worker *worker,
struct gc_trace_worker_data *data) { struct gc_trace_worker_data *data) {
worker->data = data; worker->data = data;
do {
struct gc_root root = root_worklist_pop(&tracer->roots);
if (root.kind == GC_ROOT_KIND_NONE)
break;
trace_root(root, heap, worker);
} while (1);
root_worklist_reset(&tracer->roots);
do { do {
struct gc_ref obj = simple_worklist_pop(&tracer->worklist); struct gc_ref obj = simple_worklist_pop(&tracer->worklist);
if (!gc_ref_is_heap_object(obj)) if (!gc_ref_is_heap_object(obj))

View file

@ -3,6 +3,7 @@
#include "gc-ref.h" #include "gc-ref.h"
#include "gc-edge.h" #include "gc-edge.h"
#include "root.h"
struct gc_heap; struct gc_heap;
@ -19,6 +20,8 @@ struct gc_trace_worker_data;
// Visit all fields in an object. // Visit all fields in an object.
static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
struct gc_trace_worker *worker) GC_ALWAYS_INLINE; struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
static inline void trace_root(struct gc_root root, struct gc_heap *heap,
struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
static void static void
gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
@ -53,6 +56,11 @@ static inline void gc_tracer_enqueue_roots(struct gc_tracer *tracer,
// Given that an object has been shaded grey, enqueue for tracing. // Given that an object has been shaded grey, enqueue for tracing.
static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker,
struct gc_ref ref) GC_ALWAYS_INLINE; struct gc_ref ref) GC_ALWAYS_INLINE;
static inline struct gc_trace_worker_data*
gc_trace_worker_data(struct gc_trace_worker *worker) GC_ALWAYS_INLINE;
static inline void gc_tracer_add_root(struct gc_tracer *tracer,
struct gc_root root);
// Run the full trace. // Run the full trace.
static inline void gc_tracer_trace(struct gc_tracer *tracer); static inline void gc_tracer_trace(struct gc_tracer *tracer);

View file

@ -1228,6 +1228,13 @@ static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
gc_trace_object(ref, tracer_visit, heap, worker, NULL); gc_trace_object(ref, tracer_visit, heap, worker, NULL);
} }
static inline void trace_root(struct gc_root root,
struct gc_heap *heap,
struct gc_trace_worker *worker) {
// We don't use parallel root tracing yet.
GC_CRASH();
}
static void static void
mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low, mark_and_globally_enqueue_mutator_conservative_roots(uintptr_t low,
uintptr_t high, uintptr_t high,
@ -1876,8 +1883,11 @@ static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
} }
static int enqueue_resolved_ephemerons(struct gc_heap *heap) { static int enqueue_resolved_ephemerons(struct gc_heap *heap) {
return gc_pop_resolved_ephemerons(heap, trace_and_enqueue_globally, struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
NULL); if (!resolved)
return 0;
gc_trace_resolved_ephemerons(resolved, trace_and_enqueue_globally, heap, NULL);
return 1;
} }
static void sweep_ephemerons(struct gc_heap *heap) { static void sweep_ephemerons(struct gc_heap *heap) {