diff --git a/Makefile b/Makefile index 9ef8b8f85..e6e568b71 100644 --- a/Makefile +++ b/Makefile @@ -17,10 +17,10 @@ bdw-%: bdw.h conservative-roots.h %-types.h %.c semi-%: semi.h precise-roots.h %-types.h heap-objects.h %.c $(COMPILE) -DGC_SEMI -o $@ $*.c -mark-sweep-%: mark-sweep.h precise-roots.h serial-marker.h assert.h debug.h %-types.h heap-objects.h %.c +mark-sweep-%: mark-sweep.h precise-roots.h serial-tracer.h assert.h debug.h %-types.h heap-objects.h %.c $(COMPILE) -DGC_MARK_SWEEP -o $@ $*.c -parallel-mark-sweep-%: mark-sweep.h precise-roots.h parallel-marker.h assert.h debug.h %-types.h heap-objects.h %.c +parallel-mark-sweep-%: mark-sweep.h precise-roots.h parallel-tracer.h assert.h debug.h %-types.h heap-objects.h %.c $(COMPILE) -DGC_PARALLEL_MARK_SWEEP -o $@ $*.c check: $(addprefix test-$(TARGET),$(TARGETS)) diff --git a/mark-sweep.h b/mark-sweep.h index 48d268e01..0dc20e550 100644 --- a/mark-sweep.h +++ b/mark-sweep.h @@ -11,9 +11,9 @@ #include "inline.h" #include "precise-roots.h" #ifdef GC_PARALLEL_MARK -#include "parallel-marker.h" +#include "parallel-tracer.h" #else -#include "serial-marker.h" +#include "serial-tracer.h" #endif #define GRANULE_SIZE 8 @@ -124,12 +124,12 @@ struct mark_space { void *mem; size_t mem_size; long count; - struct marker marker; struct mutator *deactivated_mutators; }; struct heap { struct mark_space mark_space; + struct tracer tracer; }; struct mutator_mark_buf { @@ -148,8 +148,8 @@ struct mutator { struct mutator *next; }; -static inline struct marker* mark_space_marker(struct mark_space *space) { - return &space->marker; +static inline struct tracer* heap_tracer(struct heap *heap) { + return &heap->tracer; } static inline struct mark_space* heap_mark_space(struct heap *heap) { return &heap->mark_space; @@ -174,7 +174,7 @@ static inline void clear_memory(uintptr_t addr, size_t size) { memset((char*)addr, 0, size); } -static void collect(struct mark_space *space, struct mutator *mut) NEVER_INLINE; +static void collect(struct heap *heap, struct mutator *mut) NEVER_INLINE; static inline uint8_t* mark_byte(struct mark_space *space, struct gcobj *obj) { ASSERT(space->heap_base <= (uintptr_t) obj); @@ -183,8 +183,8 @@ static inline uint8_t* mark_byte(struct mark_space *space, struct gcobj *obj) { return &space->mark_bytes[granule]; } -static inline int mark_object(struct mark_space *space, struct gcobj *obj) { - uint8_t *byte = mark_byte(space, obj); +static inline int trace_object(struct heap *heap, struct gcobj *obj) { + uint8_t *byte = mark_byte(heap_mark_space(heap), obj); if (*byte) return 0; *byte = 1; @@ -195,7 +195,7 @@ static inline void trace_one(struct gcobj *obj, void *mark_data) { switch (tag_live_alloc_kind(obj->tag)) { #define SCAN_OBJECT(name, Name, NAME) \ case ALLOC_KIND_##NAME: \ - visit_##name##_fields((Name*)obj, marker_visit, mark_data); \ + visit_##name##_fields((Name*)obj, tracer_visit, mark_data); \ break; FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT) #undef SCAN_OBJECT @@ -317,11 +317,12 @@ static void mutator_mark_buf_destroy(struct mutator_mark_buf *buf) { // Mark the roots of a mutator that is stopping for GC. We can't // enqueue them directly, so we send them to the controller in a buffer. 
static void mark_stopping_mutator_roots(struct mutator *mut) { - struct mark_space *space = mutator_mark_space(mut); + struct heap *heap = mutator_heap(mut); + struct mark_space *space = heap_mark_space(heap); struct mutator_mark_buf *local_roots = &mut->mark_buf; for (struct handle *h = mut->roots; h; h = h->next) { struct gcobj *root = h->v; - if (root && mark_object(space, root)) + if (root && trace_object(heap, root)) mutator_mark_buf_push(local_roots, root); } @@ -336,11 +337,11 @@ static void mark_stopping_mutator_roots(struct mutator *mut) { // Mark the roots of the mutator that causes GC. static void mark_controlling_mutator_roots(struct mutator *mut) { - struct mark_space *space = mutator_mark_space(mut); + struct heap *heap = mutator_heap(mut); for (struct handle *h = mut->roots; h; h = h->next) { struct gcobj *root = h->v; - if (root && mark_object(space, root)) - marker_enqueue_root(&space->marker, root); + if (root && trace_object(heap, root)) + tracer_enqueue_root(&heap->tracer, root); } } @@ -359,16 +360,17 @@ static void mark_inactive_mutators(struct mark_space *space) { mark_controlling_mutator_roots(mut); } -static void mark_global_roots(struct mark_space *space) { +static void mark_global_roots(struct heap *heap) { + struct mark_space *space = heap_mark_space(heap); for (struct handle *h = space->global_roots; h; h = h->next) { struct gcobj *obj = h->v; - if (obj && mark_object(space, obj)) - marker_enqueue_root(&space->marker, obj); + if (obj && trace_object(heap, obj)) + tracer_enqueue_root(&heap->tracer, obj); } struct mutator_mark_buf *roots = atomic_load(&space->mutator_roots); for (; roots; roots = roots->next) - marker_enqueue_roots(&space->marker, roots->objects, roots->size); + tracer_enqueue_roots(&heap->tracer, roots->objects, roots->size); atomic_store(&space->mutator_roots, NULL); } @@ -425,16 +427,17 @@ static void reset_sweeper(struct mark_space *space) { space->sweep = space->heap_base; } -static void collect(struct mark_space *space, struct mutator *mut) { +static void collect(struct heap *heap, struct mutator *mut) { + struct mark_space *space = heap_mark_space(heap); DEBUG("start collect #%ld:\n", space->count); - marker_prepare(space); + tracer_prepare(heap); request_mutators_to_stop(space); mark_controlling_mutator_roots(mut); wait_for_mutators_to_stop(space); mark_inactive_mutators(space); - mark_global_roots(space); - marker_trace(space); - marker_release(space); + mark_global_roots(heap); + tracer_trace(heap); + tracer_release(heap); clear_global_freelists(space); reset_sweeper(space); space->count++; @@ -614,7 +617,7 @@ static int sweep(struct mark_space *space, } static void* allocate_medium(struct mutator *mut, enum alloc_kind kind, - size_t granules) { + size_t granules) { struct mark_space *space = mutator_mark_space(mut); struct gcobj_freelists *small_objects = space_has_multiple_mutators(space) ? 
&space->small_objects : &mut->small_objects; @@ -651,7 +654,7 @@ static void* allocate_medium(struct mutator *mut, enum alloc_kind kind, fprintf(stderr, "ran out of space, heap size %zu\n", space->heap_size); abort(); } else { - collect(space, mut); + collect(mutator_heap(mut), mut); swept_from_beginning = 1; } } @@ -739,7 +742,7 @@ static void fill_small_from_global(struct mutator *mut, fprintf(stderr, "ran out of space, heap size %zu\n", space->heap_size); abort(); } else { - collect(space, mut); + collect(mutator_heap(mut), mut); swept_from_beginning = 1; } } @@ -838,7 +841,7 @@ static int initialize_gc(size_t size, struct heap **heap, space->heap_base = ((uintptr_t) mem) + overhead; space->heap_size = size - overhead; space->sweep = space->heap_base + space->heap_size; - if (!marker_init(space)) + if (!tracer_init(*heap)) abort(); reclaim(space, NULL, NOT_SMALL_OBJECT, (void*)space->heap_base, size_to_granules(space->heap_size)); diff --git a/parallel-marker.h b/parallel-marker.h deleted file mode 100644 index e39704852..000000000 --- a/parallel-marker.h +++ /dev/null @@ -1,642 +0,0 @@ -#ifndef PARALLEL_MARKER_H -#define PARALLEL_MARKER_H - -#include -#include -#include -#include - -#include "assert.h" -#include "debug.h" -#include "inline.h" - -// The Chase-Lev work-stealing deque, as initially described in "Dynamic -// Circular Work-Stealing Deque" (Chase and Lev, SPAA'05) -// (https://www.dre.vanderbilt.edu/~schmidt/PDF/work-stealing-dequeue.pdf) -// and improved with C11 atomics in "Correct and Efficient Work-Stealing -// for Weak Memory Models" (Lê et al, PPoPP'13) -// (http://www.di.ens.fr/%7Ezappa/readings/ppopp13.pdf). - -struct gcobj; - -struct mark_buf { - unsigned log_size; - size_t size; - struct gcobj **data; -}; - -// Min size: 8 kB on 64-bit systems, 4 kB on 32-bit. -#define mark_buf_min_log_size ((unsigned) 10) -// Max size: 2 GB on 64-bit systems, 1 GB on 32-bit. 
-#define mark_buf_max_log_size ((unsigned) 28) - -static int -mark_buf_init(struct mark_buf *buf, unsigned log_size) { - ASSERT(log_size >= mark_buf_min_log_size); - ASSERT(log_size <= mark_buf_max_log_size); - size_t size = (1 << log_size) * sizeof(struct gcobj *); - void *mem = mmap(NULL, size, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - perror("Failed to grow work-stealing dequeue"); - DEBUG("Failed to allocate %zu bytes", size); - return 0; - } - buf->log_size = log_size; - buf->size = 1 << log_size; - buf->data = mem; - return 1; -} - -static inline size_t -mark_buf_size(struct mark_buf *buf) { - return buf->size; -} - -static inline size_t -mark_buf_byte_size(struct mark_buf *buf) { - return mark_buf_size(buf) * sizeof(struct gcobj *); -} - -static void -mark_buf_release(struct mark_buf *buf) { - if (buf->data) - madvise(buf->data, mark_buf_byte_size(buf), MADV_DONTNEED); -} - -static void -mark_buf_destroy(struct mark_buf *buf) { - if (buf->data) { - munmap(buf->data, mark_buf_byte_size(buf)); - buf->data = NULL; - buf->log_size = 0; - buf->size = 0; - } -} - -static inline struct gcobj * -mark_buf_get(struct mark_buf *buf, size_t i) { - return atomic_load_explicit(&buf->data[i & (buf->size - 1)], - memory_order_relaxed); -} - -static inline void -mark_buf_put(struct mark_buf *buf, size_t i, struct gcobj * o) { - return atomic_store_explicit(&buf->data[i & (buf->size - 1)], - o, - memory_order_relaxed); -} - -static inline int -mark_buf_grow(struct mark_buf *from, struct mark_buf *to, - size_t b, size_t t) { - if (from->log_size == mark_buf_max_log_size) - return 0; - if (!mark_buf_init (to, from->log_size + 1)) - return 0; - for (size_t i=t; ibufs[0], mark_buf_min_log_size); - // Note, this fence isn't in the paper, I added it out of caution. - atomic_thread_fence(memory_order_release); - return ret; -} - -static void -mark_deque_release(struct mark_deque *q) { - for (int i = LOAD_RELAXED(&q->active); i >= 0; i--) - mark_buf_release(&q->bufs[i]); -} - -static void -mark_deque_destroy(struct mark_deque *q) { - for (int i = LOAD_RELAXED(&q->active); i >= 0; i--) - mark_buf_destroy(&q->bufs[i]); -} - -static int -mark_deque_grow(struct mark_deque *q, int cur, size_t b, size_t t) { - if (!mark_buf_grow(&q->bufs[cur], &q->bufs[cur + 1], b, t)) { - fprintf(stderr, "failed to grow deque!!\n"); - abort(); - } - - cur++; - STORE_RELAXED(&q->active, cur); - return cur; -} - -static void -mark_deque_push(struct mark_deque *q, struct gcobj * x) { - size_t b = LOAD_RELAXED(&q->bottom); - size_t t = LOAD_ACQUIRE(&q->top); - int active = LOAD_RELAXED(&q->active); - - if (b - t > mark_buf_size(&q->bufs[active]) - 1) /* Full queue. */ - active = mark_deque_grow(q, active, b, t); - - mark_buf_put(&q->bufs[active], b, x); - atomic_thread_fence(memory_order_release); - STORE_RELAXED(&q->bottom, b + 1); -} - -static void -mark_deque_push_many(struct mark_deque *q, struct gcobj **objv, size_t count) { - size_t b = LOAD_RELAXED(&q->bottom); - size_t t = LOAD_ACQUIRE(&q->top); - int active = LOAD_RELAXED(&q->active); - - while (b - t > mark_buf_size(&q->bufs[active]) - count) /* Full queue. 
*/ - active = mark_deque_grow(q, active, b, t); - - for (size_t i = 0; i < count; i++) - mark_buf_put(&q->bufs[active], b + i, objv[i]); - atomic_thread_fence(memory_order_release); - STORE_RELAXED(&q->bottom, b + count); -} - -static struct gcobj * -mark_deque_try_pop(struct mark_deque *q) { - size_t b = LOAD_RELAXED(&q->bottom); - b = b - 1; - int active = LOAD_RELAXED(&q->active); - STORE_RELAXED(&q->bottom, b); - atomic_thread_fence(memory_order_seq_cst); - size_t t = LOAD_RELAXED(&q->top); - struct gcobj * x; - if (t <= b) { // Non-empty queue. - x = mark_buf_get(&q->bufs[active], b); - if (t == b) { // Single last element in queue. - if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1, - memory_order_seq_cst, - memory_order_relaxed)) - // Failed race. - x = NULL; - STORE_RELAXED(&q->bottom, b + 1); - } - } else { // Empty queue. - x = NULL; - STORE_RELAXED(&q->bottom, b + 1); - } - return x; -} - -static struct gcobj * -mark_deque_steal(struct mark_deque *q) { - while (1) { - size_t t = LOAD_ACQUIRE(&q->top); - atomic_thread_fence(memory_order_seq_cst); - size_t b = LOAD_ACQUIRE(&q->bottom); - if (t >= b) - return NULL; - int active = LOAD_CONSUME(&q->active); - struct gcobj *x = x = mark_buf_get(&q->bufs[active], t); - if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1, - memory_order_seq_cst, - memory_order_relaxed)) - // Failed race. - continue; - return x; - } -} - -static int -mark_deque_can_steal(struct mark_deque *q) { - size_t t = LOAD_ACQUIRE(&q->top); - atomic_thread_fence(memory_order_seq_cst); - size_t b = LOAD_ACQUIRE(&q->bottom); - return t < b; -} - -#undef LOAD_RELAXED -#undef STORE_RELAXED -#undef LOAD_ACQUIRE -#undef STORE_RELEASE -#undef LOAD_CONSUME - -#define LOCAL_MARK_QUEUE_SIZE 1024 -#define LOCAL_MARK_QUEUE_MASK (LOCAL_MARK_QUEUE_SIZE - 1) -#define LOCAL_MARK_QUEUE_SHARE_AMOUNT (LOCAL_MARK_QUEUE_SIZE * 3 / 4) -struct local_mark_queue { - size_t read; - size_t write; - struct gcobj * data[LOCAL_MARK_QUEUE_SIZE]; -}; - -static inline void -local_mark_queue_init(struct local_mark_queue *q) { - q->read = q->write = 0; -} -static inline void -local_mark_queue_poison(struct local_mark_queue *q) { - q->read = 0; q->write = LOCAL_MARK_QUEUE_SIZE; -} -static inline size_t -local_mark_queue_size(struct local_mark_queue *q) { - return q->write - q->read; -} -static inline int -local_mark_queue_empty(struct local_mark_queue *q) { - return local_mark_queue_size(q) == 0; -} -static inline int -local_mark_queue_full(struct local_mark_queue *q) { - return local_mark_queue_size(q) >= LOCAL_MARK_QUEUE_SIZE; -} -static inline void -local_mark_queue_push(struct local_mark_queue *q, struct gcobj * v) { - q->data[q->write++ & LOCAL_MARK_QUEUE_MASK] = v; -} -static inline struct gcobj * -local_mark_queue_pop(struct local_mark_queue *q) { - return q->data[q->read++ & LOCAL_MARK_QUEUE_MASK]; -} - -enum mark_worker_state { - MARK_WORKER_STOPPED, - MARK_WORKER_IDLE, - MARK_WORKER_MARKING, - MARK_WORKER_STOPPING, - MARK_WORKER_DEAD -}; - -struct mark_worker { - struct mark_space *space; - size_t id; - size_t steal_id; - pthread_t thread; - enum mark_worker_state state; - pthread_mutex_t lock; - pthread_cond_t cond; - struct mark_deque deque; -}; - -#define MARK_WORKERS_MAX_COUNT 8 - -struct marker { - atomic_size_t active_markers; - size_t worker_count; - atomic_size_t running_markers; - long count; - pthread_mutex_t lock; - pthread_cond_t cond; - struct mark_worker workers[MARK_WORKERS_MAX_COUNT]; -}; - -struct local_marker { - struct mark_worker *worker; - 
struct mark_deque *share_deque; - struct mark_space *space; - struct local_mark_queue local; -}; - -struct context; -static inline struct marker* mark_space_marker(struct mark_space *space); - -static size_t number_of_current_processors(void) { return 1; } - -static int -mark_worker_init(struct mark_worker *worker, struct mark_space *space, - struct marker *marker, size_t id) { - worker->space = space; - worker->id = id; - worker->steal_id = 0; - worker->thread = 0; - worker->state = MARK_WORKER_STOPPED; - pthread_mutex_init(&worker->lock, NULL); - pthread_cond_init(&worker->cond, NULL); - return mark_deque_init(&worker->deque); -} - -static void mark_worker_mark(struct mark_worker *worker); - -static void* -mark_worker_thread(void *data) { - struct mark_worker *worker = data; - - pthread_mutex_lock(&worker->lock); - while (1) { - switch (worker->state) { - case MARK_WORKER_IDLE: - pthread_cond_wait(&worker->cond, &worker->lock); - break; - case MARK_WORKER_MARKING: - mark_worker_mark(worker); - worker->state = MARK_WORKER_IDLE; - break; - case MARK_WORKER_STOPPING: - worker->state = MARK_WORKER_DEAD; - pthread_mutex_unlock(&worker->lock); - return NULL; - default: - abort(); - } - } -} - -static int -mark_worker_spawn(struct mark_worker *worker) { - pthread_mutex_lock(&worker->lock); - ASSERT(worker->state == MARK_WORKER_STOPPED); - worker->state = MARK_WORKER_IDLE; - pthread_mutex_unlock(&worker->lock); - - if (pthread_create(&worker->thread, NULL, mark_worker_thread, worker)) { - perror("spawning marker thread failed"); - worker->state = MARK_WORKER_STOPPED; - return 0; - } - - return 1; -} - -static void -mark_worker_request_mark(struct mark_worker *worker) { - struct marker *marker = mark_space_marker(worker->space); - - pthread_mutex_lock(&worker->lock); - ASSERT(worker->state == MARK_WORKER_IDLE); - worker->state = MARK_WORKER_MARKING; - pthread_cond_signal(&worker->cond); - pthread_mutex_unlock(&worker->lock); -} - -static void -mark_worker_finished_marking(struct mark_worker *worker) { - // Signal controller that we are done with marking. 
- struct marker *marker = mark_space_marker(worker->space); - - if (atomic_fetch_sub(&marker->running_markers, 1) == 1) { - pthread_mutex_lock(&marker->lock); - marker->count++; - pthread_cond_signal(&marker->cond); - pthread_mutex_unlock(&marker->lock); - } -} - -static void -mark_worker_request_stop(struct mark_worker *worker) { - pthread_mutex_lock(&worker->lock); - ASSERT(worker->state == MARK_WORKER_IDLE); - worker->state = MARK_WORKER_STOPPING; - pthread_cond_signal(&worker->cond); - pthread_mutex_unlock(&worker->lock); -} - -static int -marker_init(struct mark_space *space) { - struct marker *marker = mark_space_marker(space); - atomic_init(&marker->active_markers, 0); - atomic_init(&marker->running_markers, 0); - marker->count = 0; - pthread_mutex_init(&marker->lock, NULL); - pthread_cond_init(&marker->cond, NULL); - size_t desired_worker_count = 0; - if (getenv("GC_MARKERS")) - desired_worker_count = atoi(getenv("GC_MARKERS")); - if (desired_worker_count == 0) - desired_worker_count = number_of_current_processors(); - if (desired_worker_count > MARK_WORKERS_MAX_COUNT) - desired_worker_count = MARK_WORKERS_MAX_COUNT; - for (size_t i = 0; i < desired_worker_count; i++) { - if (!mark_worker_init(&marker->workers[i], space, marker, i)) - break; - if (mark_worker_spawn(&marker->workers[i])) - marker->worker_count++; - else - break; - } - return marker->worker_count > 0; -} - -static void marker_prepare(struct mark_space *space) { - struct marker *marker = mark_space_marker(space); - for (size_t i = 0; i < marker->worker_count; i++) - marker->workers[i].steal_id = 0; -} -static void marker_release(struct mark_space *space) { - struct marker *marker = mark_space_marker(space); - for (size_t i = 0; i < marker->worker_count; i++) - mark_deque_release(&marker->workers[i].deque); -} - -struct gcobj; -static inline void marker_visit(void **loc, void *mark_data) ALWAYS_INLINE; -static inline void trace_one(struct gcobj *obj, void *mark_data) ALWAYS_INLINE; -static inline int mark_object(struct mark_space *space, - struct gcobj *obj) ALWAYS_INLINE; - -static inline void -marker_share(struct local_marker *mark) { - DEBUG("marker #%zu: sharing\n", mark->worker->id); - for (size_t i = 0; i < LOCAL_MARK_QUEUE_SHARE_AMOUNT; i++) - mark_deque_push(mark->share_deque, local_mark_queue_pop(&mark->local)); -} - -static inline void -marker_visit(void **loc, void *mark_data) { - struct local_marker *mark = mark_data; - struct gcobj *obj = *loc; - if (obj && mark_object(mark->space, obj)) { - if (local_mark_queue_full(&mark->local)) - marker_share(mark); - local_mark_queue_push(&mark->local, obj); - } -} - -static struct gcobj * -marker_steal_from_worker(struct marker *marker, size_t id) { - ASSERT(id < marker->worker_count); - return mark_deque_steal(&marker->workers[id].deque); -} - -static int -marker_can_steal_from_worker(struct marker *marker, size_t id) { - ASSERT(id < marker->worker_count); - return mark_deque_can_steal(&marker->workers[id].deque); -} - -static struct gcobj * -mark_worker_steal_from_any(struct mark_worker *worker, struct marker *marker) { - size_t steal_id = worker->steal_id; - for (size_t i = 0; i < marker->worker_count; i++) { - steal_id = (steal_id + 1) % marker->worker_count; - DEBUG("marker #%zu: stealing from #%zu\n", worker->id, steal_id); - struct gcobj * obj = marker_steal_from_worker(marker, steal_id); - if (obj) { - DEBUG("marker #%zu: stealing got %p\n", worker->id, obj); - worker->steal_id = steal_id; - return obj; - } - } - DEBUG("marker #%zu: failed to steal\n", 
worker->id); - return 0; -} - -static int -mark_worker_can_steal_from_any(struct mark_worker *worker, struct marker *marker) { - size_t steal_id = worker->steal_id; - DEBUG("marker #%zu: checking if any worker has tasks\n", worker->id); - for (size_t i = 0; i < marker->worker_count; i++) { - steal_id = (steal_id + 1) % marker->worker_count; - int res = marker_can_steal_from_worker(marker, steal_id); - if (res) { - DEBUG("marker #%zu: worker #%zu has tasks!\n", worker->id, steal_id); - worker->steal_id = steal_id; - return 1; - } - } - DEBUG("marker #%zu: nothing to steal\n", worker->id); - return 0; -} - -static int -mark_worker_check_termination(struct mark_worker *worker, - struct marker *marker) { - // We went around all workers and nothing. Enter termination phase. - if (atomic_fetch_sub_explicit(&marker->active_markers, 1, - memory_order_relaxed) == 1) { - DEBUG(" ->> marker #%zu: DONE (no spinning) <<-\n", worker->id); - return 1; - } - - size_t spin_count = 0; - while (1) { - if (mark_worker_can_steal_from_any(worker, marker)) { - atomic_fetch_add_explicit(&marker->active_markers, 1, - memory_order_relaxed); - return 0; - } - if (atomic_load_explicit(&marker->active_markers, - memory_order_relaxed) == 0) { - DEBUG(" ->> marker #%zu: DONE <<-\n", worker->id); - return 1; - } - // spin - DEBUG("marker #%zu: spinning #%zu\n", worker->id, spin_count); - if (spin_count < 10) - __builtin_ia32_pause(); - else if (spin_count < 20) - sched_yield(); - else if (spin_count < 40) - usleep(0); - else - usleep(1); - spin_count++; - } -} - -static struct gcobj * -mark_worker_steal(struct local_marker *mark) { - struct marker *marker = mark_space_marker(mark->space); - struct mark_worker *worker = mark->worker; - - while (1) { - DEBUG("marker #%zu: trying to steal\n", worker->id); - struct gcobj *obj = mark_worker_steal_from_any(worker, marker); - if (obj) - return obj; - - if (mark_worker_check_termination(worker, marker)) - return NULL; - } -} - -static void -mark_worker_mark(struct mark_worker *worker) { - struct local_marker mark; - mark.worker = worker; - mark.share_deque = &worker->deque; - mark.space = worker->space; - local_mark_queue_init(&mark.local); - - size_t n = 0; - DEBUG("marker #%zu: running mark loop\n", worker->id); - while (1) { - struct gcobj * obj; - if (!local_mark_queue_empty(&mark.local)) { - obj = local_mark_queue_pop(&mark.local); - } else { - obj = mark_worker_steal(&mark); - if (!obj) - break; - } - trace_one(obj, &mark); - n++; - } - DEBUG("marker #%zu: done marking, %zu objects traced\n", worker->id, n); - - mark_worker_finished_marking(worker); -} - -static inline void -marker_enqueue_root(struct marker *marker, struct gcobj *obj) { - struct mark_deque *worker0_deque = &marker->workers[0].deque; - mark_deque_push(worker0_deque, obj); -} - -static inline void -marker_enqueue_roots(struct marker *marker, struct gcobj **objv, - size_t count) { - struct mark_deque *worker0_deque = &marker->workers[0].deque; - mark_deque_push_many(worker0_deque, objv, count); -} - -static inline void -marker_trace(struct mark_space *space) { - struct marker *marker = mark_space_marker(space); - - pthread_mutex_lock(&marker->lock); - long mark_count = marker->count; - pthread_mutex_unlock(&marker->lock); - - DEBUG("starting trace; %zu workers\n", marker->worker_count); - DEBUG("waking workers\n"); - atomic_store_explicit(&marker->active_markers, marker->worker_count, - memory_order_release); - atomic_store_explicit(&marker->running_markers, marker->worker_count, - memory_order_release); - 
  for (size_t i = 0; i < marker->worker_count; i++)
-    mark_worker_request_mark(&marker->workers[i]);
-
-  DEBUG("waiting on markers\n");
-
-  pthread_mutex_lock(&marker->lock);
-  while (marker->count <= mark_count)
-    pthread_cond_wait(&marker->cond, &marker->lock);
-  pthread_mutex_unlock(&marker->lock);
-
-  DEBUG("trace finished\n");
-}
-
-#endif // PARALLEL_MARKER_H
diff --git a/parallel-tracer.h b/parallel-tracer.h
new file mode 100644
index 000000000..f96e93754
--- /dev/null
+++ b/parallel-tracer.h
@@ -0,0 +1,643 @@
+#ifndef PARALLEL_TRACER_H
+#define PARALLEL_TRACER_H
+
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "assert.h"
+#include "debug.h"
+#include "inline.h"
+
+// The Chase-Lev work-stealing deque, as initially described in "Dynamic
+// Circular Work-Stealing Deque" (Chase and Lev, SPAA'05)
+// (https://www.dre.vanderbilt.edu/~schmidt/PDF/work-stealing-dequeue.pdf)
+// and improved with C11 atomics in "Correct and Efficient Work-Stealing
+// for Weak Memory Models" (Lê et al, PPoPP'13)
+// (http://www.di.ens.fr/%7Ezappa/readings/ppopp13.pdf).
+
+struct gcobj;
+
+struct trace_buf {
+  unsigned log_size;
+  size_t size;
+  struct gcobj **data;
+};
+
+// Min size: 8 kB on 64-bit systems, 4 kB on 32-bit.
+#define trace_buf_min_log_size ((unsigned) 10)
+// Max size: 2 GB on 64-bit systems, 1 GB on 32-bit.
+#define trace_buf_max_log_size ((unsigned) 28)
+
+static int
+trace_buf_init(struct trace_buf *buf, unsigned log_size) {
+  ASSERT(log_size >= trace_buf_min_log_size);
+  ASSERT(log_size <= trace_buf_max_log_size);
+  size_t size = (1 << log_size) * sizeof(struct gcobj *);
+  void *mem = mmap(NULL, size, PROT_READ|PROT_WRITE,
+                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    perror("Failed to grow work-stealing deque");
+    DEBUG("Failed to allocate %zu bytes", size);
+    return 0;
+  }
+  buf->log_size = log_size;
+  buf->size = 1 << log_size;
+  buf->data = mem;
+  return 1;
+}
+
+static inline size_t
+trace_buf_size(struct trace_buf *buf) {
+  return buf->size;
+}
+
+static inline size_t
+trace_buf_byte_size(struct trace_buf *buf) {
+  return trace_buf_size(buf) * sizeof(struct gcobj *);
+}
+
+static void
+trace_buf_release(struct trace_buf *buf) {
+  if (buf->data)
+    madvise(buf->data, trace_buf_byte_size(buf), MADV_DONTNEED);
+}
+
+static void
+trace_buf_destroy(struct trace_buf *buf) {
+  if (buf->data) {
+    munmap(buf->data, trace_buf_byte_size(buf));
+    buf->data = NULL;
+    buf->log_size = 0;
+    buf->size = 0;
+  }
+}
+
+static inline struct gcobj *
+trace_buf_get(struct trace_buf *buf, size_t i) {
+  return atomic_load_explicit(&buf->data[i & (buf->size - 1)],
+                              memory_order_relaxed);
+}
+
+static inline void
+trace_buf_put(struct trace_buf *buf, size_t i, struct gcobj * o) {
+  return atomic_store_explicit(&buf->data[i & (buf->size - 1)],
+                               o,
+                               memory_order_relaxed);
+}
+
+static inline int
+trace_buf_grow(struct trace_buf *from, struct trace_buf *to,
+               size_t b, size_t t) {
+  if (from->log_size == trace_buf_max_log_size)
+    return 0;
+  if (!trace_buf_init (to, from->log_size + 1))
+    return 0;
+  for (size_t i=t; i<b; i++)
+    trace_buf_put(to, i, trace_buf_get(from, i));
+  return 1;
+}
+
+// A Chase-Lev work-stealing deque: a growable set of circular buffers
+// addressed by monotonically increasing bottom and top counters.
+struct trace_deque {
+  // Ensure bottom and top are on different cache lines.
+  union {
+    atomic_size_t bottom;
+    char bottom_padding[64];
+  };
+  union {
+    atomic_size_t top;
+    char top_padding[64];
+  };
+  atomic_int active; // Which trace_buf is active.
+  struct trace_buf bufs[(trace_buf_max_log_size - trace_buf_min_log_size) + 1];
+};
+
+#define LOAD_RELAXED(loc) atomic_load_explicit(loc, memory_order_relaxed)
+#define STORE_RELAXED(loc, o) atomic_store_explicit(loc, o, memory_order_relaxed)
+#define LOAD_ACQUIRE(loc) atomic_load_explicit(loc, memory_order_acquire)
+#define STORE_RELEASE(loc, o) atomic_store_explicit(loc, o, memory_order_release)
+#define LOAD_CONSUME(loc) atomic_load_explicit(loc, memory_order_consume)
+
+static int
+trace_deque_init(struct trace_deque *q) {
+  memset(q, 0, sizeof (*q));
+  int ret = trace_buf_init(&q->bufs[0], trace_buf_min_log_size);
+  // Note, this fence isn't in the paper, I added it out of caution.
+ atomic_thread_fence(memory_order_release); + return ret; +} + +static void +trace_deque_release(struct trace_deque *q) { + for (int i = LOAD_RELAXED(&q->active); i >= 0; i--) + trace_buf_release(&q->bufs[i]); +} + +static void +trace_deque_destroy(struct trace_deque *q) { + for (int i = LOAD_RELAXED(&q->active); i >= 0; i--) + trace_buf_destroy(&q->bufs[i]); +} + +static int +trace_deque_grow(struct trace_deque *q, int cur, size_t b, size_t t) { + if (!trace_buf_grow(&q->bufs[cur], &q->bufs[cur + 1], b, t)) { + fprintf(stderr, "failed to grow deque!!\n"); + abort(); + } + + cur++; + STORE_RELAXED(&q->active, cur); + return cur; +} + +static void +trace_deque_push(struct trace_deque *q, struct gcobj * x) { + size_t b = LOAD_RELAXED(&q->bottom); + size_t t = LOAD_ACQUIRE(&q->top); + int active = LOAD_RELAXED(&q->active); + + if (b - t > trace_buf_size(&q->bufs[active]) - 1) /* Full queue. */ + active = trace_deque_grow(q, active, b, t); + + trace_buf_put(&q->bufs[active], b, x); + atomic_thread_fence(memory_order_release); + STORE_RELAXED(&q->bottom, b + 1); +} + +static void +trace_deque_push_many(struct trace_deque *q, struct gcobj **objv, size_t count) { + size_t b = LOAD_RELAXED(&q->bottom); + size_t t = LOAD_ACQUIRE(&q->top); + int active = LOAD_RELAXED(&q->active); + + while (b - t > trace_buf_size(&q->bufs[active]) - count) /* Full queue. */ + active = trace_deque_grow(q, active, b, t); + + for (size_t i = 0; i < count; i++) + trace_buf_put(&q->bufs[active], b + i, objv[i]); + atomic_thread_fence(memory_order_release); + STORE_RELAXED(&q->bottom, b + count); +} + +static struct gcobj * +trace_deque_try_pop(struct trace_deque *q) { + size_t b = LOAD_RELAXED(&q->bottom); + b = b - 1; + int active = LOAD_RELAXED(&q->active); + STORE_RELAXED(&q->bottom, b); + atomic_thread_fence(memory_order_seq_cst); + size_t t = LOAD_RELAXED(&q->top); + struct gcobj * x; + if (t <= b) { // Non-empty queue. + x = trace_buf_get(&q->bufs[active], b); + if (t == b) { // Single last element in queue. + if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1, + memory_order_seq_cst, + memory_order_relaxed)) + // Failed race. + x = NULL; + STORE_RELAXED(&q->bottom, b + 1); + } + } else { // Empty queue. + x = NULL; + STORE_RELAXED(&q->bottom, b + 1); + } + return x; +} + +static struct gcobj * +trace_deque_steal(struct trace_deque *q) { + while (1) { + size_t t = LOAD_ACQUIRE(&q->top); + atomic_thread_fence(memory_order_seq_cst); + size_t b = LOAD_ACQUIRE(&q->bottom); + if (t >= b) + return NULL; + int active = LOAD_CONSUME(&q->active); + struct gcobj *x = x = trace_buf_get(&q->bufs[active], t); + if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1, + memory_order_seq_cst, + memory_order_relaxed)) + // Failed race. 
+ continue; + return x; + } +} + +static int +trace_deque_can_steal(struct trace_deque *q) { + size_t t = LOAD_ACQUIRE(&q->top); + atomic_thread_fence(memory_order_seq_cst); + size_t b = LOAD_ACQUIRE(&q->bottom); + return t < b; +} + +#undef LOAD_RELAXED +#undef STORE_RELAXED +#undef LOAD_ACQUIRE +#undef STORE_RELEASE +#undef LOAD_CONSUME + +#define LOCAL_TRACE_QUEUE_SIZE 1024 +#define LOCAL_TRACE_QUEUE_MASK (LOCAL_TRACE_QUEUE_SIZE - 1) +#define LOCAL_TRACE_QUEUE_SHARE_AMOUNT (LOCAL_TRACE_QUEUE_SIZE * 3 / 4) +struct local_trace_queue { + size_t read; + size_t write; + struct gcobj * data[LOCAL_TRACE_QUEUE_SIZE]; +}; + +static inline void +local_trace_queue_init(struct local_trace_queue *q) { + q->read = q->write = 0; +} +static inline void +local_trace_queue_poison(struct local_trace_queue *q) { + q->read = 0; q->write = LOCAL_TRACE_QUEUE_SIZE; +} +static inline size_t +local_trace_queue_size(struct local_trace_queue *q) { + return q->write - q->read; +} +static inline int +local_trace_queue_empty(struct local_trace_queue *q) { + return local_trace_queue_size(q) == 0; +} +static inline int +local_trace_queue_full(struct local_trace_queue *q) { + return local_trace_queue_size(q) >= LOCAL_TRACE_QUEUE_SIZE; +} +static inline void +local_trace_queue_push(struct local_trace_queue *q, struct gcobj * v) { + q->data[q->write++ & LOCAL_TRACE_QUEUE_MASK] = v; +} +static inline struct gcobj * +local_trace_queue_pop(struct local_trace_queue *q) { + return q->data[q->read++ & LOCAL_TRACE_QUEUE_MASK]; +} + +enum trace_worker_state { + TRACE_WORKER_STOPPED, + TRACE_WORKER_IDLE, + TRACE_WORKER_TRACING, + TRACE_WORKER_STOPPING, + TRACE_WORKER_DEAD +}; + +struct heap; +struct trace_worker { + struct heap *heap; + size_t id; + size_t steal_id; + pthread_t thread; + enum trace_worker_state state; + pthread_mutex_t lock; + pthread_cond_t cond; + struct trace_deque deque; +}; + +#define TRACE_WORKERS_MAX_COUNT 8 + +struct tracer { + atomic_size_t active_tracers; + size_t worker_count; + atomic_size_t running_tracers; + long count; + pthread_mutex_t lock; + pthread_cond_t cond; + struct trace_worker workers[TRACE_WORKERS_MAX_COUNT]; +}; + +struct local_tracer { + struct trace_worker *worker; + struct trace_deque *share_deque; + struct heap *heap; + struct local_trace_queue local; +}; + +struct context; +static inline struct tracer* heap_tracer(struct heap *heap); + +static size_t number_of_current_processors(void) { return 1; } + +static int +trace_worker_init(struct trace_worker *worker, struct heap *heap, + struct tracer *tracer, size_t id) { + worker->heap = heap; + worker->id = id; + worker->steal_id = 0; + worker->thread = 0; + worker->state = TRACE_WORKER_STOPPED; + pthread_mutex_init(&worker->lock, NULL); + pthread_cond_init(&worker->cond, NULL); + return trace_deque_init(&worker->deque); +} + +static void trace_worker_trace(struct trace_worker *worker); + +static void* +trace_worker_thread(void *data) { + struct trace_worker *worker = data; + + pthread_mutex_lock(&worker->lock); + while (1) { + switch (worker->state) { + case TRACE_WORKER_IDLE: + pthread_cond_wait(&worker->cond, &worker->lock); + break; + case TRACE_WORKER_TRACING: + trace_worker_trace(worker); + worker->state = TRACE_WORKER_IDLE; + break; + case TRACE_WORKER_STOPPING: + worker->state = TRACE_WORKER_DEAD; + pthread_mutex_unlock(&worker->lock); + return NULL; + default: + abort(); + } + } +} + +static int +trace_worker_spawn(struct trace_worker *worker) { + pthread_mutex_lock(&worker->lock); + ASSERT(worker->state == 
TRACE_WORKER_STOPPED); + worker->state = TRACE_WORKER_IDLE; + pthread_mutex_unlock(&worker->lock); + + if (pthread_create(&worker->thread, NULL, trace_worker_thread, worker)) { + perror("spawning tracer thread failed"); + worker->state = TRACE_WORKER_STOPPED; + return 0; + } + + return 1; +} + +static void +trace_worker_request_trace(struct trace_worker *worker) { + struct tracer *tracer = heap_tracer(worker->heap); + + pthread_mutex_lock(&worker->lock); + ASSERT(worker->state == TRACE_WORKER_IDLE); + worker->state = TRACE_WORKER_TRACING; + pthread_cond_signal(&worker->cond); + pthread_mutex_unlock(&worker->lock); +} + +static void +trace_worker_finished_tracing(struct trace_worker *worker) { + // Signal controller that we are done with tracing. + struct tracer *tracer = heap_tracer(worker->heap); + + if (atomic_fetch_sub(&tracer->running_tracers, 1) == 1) { + pthread_mutex_lock(&tracer->lock); + tracer->count++; + pthread_cond_signal(&tracer->cond); + pthread_mutex_unlock(&tracer->lock); + } +} + +static void +trace_worker_request_stop(struct trace_worker *worker) { + pthread_mutex_lock(&worker->lock); + ASSERT(worker->state == TRACE_WORKER_IDLE); + worker->state = TRACE_WORKER_STOPPING; + pthread_cond_signal(&worker->cond); + pthread_mutex_unlock(&worker->lock); +} + +static int +tracer_init(struct heap *heap) { + struct tracer *tracer = heap_tracer(heap); + atomic_init(&tracer->active_tracers, 0); + atomic_init(&tracer->running_tracers, 0); + tracer->count = 0; + pthread_mutex_init(&tracer->lock, NULL); + pthread_cond_init(&tracer->cond, NULL); + size_t desired_worker_count = 0; + if (getenv("GC_TRACERS")) + desired_worker_count = atoi(getenv("GC_TRACERS")); + if (desired_worker_count == 0) + desired_worker_count = number_of_current_processors(); + if (desired_worker_count > TRACE_WORKERS_MAX_COUNT) + desired_worker_count = TRACE_WORKERS_MAX_COUNT; + for (size_t i = 0; i < desired_worker_count; i++) { + if (!trace_worker_init(&tracer->workers[i], heap, tracer, i)) + break; + if (trace_worker_spawn(&tracer->workers[i])) + tracer->worker_count++; + else + break; + } + return tracer->worker_count > 0; +} + +static void tracer_prepare(struct heap *heap) { + struct tracer *tracer = heap_tracer(heap); + for (size_t i = 0; i < tracer->worker_count; i++) + tracer->workers[i].steal_id = 0; +} +static void tracer_release(struct heap *heap) { + struct tracer *tracer = heap_tracer(heap); + for (size_t i = 0; i < tracer->worker_count; i++) + trace_deque_release(&tracer->workers[i].deque); +} + +struct gcobj; +static inline void tracer_visit(void **loc, void *trace_data) ALWAYS_INLINE; +static inline void trace_one(struct gcobj *obj, void *trace_data) ALWAYS_INLINE; +static inline int trace_object(struct heap *heap, + struct gcobj *obj) ALWAYS_INLINE; + +static inline void +tracer_share(struct local_tracer *trace) { + DEBUG("tracer #%zu: sharing\n", trace->worker->id); + for (size_t i = 0; i < LOCAL_TRACE_QUEUE_SHARE_AMOUNT; i++) + trace_deque_push(trace->share_deque, local_trace_queue_pop(&trace->local)); +} + +static inline void +tracer_visit(void **loc, void *trace_data) { + struct local_tracer *trace = trace_data; + struct gcobj *obj = *loc; + if (obj && trace_object(trace->heap, obj)) { + if (local_trace_queue_full(&trace->local)) + tracer_share(trace); + local_trace_queue_push(&trace->local, obj); + } +} + +static struct gcobj * +tracer_steal_from_worker(struct tracer *tracer, size_t id) { + ASSERT(id < tracer->worker_count); + return trace_deque_steal(&tracer->workers[id].deque); +} + +static 
int +tracer_can_steal_from_worker(struct tracer *tracer, size_t id) { + ASSERT(id < tracer->worker_count); + return trace_deque_can_steal(&tracer->workers[id].deque); +} + +static struct gcobj * +trace_worker_steal_from_any(struct trace_worker *worker, struct tracer *tracer) { + size_t steal_id = worker->steal_id; + for (size_t i = 0; i < tracer->worker_count; i++) { + steal_id = (steal_id + 1) % tracer->worker_count; + DEBUG("tracer #%zu: stealing from #%zu\n", worker->id, steal_id); + struct gcobj * obj = tracer_steal_from_worker(tracer, steal_id); + if (obj) { + DEBUG("tracer #%zu: stealing got %p\n", worker->id, obj); + worker->steal_id = steal_id; + return obj; + } + } + DEBUG("tracer #%zu: failed to steal\n", worker->id); + return 0; +} + +static int +trace_worker_can_steal_from_any(struct trace_worker *worker, struct tracer *tracer) { + size_t steal_id = worker->steal_id; + DEBUG("tracer #%zu: checking if any worker has tasks\n", worker->id); + for (size_t i = 0; i < tracer->worker_count; i++) { + steal_id = (steal_id + 1) % tracer->worker_count; + int res = tracer_can_steal_from_worker(tracer, steal_id); + if (res) { + DEBUG("tracer #%zu: worker #%zu has tasks!\n", worker->id, steal_id); + worker->steal_id = steal_id; + return 1; + } + } + DEBUG("tracer #%zu: nothing to steal\n", worker->id); + return 0; +} + +static int +trace_worker_check_termination(struct trace_worker *worker, + struct tracer *tracer) { + // We went around all workers and nothing. Enter termination phase. + if (atomic_fetch_sub_explicit(&tracer->active_tracers, 1, + memory_order_relaxed) == 1) { + DEBUG(" ->> tracer #%zu: DONE (no spinning) <<-\n", worker->id); + return 1; + } + + size_t spin_count = 0; + while (1) { + if (trace_worker_can_steal_from_any(worker, tracer)) { + atomic_fetch_add_explicit(&tracer->active_tracers, 1, + memory_order_relaxed); + return 0; + } + if (atomic_load_explicit(&tracer->active_tracers, + memory_order_relaxed) == 0) { + DEBUG(" ->> tracer #%zu: DONE <<-\n", worker->id); + return 1; + } + // spin + DEBUG("tracer #%zu: spinning #%zu\n", worker->id, spin_count); + if (spin_count < 10) + __builtin_ia32_pause(); + else if (spin_count < 20) + sched_yield(); + else if (spin_count < 40) + usleep(0); + else + usleep(1); + spin_count++; + } +} + +static struct gcobj * +trace_worker_steal(struct local_tracer *trace) { + struct tracer *tracer = heap_tracer(trace->heap); + struct trace_worker *worker = trace->worker; + + while (1) { + DEBUG("tracer #%zu: trying to steal\n", worker->id); + struct gcobj *obj = trace_worker_steal_from_any(worker, tracer); + if (obj) + return obj; + + if (trace_worker_check_termination(worker, tracer)) + return NULL; + } +} + +static void +trace_worker_trace(struct trace_worker *worker) { + struct local_tracer trace; + trace.worker = worker; + trace.share_deque = &worker->deque; + trace.heap = worker->heap; + local_trace_queue_init(&trace.local); + + size_t n = 0; + DEBUG("tracer #%zu: running trace loop\n", worker->id); + while (1) { + struct gcobj * obj; + if (!local_trace_queue_empty(&trace.local)) { + obj = local_trace_queue_pop(&trace.local); + } else { + obj = trace_worker_steal(&trace); + if (!obj) + break; + } + trace_one(obj, &trace); + n++; + } + DEBUG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n); + + trace_worker_finished_tracing(worker); +} + +static inline void +tracer_enqueue_root(struct tracer *tracer, struct gcobj *obj) { + struct trace_deque *worker0_deque = &tracer->workers[0].deque; + trace_deque_push(worker0_deque, obj); 
+} + +static inline void +tracer_enqueue_roots(struct tracer *tracer, struct gcobj **objv, + size_t count) { + struct trace_deque *worker0_deque = &tracer->workers[0].deque; + trace_deque_push_many(worker0_deque, objv, count); +} + +static inline void +tracer_trace(struct heap *heap) { + struct tracer *tracer = heap_tracer(heap); + + pthread_mutex_lock(&tracer->lock); + long trace_count = tracer->count; + pthread_mutex_unlock(&tracer->lock); + + DEBUG("starting trace; %zu workers\n", tracer->worker_count); + DEBUG("waking workers\n"); + atomic_store_explicit(&tracer->active_tracers, tracer->worker_count, + memory_order_release); + atomic_store_explicit(&tracer->running_tracers, tracer->worker_count, + memory_order_release); + for (size_t i = 0; i < tracer->worker_count; i++) + trace_worker_request_trace(&tracer->workers[i]); + + DEBUG("waiting on tracers\n"); + + pthread_mutex_lock(&tracer->lock); + while (tracer->count <= trace_count) + pthread_cond_wait(&tracer->cond, &tracer->lock); + pthread_mutex_unlock(&tracer->lock); + + DEBUG("trace finished\n"); +} + +#endif // PARALLEL_TRACER_H diff --git a/serial-marker.h b/serial-marker.h deleted file mode 100644 index 5f5330b6d..000000000 --- a/serial-marker.h +++ /dev/null @@ -1,168 +0,0 @@ -#ifndef SERIAL_TRACE_H -#define SERIAL_TRACE_H - -#include -#include - -#include "assert.h" -#include "debug.h" - -struct gcobj; - -struct mark_queue { - size_t size; - size_t read; - size_t write; - struct gcobj **buf; -}; - -static const size_t mark_queue_max_size = - (1ULL << (sizeof(struct gcobj *) * 8 - 1)) / sizeof(struct gcobj *); -static const size_t mark_queue_release_byte_threshold = 1 * 1024 * 1024; - -static struct gcobj ** -mark_queue_alloc(size_t size) { - void *mem = mmap(NULL, size * sizeof(struct gcobj *), PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - perror("Failed to grow mark queue"); - DEBUG("Failed to allocate %zu bytes", size); - return NULL; - } - return mem; -} - -static int -mark_queue_init(struct mark_queue *q) { - q->size = getpagesize() / sizeof(struct gcobj *); - q->read = 0; - q->write = 0; - q->buf = mark_queue_alloc(q->size); - return !!q->buf; -} - -static inline struct gcobj * -mark_queue_get(struct mark_queue *q, size_t idx) { - return q->buf[idx & (q->size - 1)]; -} - -static inline void -mark_queue_put(struct mark_queue *q, size_t idx, struct gcobj *x) { - q->buf[idx & (q->size - 1)] = x; -} - -static int mark_queue_grow(struct mark_queue *q) NEVER_INLINE; - -static int -mark_queue_grow(struct mark_queue *q) { - size_t old_size = q->size; - struct gcobj **old_buf = q->buf; - if (old_size >= mark_queue_max_size) { - DEBUG("mark queue already at max size of %zu bytes", old_size); - return 0; - } - - size_t new_size = old_size * 2; - struct gcobj **new_buf = mark_queue_alloc(new_size); - if (!new_buf) - return 0; - - size_t old_mask = old_size - 1; - size_t new_mask = new_size - 1; - - for (size_t i = q->read; i < q->write; i++) - new_buf[i & new_mask] = old_buf[i & old_mask]; - - munmap(old_buf, old_size * sizeof(struct gcobj *)); - - q->size = new_size; - q->buf = new_buf; - return 1; -} - -static inline void -mark_queue_push(struct mark_queue *q, struct gcobj *p) { - if (UNLIKELY(q->write - q->read == q->size)) { - if (!mark_queue_grow(q)) - abort(); - } - mark_queue_put(q, q->write++, p); -} - -static inline void -mark_queue_push_many(struct mark_queue *q, struct gcobj **pv, size_t count) { - while (q->size - (q->write - q->read) < count) { - if (!mark_queue_grow(q)) - 
      abort();
-  }
-  for (size_t i = 0; i < count; i++)
-    mark_queue_put(q, q->write++, pv[i]);
-}
-
-static inline struct gcobj*
-mark_queue_pop(struct mark_queue *q) {
-  if (UNLIKELY(q->read == q->write))
-    return NULL;
-  return mark_queue_get(q, q->read++);
-}
-
-static void
-mark_queue_release(struct mark_queue *q) {
-  size_t byte_size = q->size * sizeof(struct gcobj *);
-  if (byte_size >= mark_queue_release_byte_threshold)
-    madvise(q->buf, byte_size, MADV_DONTNEED);
-  q->read = q->write = 0;
-}
-
-static void
-mark_queue_destroy(struct mark_queue *q) {
-  size_t byte_size = q->size * sizeof(struct gcobj *);
-  munmap(q->buf, byte_size);
-}
-
-struct marker {
-  struct mark_queue queue;
-};
-
-struct mark_space;
-static inline struct marker* mark_space_marker(struct mark_space *space);
-
-static int
-marker_init(struct mark_space *space) {
-  return mark_queue_init(&mark_space_marker(space)->queue);
-}
-static void marker_prepare(struct mark_space *space) {}
-static void marker_release(struct mark_space *space) {
-  mark_queue_release(&mark_space_marker(space)->queue);
-}
-
-struct gcobj;
-static inline void marker_visit(void **loc, void *mark_data) ALWAYS_INLINE;
-static inline void trace_one(struct gcobj *obj, void *mark_data) ALWAYS_INLINE;
-static inline int mark_object(struct mark_space *space,
-                              struct gcobj *obj) ALWAYS_INLINE;
-
-static inline void
-marker_enqueue_root(struct marker *marker, struct gcobj *obj) {
-  mark_queue_push(&marker->queue, obj);
-}
-static inline void
-marker_enqueue_roots(struct marker *marker, struct gcobj **objs,
-                     size_t count) {
-  mark_queue_push_many(&marker->queue, objs, count);
-}
-static inline void
-marker_visit(void **loc, void *mark_data) {
-  struct mark_space *space = mark_data;
-  struct gcobj *obj = *loc;
-  if (obj && mark_object(space, obj))
-    marker_enqueue_root(mark_space_marker(space), obj);
-}
-static inline void
-marker_trace(struct mark_space *space) {
-  struct gcobj *obj;
-  while ((obj = mark_queue_pop(&mark_space_marker(space)->queue)))
-    trace_one(obj, space);
-}
-
-#endif // SERIAL_MARK_H
diff --git a/serial-tracer.h b/serial-tracer.h
new file mode 100644
index 000000000..7bea9e63e
--- /dev/null
+++ b/serial-tracer.h
@@ -0,0 +1,168 @@
+#ifndef SERIAL_TRACER_H
+#define SERIAL_TRACER_H
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "assert.h"
+#include "debug.h"
+
+struct gcobj;
+
+struct trace_queue {
+  size_t size;
+  size_t read;
+  size_t write;
+  struct gcobj **buf;
+};
+
+static const size_t trace_queue_max_size =
+  (1ULL << (sizeof(struct gcobj *) * 8 - 1)) / sizeof(struct gcobj *);
+static const size_t trace_queue_release_byte_threshold = 1 * 1024 * 1024;
+
+static struct gcobj **
+trace_queue_alloc(size_t size) {
+  void *mem = mmap(NULL, size * sizeof(struct gcobj *), PROT_READ|PROT_WRITE,
+                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    perror("Failed to grow trace queue");
+    DEBUG("Failed to allocate %zu bytes", size);
+    return NULL;
+  }
+  return mem;
+}
+
+static int
+trace_queue_init(struct trace_queue *q) {
+  q->size = getpagesize() / sizeof(struct gcobj *);
+  q->read = 0;
+  q->write = 0;
+  q->buf = trace_queue_alloc(q->size);
+  return !!q->buf;
+}
+
+static inline struct gcobj *
+trace_queue_get(struct trace_queue *q, size_t idx) {
+  return q->buf[idx & (q->size - 1)];
+}
+
+static inline void
+trace_queue_put(struct trace_queue *q, size_t idx, struct gcobj *x) {
+  q->buf[idx & (q->size - 1)] = x;
+}
+
+static int trace_queue_grow(struct trace_queue *q) NEVER_INLINE;
+
+static int
+trace_queue_grow(struct trace_queue *q) {
+
size_t old_size = q->size; + struct gcobj **old_buf = q->buf; + if (old_size >= trace_queue_max_size) { + DEBUG("trace queue already at max size of %zu bytes", old_size); + return 0; + } + + size_t new_size = old_size * 2; + struct gcobj **new_buf = trace_queue_alloc(new_size); + if (!new_buf) + return 0; + + size_t old_mask = old_size - 1; + size_t new_mask = new_size - 1; + + for (size_t i = q->read; i < q->write; i++) + new_buf[i & new_mask] = old_buf[i & old_mask]; + + munmap(old_buf, old_size * sizeof(struct gcobj *)); + + q->size = new_size; + q->buf = new_buf; + return 1; +} + +static inline void +trace_queue_push(struct trace_queue *q, struct gcobj *p) { + if (UNLIKELY(q->write - q->read == q->size)) { + if (!trace_queue_grow(q)) + abort(); + } + trace_queue_put(q, q->write++, p); +} + +static inline void +trace_queue_push_many(struct trace_queue *q, struct gcobj **pv, size_t count) { + while (q->size - (q->write - q->read) < count) { + if (!trace_queue_grow(q)) + abort(); + } + for (size_t i = 0; i < count; i++) + trace_queue_put(q, q->write++, pv[i]); +} + +static inline struct gcobj* +trace_queue_pop(struct trace_queue *q) { + if (UNLIKELY(q->read == q->write)) + return NULL; + return trace_queue_get(q, q->read++); +} + +static void +trace_queue_release(struct trace_queue *q) { + size_t byte_size = q->size * sizeof(struct gcobj *); + if (byte_size >= trace_queue_release_byte_threshold) + madvise(q->buf, byte_size, MADV_DONTNEED); + q->read = q->write = 0; +} + +static void +trace_queue_destroy(struct trace_queue *q) { + size_t byte_size = q->size * sizeof(struct gcobj *); + munmap(q->buf, byte_size); +} + +struct tracer { + struct trace_queue queue; +}; + +struct heap; +static inline struct tracer* heap_tracer(struct heap *heap); + +static int +tracer_init(struct heap *heap) { + return trace_queue_init(&heap_tracer(heap)->queue); +} +static void tracer_prepare(struct heap *heap) {} +static void tracer_release(struct heap *heap) { + trace_queue_release(&heap_tracer(heap)->queue); +} + +struct gcobj; +static inline void tracer_visit(void **loc, void *trace_data) ALWAYS_INLINE; +static inline void trace_one(struct gcobj *obj, void *trace_data) ALWAYS_INLINE; +static inline int trace_object(struct heap *heap, + struct gcobj *obj) ALWAYS_INLINE; + +static inline void +tracer_enqueue_root(struct tracer *tracer, struct gcobj *obj) { + trace_queue_push(&tracer->queue, obj); +} +static inline void +tracer_enqueue_roots(struct tracer *tracer, struct gcobj **objs, + size_t count) { + trace_queue_push_many(&tracer->queue, objs, count); +} +static inline void +tracer_visit(void **loc, void *trace_data) { + struct heap *heap = trace_data; + struct gcobj *obj = *loc; + if (obj && trace_object(heap, obj)) + tracer_enqueue_root(heap_tracer(heap), obj); +} +static inline void +tracer_trace(struct heap *heap) { + struct gcobj *obj; + while ((obj = trace_queue_pop(&heap_tracer(heap)->queue))) + trace_one(obj, heap); +} + +#endif // SERIAL_TRACER_H
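
All of the queue flavors in this patch (trace_queue, trace_buf, local_trace_queue) share one indexing discipline: a power-of-two capacity, monotonically increasing read/write (or top/bottom) counters, and slot selection by masking with capacity - 1, so the counters are never wrapped or reset. The following minimal, standalone sketch illustrates only that discipline; the ring/ring_* names and the fixed capacity of 8 are invented for the example and are not part of the patch.

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

// Toy fixed-size ring with the same indexing discipline as
// local_trace_queue: power-of-two capacity, monotonically increasing
// read/write counters, slots addressed by masking with (capacity - 1).
#define RING_SIZE 8                    // must be a power of two
#define RING_MASK (RING_SIZE - 1)

struct ring {
  size_t read;
  size_t write;
  int data[RING_SIZE];
};

static size_t ring_size(struct ring *q) { return q->write - q->read; }
static int ring_empty(struct ring *q) { return ring_size(q) == 0; }
static int ring_full(struct ring *q) { return ring_size(q) >= RING_SIZE; }

static void ring_push(struct ring *q, int v) {
  assert(!ring_full(q));
  q->data[q->write++ & RING_MASK] = v;
}

static int ring_pop(struct ring *q) {
  assert(!ring_empty(q));
  return q->data[q->read++ & RING_MASK];
}

int main(void) {
  struct ring q = { 0, 0, { 0 } };
  // Push and pop more than RING_SIZE items: the counters keep growing,
  // but the masked slot index wraps, so no modulo or reset is needed.
  for (int i = 0; i < 20; i++) {
    ring_push(&q, i);
    printf("popped %d\n", ring_pop(&q));
  }
  return 0;
}

Because occupancy is only ever computed by subtraction (write - read), the scheme stays correct even when the unsigned counters eventually overflow, which is why none of the patch's queues normalize their indices.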
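The collect() changes above also pin down the call order that both tracer implementations must support: tracer_prepare(heap), then root enqueueing via tracer_enqueue_root()/tracer_enqueue_roots() guarded by trace_object(), then tracer_trace(heap), then tracer_release(heap). The toy program below is a single-threaded stand-in that follows the same order and the same "mark first, enqueue only on the first visit" rule; the toy_obj type, the fixed 64-entry queue, and the simplified signatures (for example, trace_one taking the heap directly) are assumptions made for the example, not the real API in serial-tracer.h or parallel-tracer.h.

#include <stddef.h>
#include <stdio.h>

struct toy_obj {
  int marked;
  struct toy_obj *children[2];
};

#define TOY_QUEUE_SIZE 64

struct tracer {
  size_t read;
  size_t write;
  struct toy_obj *queue[TOY_QUEUE_SIZE];
};

struct heap {
  struct tracer tracer;
};

static struct tracer* heap_tracer(struct heap *heap) { return &heap->tracer; }

// Stand-in for trace_object(): returns nonzero only on the first visit,
// so each object is enqueued at most once.
static int trace_object(struct heap *heap, struct toy_obj *obj) {
  (void)heap;
  if (obj->marked)
    return 0;
  obj->marked = 1;
  return 1;
}

static void tracer_prepare(struct heap *heap) {
  heap_tracer(heap)->read = heap_tracer(heap)->write = 0;
}

static void tracer_enqueue_root(struct tracer *tracer, struct toy_obj *obj) {
  tracer->queue[tracer->write++ % TOY_QUEUE_SIZE] = obj;
}

// Stand-in for trace_one() + tracer_visit(): scan an object's fields and
// enqueue any children seen for the first time.
static void trace_one(struct heap *heap, struct toy_obj *obj) {
  for (int i = 0; i < 2; i++) {
    struct toy_obj *child = obj->children[i];
    if (child && trace_object(heap, child))
      tracer_enqueue_root(heap_tracer(heap), child);
  }
}

static void tracer_trace(struct heap *heap) {
  struct tracer *tracer = heap_tracer(heap);
  while (tracer->read != tracer->write)
    trace_one(heap, tracer->queue[tracer->read++ % TOY_QUEUE_SIZE]);
}

static void tracer_release(struct heap *heap) { (void)heap; }

int main(void) {
  struct toy_obj c = { 0, { NULL, NULL } };
  struct toy_obj b = { 0, { &c, NULL } };
  struct toy_obj a = { 0, { &b, &c } };
  struct toy_obj unreachable = { 0, { NULL, NULL } };
  struct heap heap;

  // Same order as collect(): prepare, enqueue marked roots, trace, release.
  tracer_prepare(&heap);
  if (trace_object(&heap, &a))
    tracer_enqueue_root(heap_tracer(&heap), &a);
  tracer_trace(&heap);
  tracer_release(&heap);

  printf("a=%d b=%d c=%d unreachable=%d\n",
         a.marked, b.marked, c.marked, unreachable.marked); // 1 1 1 0
  return 0;
}

The property mirrored here is that trace_object() is the single test-and-set point: an object can be pushed onto a queue at most once, so tracing terminates even on cyclic object graphs.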