diff --git a/Makefile b/Makefile
index db5f1a7c2..56c7325c4 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ TESTS = quads mt-gcbench ephemerons finalizers
 COLLECTORS = \
 	bdw \
 	semi \
+	scc \
 	pcc \
 	\
 	whippet \
@@ -63,6 +64,9 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
 
+GC_STEM_scc = scc
+GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
+
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
 
diff --git a/doc/collector-pcc.md b/doc/collector-pcc.md
index a20e58e64..c79fb2aea 100644
--- a/doc/collector-pcc.md
+++ b/doc/collector-pcc.md
@@ -1,49 +1,23 @@
 # Parallel copying collector
 
-Whippet's `pcc` collector is a copying collector, like the more simple
-[`semi`](./collector-semi.md), but supporting multiple mutator threads
-and multiple tracing threads.
+Whippet's `pcc` collector is a copying collector, exactly like
+[`scc`](./collector-scc.md), but supporting multiple tracing threads.
+See the discussion of `scc` for a general overview.
 
-Like `semi`, `pcc` traces by evacuation: it moves all live objects on
-every collection. (Exception: objects larger than 8192 bytes are
-placed into a partitioned space which traces by marking in place instead
-of copying.) Evacuation requires precise roots, so if your embedder
-does not support precise roots, `pcc` is not for you.
-
-Again like `semi`, `pcc` generally requires a heap size at least twice
-as large as the maximum live heap size, and performs best with ample
-heap sizes; between 3× and 5× is best.
+Also like `scc` and `semi`, `pcc` is not generational yet. If and when
+`pcc` grows a young generation, it will be a great collector.
 
 ## Implementation notes
 
-Unlike `semi` which has a single global bump-pointer allocation region,
-`pcc` structures the heap into 64-kB blocks. In this way it supports
-multiple mutator threads: mutators do local bump-pointer allocation into
-their own block, and when their block is full, they fetch another from
-the global store.
-
-The block size is 64 kB, but really it's 128 kB, because each block has
-two halves: the active region and the copy reserve. Dividing each block
-in two allows the collector to easily grow and shrink the heap while
-ensuring there is always enough reserve space.
-
-Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
-a slab. The first block in a slab is used by the collector itself, to
-keep metadata for the rest of the blocks, for example a chain pointer
-allowing blocks to be collected in lists, a saved allocation pointer for
-partially-filled blocks, whether the block is paged in or out, and so
-on.
-
 `pcc` supports tracing in parallel. This mechanism works somewhat like
 allocation, in which multiple trace workers compete to evacuate objects
 into their local allocation buffers; when an allocation buffer is full,
 the trace worker grabs another, just like mutators do.
 
-However, unlike the simple semi-space collector which uses a Cheney grey
-worklist, `pcc` uses the [fine-grained work-stealing parallel
-tracer](../src/parallel-tracer.h) originally developed for [Whippet's
-Immix-like collector](./collector-whippet.md). Each trace worker
-maintains a [local queue of objects that need
+To maintain a queue of objects to trace, `pcc` uses the [fine-grained
+work-stealing parallel tracer](../src/parallel-tracer.h) originally
+developed for [Whippet's Immix-like collector](./collector-whippet.md).
+Each trace worker maintains a [local queue of objects that need
 tracing](../src/local-worklist.h), which currently has 1024 entries.
 If the local queue becomes full, the worker will publish 3/4 of those
 entries to the worker's [shared worklist](../src/shared-worklist.h).
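To make the overflow policy concrete, here is a minimal sketch of a 1024-entry local queue that publishes 3/4 of its entries when it fills. It is illustrative only: the real data structures live in `src/local-worklist.h` and `src/shared-worklist.h`, and the names, types, and growth policy below are invented for the example.

```c
#include <stdint.h>
#include <stdlib.h>

// Stand-in for the stealable shared worklist: an unbounded vector.
// (Error handling elided for brevity.)
struct shared_worklist { uintptr_t *entries; size_t count, capacity; };

static void shared_worklist_push(struct shared_worklist *s, uintptr_t obj) {
  if (s->count == s->capacity) {
    s->capacity = s->capacity ? s->capacity * 2 : 128;
    s->entries = realloc(s->entries, s->capacity * sizeof(uintptr_t));
  }
  s->entries[s->count++] = obj;
}

// Hypothetical fixed-size local ring buffer, as described above.
#define LOCAL_SIZE 1024
struct local_worklist { size_t read, write; uintptr_t entries[LOCAL_SIZE]; };

// Push one grey object; on overflow, publish the oldest 3/4 of the
// queued entries so that idle trace workers can steal them.
static void local_worklist_push(struct local_worklist *q,
                                struct shared_worklist *shared,
                                uintptr_t obj) {
  if (q->write - q->read == LOCAL_SIZE)
    for (size_t n = 0; n < LOCAL_SIZE / 4 * 3; n++)
      shared_worklist_push(shared, q->entries[q->read++ % LOCAL_SIZE]);
  q->entries[q->write++ % LOCAL_SIZE] = obj;
}
```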
@@ -53,12 +27,11 @@ from its own shared worklist, then will try to steal from other workers.
 
 Because threads compete to evacuate objects, `pcc` uses [atomic
 compare-and-swap instead of simple forwarding pointer
 updates](./manual.md#forwarding-objects), which imposes around a ~30%
-performance penalty. `pcc` generally starts to outperform `semi` when
-it can trace with 2 threads, and gets better with each additional
-thread.
+performance penalty. `pcc` generally starts to outperform `scc` when it
+can trace with 2 threads, and gets better with each additional thread.
 
-The memory used for the external worklist is dynamically allocated from
-the OS and is not currently counted as contributing to the heap size.
-If you are targetting a microcontroller or something, probably you need
-to choose a different kind of collector that never dynamically
-allocates, such as `semi`.
+As with `scc`, the memory used for the external worklist is dynamically
+allocated from the OS and is not currently counted as contributing to
+the heap size. If you are targeting a microcontroller or something, you
+probably need to choose a different kind of collector that never
+dynamically allocates, such as `semi`.
diff --git a/doc/collector-scc.md b/doc/collector-scc.md
new file mode 100644
index 000000000..2512bb9fd
--- /dev/null
+++ b/doc/collector-scc.md
@@ -0,0 +1,62 @@
+# Serial copying collector
+
+Whippet's `scc` collector is a copying collector, like the simpler
+[`semi`](./collector-semi.md), but supporting multiple mutator threads,
+and using an external FIFO worklist instead of a Cheney worklist.
+
+Like `semi`, `scc` traces by evacuation: it moves all live objects on
+every collection. (Exception: objects larger than 8192 bytes are
+placed into a partitioned space which traces by marking in place instead
+of copying.) Evacuation requires precise roots, so if your embedder
+does not support precise roots, `scc` is not for you.
+
+Again like `semi`, `scc` generally requires a heap size at least twice
+as large as the maximum live heap size, and performs best with ample
+heap sizes; between 3× and 5× is best.
+
+Overall, `scc` is most useful for isolating the performance implications
+of using a block-structured heap and of using an external worklist
+rather than a Cheney worklist as `semi` does. It also supports multiple
+mutator threads, so it is generally more useful than `semi`. Also,
+comparing `scc` against `pcc` measures the overhead that `pcc` pays to
+forward objects atomically.
+
+But given a choice, you probably want `pcc`; though it's slower with
+only one tracing thread, once you have more than one tracing thread
+it's a win over `scc`.
+
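To see concretely what `scc` avoids and what `pcc` pays for, here is a schematic of the two forwarding flavors. This is a sketch, not Whippet's API: the real serial routine is `copy_space_forward_nonatomic` in the `src/copy-space.h` hunk later in this patch, the object-header encoding is invented, and copying speculatively before the compare-and-swap is just one simple way to structure the race.

```c
#include <stdatomic.h>
#include <stdint.h>

// Schematic header: the forwarding word is zero until the object is
// copied, then holds its new address.
struct object { _Atomic uintptr_t forwarded; /* ...payload... */ };

uintptr_t copy_object(struct object *old);  // evacuate; returns new address

// scc-style: a single tracing thread, so relaxed (plain) loads and
// stores suffice.
static uintptr_t forward_nonatomic(struct object *old) {
  uintptr_t fwd = atomic_load_explicit(&old->forwarded, memory_order_relaxed);
  if (fwd) return fwd;
  fwd = copy_object(old);
  atomic_store_explicit(&old->forwarded, fwd, memory_order_relaxed);
  return fwd;
}

// pcc-style: racing workers claim the object via compare-and-swap; the
// loser abandons its speculative copy and takes the winner's address.
// The CAS and its ordering constraints are the source of the ~30%
// penalty mentioned in the pcc document.
static uintptr_t forward_atomic(struct object *old) {
  uintptr_t fwd = atomic_load_explicit(&old->forwarded, memory_order_acquire);
  if (fwd) return fwd;
  uintptr_t copy = copy_object(old);
  uintptr_t expected = 0;
  if (atomic_compare_exchange_strong(&old->forwarded, &expected, copy))
    return copy;
  return expected;  // another worker forwarded it first; our copy is wasted
}
```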
+## Implementation notes
+
+Unlike `semi` which has a single global bump-pointer allocation region,
+`scc` structures the heap into 64-kB blocks. In this way it supports
+multiple mutator threads: mutators do local bump-pointer allocation into
+their own block, and when their block is full, they fetch another from
+the global store.
+
+The block size is 64 kB, but really it's 128 kB, because each block has
+two halves: the active region and the copy reserve. Dividing each block
+in two allows the collector to easily grow and shrink the heap while
+ensuring there is always enough reserve space.
+
+Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
+a slab. The first block in a slab is used by the collector itself, to
+keep metadata for the rest of the blocks, for example a chain pointer
+allowing blocks to be collected in lists, a saved allocation pointer for
+partially-filled blocks, whether the block is paged in or out, and so
+on.
+
+Unlike the simple semi-space collector which uses a Cheney grey
+worklist, `scc` uses a [simple first-in, first-out queue of objects to
+be traced](../src/simple-worklist.h) originally developed for [Whippet's
+Immix-like collector](./collector-whippet.md). Like a Cheney worklist,
+this should result in objects being copied in breadth-first order. The
+literature suggests that depth-first ordering is generally better for
+locality than breadth-first, but that preserving allocation order is
+better still. This is something to experiment with in the future.
+
+The memory used for the external worklist is dynamically allocated from
+the OS and is not currently counted as contributing to the heap size.
+If you are targeting a microcontroller or something, you probably need
+to choose a different kind of collector that never dynamically
+allocates, such as `semi`.
+
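As a concrete picture of the tracing loop just described, here is a minimal FIFO worklist sketch. It is not `src/simple-worklist.h`; the names and growth policy are invented. It shows why draining a FIFO yields the same breadth-first copying order as a Cheney scan, while keeping the queue memory outside the GC heap.

```c
#include <stdint.h>
#include <stdlib.h>

// Hypothetical grow-on-demand FIFO of grey objects. Its buffer comes
// from the OS allocator, not the GC heap, matching the accounting
// caveat above. (Error handling elided for brevity.)
struct fifo_worklist { uintptr_t *buf; size_t read, write, capacity; };

static void fifo_push(struct fifo_worklist *q, uintptr_t obj) {
  if (q->write - q->read == q->capacity) {  // full, or not yet allocated
    size_t cap = q->capacity ? q->capacity * 2 : 1024;
    uintptr_t *buf = malloc(cap * sizeof(uintptr_t));
    for (size_t i = q->read; i < q->write; i++)
      buf[i - q->read] = q->buf[i % q->capacity];
    free(q->buf);
    q->write -= q->read;
    q->read = 0;
    q->buf = buf;
    q->capacity = cap;
  }
  q->buf[q->write++ % q->capacity] = obj;
}

static int fifo_pop(struct fifo_worklist *q, uintptr_t *obj) {
  if (q->read == q->write) return 0;
  *obj = q->buf[q->read++ % q->capacity];
  return 1;
}

// Provided by the collector: visit obj's fields, copying and pushing
// any not-yet-traced children.
void trace_one_object(uintptr_t obj, struct fifo_worklist *q);

// Draining oldest-first copies every object at depth N before any at
// depth N+1: breadth-first, like a Cheney scan.
static void trace_all(struct fifo_worklist *q) {
  uintptr_t obj;
  while (fifo_pop(q, &obj))
    trace_one_object(obj, q);
}
```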
diff --git a/doc/collectors.md b/doc/collectors.md
index 81af46e59..c6fced97f 100644
--- a/doc/collectors.md
+++ b/doc/collectors.md
@@ -1,15 +1,17 @@
 # Whippet collectors
 
-Whippet has four collectors currently:
- - [Semi-space collector (semi)](./collector-semi.md): For
+Whippet has five collectors currently:
+ - [Semi-space collector (`semi`)](./collector-semi.md): For
    single-threaded embedders who are not too tight on memory.
- - [Parallel copying collector (pcc)](./collector-pcc.md): Like semi,
-   but with support for multiple mutator threads. Faster than semi if
+ - [Serial copying collector (`scc`)](./collector-scc.md): Like `semi`,
+   but with support for multiple mutator threads.
+ - [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `scc`,
+   but with support for multiple tracing threads. Faster than `scc` if
    multiple cores are available at collection-time.
- - [Whippet collector (whippet)](./collector-whippet.md):
+ - [Whippet collector (`whippet`)](./collector-whippet.md):
    Immix-inspired collector. Optionally parallel, conservative (stack
    and/or heap), and/or generational.
- - [Boehm-Demers-Weiser collector (bdw)](./collector-bdw.md):
+ - [Boehm-Demers-Weiser collector (`bdw`)](./collector-bdw.md):
    Conservative mark-sweep collector, implemented by
    Boehm-Demers-Weiser library.
@@ -18,11 +20,13 @@
 If you are migrating an embedder off BDW-GC, then it could be
 reasonable to first go to `bdw`, then
 `stack-conservative-parallel-whippet`.
 
-If you have an embedder with precise roots, use `semi` if
-single-threaded, or `pcc` if multi-threaded. That will shake out
-mutator/embedder bugs. Then if memory is tight, switch to
+If you have an embedder with precise roots, use `pcc`. That will shake
+out mutator/embedder bugs. Then if memory is tight, switch to
 `parallel-whippet`, possibly `parallel-generational-whippet`.
 
+If you are aiming for maximum simplicity and minimal code size (ten
+kilobytes or so), use `semi`.
+
 If you are writing a new project, you have a choice as to whether to
 pay the development cost of precise roots or not.
 If you choose to not have precise roots, then go for
 `stack-conservative-parallel-whippet`
diff --git a/embed.mk b/embed.mk
index 9284781e0..020cb10d3 100644
--- a/embed.mk
+++ b/embed.mk
@@ -42,6 +42,9 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
 
+GC_STEM_scc = scc
+GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
+
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
 
diff --git a/src/copy-space.h b/src/copy-space.h
index fd6922b0f..2d6b2f246 100644
--- a/src/copy-space.h
+++ b/src/copy-space.h
@@ -484,6 +484,41 @@ copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
   }
 }
 
+// Forward the object at old_ref, copying it on first visit; update the
+// edge to point to the new location.  Returns 1 if this call copied the
+// object, 0 if it was already forwarded.  Serial-only: uses plain
+// (nonatomic) forwarding-word accesses.
+static inline int
+copy_space_forward_nonatomic(struct copy_space *space, struct gc_edge edge,
+                             struct gc_ref old_ref,
+                             struct copy_space_allocator *alloc) {
+  GC_ASSERT(copy_space_object_region(old_ref) != space->active_region);
+
+  uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
+  if (forwarded) {
+    gc_edge_update(edge, gc_ref(forwarded));
+    return 0;
+  } else {
+    size_t size;
+    gc_trace_object(old_ref, NULL, NULL, NULL, &size);
+    struct gc_ref new_ref =
+      copy_space_allocate(alloc, space, size,
+                          copy_space_gc_during_evacuation, NULL);
+    memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), size);
+    gc_object_forward_nonatomic(old_ref, new_ref);
+    gc_edge_update(edge, new_ref);
+    return 1;
+  }
+}
+
+// If old_ref was already copied this cycle, update the edge to the new
+// location and return 1; otherwise return 0 without copying.
+static int
+copy_space_forward_if_traced_nonatomic(struct copy_space *space,
+                                       struct gc_edge edge,
+                                       struct gc_ref old_ref) {
+  GC_ASSERT(copy_space_object_region(old_ref) != space->active_region);
+  uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
+  if (forwarded) {
+    gc_edge_update(edge, gc_ref(forwarded));
+    return 1;
+  }
+  return 0;
+}
+
 static inline int
 copy_space_contains(struct copy_space *space, struct gc_ref ref) {
   for (size_t i = 0; i < space->nextents; i++)
diff --git a/src/scc.c b/src/scc.c
new file mode 100644
index 000000000..28dcef0d2
--- /dev/null
+++ b/src/scc.c
@@ -0,0 +1,669 @@
+#include <pthread.h>
+#include <stdatomic.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "gc-api.h"
+
+#define GC_IMPL 1
+#include "gc-internal.h"
+
+#include "copy-space.h"
+#include "debug.h"
+#include "gc-align.h"
+#include "gc-inline.h"
+#include "gc-trace.h"
+#include "large-object-space.h"
+#include "serial-tracer.h"
+#include "spin.h"
+#include "scc-attrs.h"
+
+struct gc_heap {
+  struct copy_space copy_space;
+  struct large_object_space large_object_space;
+  struct gc_extern_space *extern_space;
+  size_t large_object_pages;
+  pthread_mutex_t lock;
+  pthread_cond_t collector_cond;
+  pthread_cond_t mutator_cond;
+  size_t size;
+  int collecting;
+  int check_pending_ephemerons;
+  struct gc_pending_ephemerons *pending_ephemerons;
+  struct gc_finalizer_state *finalizer_state;
+  size_t mutator_count;
+  size_t paused_mutator_count;
+  size_t inactive_mutator_count;
+  struct gc_heap_roots *roots;
+  struct gc_mutator *mutators;
+  long count;
+  struct gc_tracer tracer;
+  double pending_ephemerons_size_factor;
+  double pending_ephemerons_size_slop;
+  struct gc_event_listener event_listener;
+  void *event_listener_data;
+};
+
+#define HEAP_EVENT(heap, event, ...) \
+  (heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__)
+#define MUTATOR_EVENT(mut, event, ...)
\ + (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__) + +struct gc_mutator { + struct copy_space_allocator allocator; + struct gc_heap *heap; + struct gc_mutator_roots *roots; + void *event_listener_data; + struct gc_mutator *next; + struct gc_mutator *prev; +}; + +struct gc_trace_worker_data { + struct copy_space_allocator allocator; +}; + +static inline struct copy_space* heap_copy_space(struct gc_heap *heap) { + return &heap->copy_space; +} +static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) { + return &heap->large_object_space; +} +static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) { + return heap->extern_space; +} +static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) { + return mutator->heap; +} + +static void +gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data), + struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker) { + struct gc_trace_worker_data data; + copy_space_allocator_init(&data.allocator, heap_copy_space(heap)); + f(tracer, heap, worker, &data); + copy_space_allocator_finish(&data.allocator, heap_copy_space(heap)); +} + +static inline int do_trace(struct gc_heap *heap, struct gc_edge edge, + struct gc_ref ref, + struct gc_trace_worker_data *data) { + if (!gc_ref_is_heap_object(ref)) + return 0; + if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref))) + return copy_space_forward_nonatomic(heap_copy_space(heap), edge, ref, + &data->allocator); + else if (large_object_space_contains(heap_large_object_space(heap), ref)) + return large_object_space_mark_object(heap_large_object_space(heap), ref); + else + return gc_extern_space_visit(heap_extern_space(heap), edge, ref); +} + +static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge, + struct gc_trace_worker *worker) { + struct gc_ref ref = gc_edge_ref(edge); + struct gc_trace_worker_data *data = gc_trace_worker_data(worker); + int is_new = do_trace(heap, edge, ref, data); + + if (is_new && heap->check_pending_ephemerons) + gc_resolve_pending_ephemerons(ref, heap); + + return is_new; +} + +int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { + struct gc_ref ref = gc_edge_ref(edge); + if (!gc_ref_is_heap_object(ref)) + return 0; + if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref))) + return copy_space_forward_if_traced_nonatomic(heap_copy_space(heap), edge, + ref); + if (large_object_space_contains(heap_large_object_space(heap), ref)) + return large_object_space_is_copied(heap_large_object_space(heap), ref); + GC_CRASH(); +} + +static int mutators_are_stopping(struct gc_heap *heap) { + return atomic_load_explicit(&heap->collecting, memory_order_relaxed); +} + +static inline void heap_lock(struct gc_heap *heap) { + pthread_mutex_lock(&heap->lock); +} +static inline void heap_unlock(struct gc_heap *heap) { + pthread_mutex_unlock(&heap->lock); +} + +// with heap lock +static inline int all_mutators_stopped(struct gc_heap *heap) { + return heap->mutator_count == + heap->paused_mutator_count + heap->inactive_mutator_count; +} + +static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + mut->heap = heap; + mut->event_listener_data = + heap->event_listener.mutator_added(heap->event_listener_data); + copy_space_allocator_init(&mut->allocator, heap_copy_space(heap)); + heap_lock(heap); + // We have no roots. 
If there is a GC currently in progress, we have + // nothing to add. Just wait until it's done. + while (mutators_are_stopping(heap)) + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + mut->next = mut->prev = NULL; + struct gc_mutator *tail = heap->mutators; + if (tail) { + mut->next = tail; + tail->prev = mut; + } + heap->mutators = mut; + heap->mutator_count++; + heap_unlock(heap); +} + +static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + MUTATOR_EVENT(mut, mutator_removed); + mut->heap = NULL; + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); + heap_lock(heap); + heap->mutator_count--; + if (mut->next) + mut->next->prev = mut->prev; + if (mut->prev) + mut->prev->next = mut->next; + else + heap->mutators = mut->next; + // We have no roots. If there is a GC stop currently in progress, + // maybe tell the controller it can continue. + if (mutators_are_stopping(heap) && all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + heap_unlock(heap); +} + +static void request_mutators_to_stop(struct gc_heap *heap) { + GC_ASSERT(!mutators_are_stopping(heap)); + atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed); +} + +static void allow_mutators_to_continue(struct gc_heap *heap) { + GC_ASSERT(mutators_are_stopping(heap)); + GC_ASSERT(all_mutators_stopped(heap)); + heap->paused_mutator_count--; + atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed); + GC_ASSERT(!mutators_are_stopping(heap)); + pthread_cond_broadcast(&heap->mutator_cond); +} + +static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { + size_t previous = heap->large_object_pages; + heap->large_object_pages = npages; + GC_ASSERT(npages <= previous); + size_t bytes = (previous - npages) << + heap_large_object_space(heap)->page_size_log2; + copy_space_reacquire_memory(heap_copy_space(heap), bytes); +} + +void gc_mutator_set_roots(struct gc_mutator *mut, + struct gc_mutator_roots *roots) { + mut->roots = roots; +} +void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { + heap->roots = roots; +} +void gc_heap_set_extern_space(struct gc_heap *heap, + struct gc_extern_space *space) { + heap->extern_space = space; +} + +static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap, + void *trace_data) GC_ALWAYS_INLINE; +static inline void +tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { + struct gc_trace_worker *worker = trace_data; + if (trace_edge(heap, edge, worker)) + gc_trace_worker_enqueue(worker, gc_edge_ref(edge)); +} + +static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, + struct gc_trace_worker *worker) { +#ifdef DEBUG + if (copy_space_contains(heap_copy_space(heap), ref)) + GC_ASSERT(copy_space_object_region(ref) == heap_copy_space(heap)->active_region); +#endif + gc_trace_object(ref, tracer_visit, heap, worker, NULL); +} + +static inline void trace_root(struct gc_root root, struct gc_heap *heap, + struct gc_trace_worker *worker) { + switch (root.kind) { + case GC_ROOT_KIND_HEAP: + gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker); + break; + case GC_ROOT_KIND_MUTATOR: + gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker); + break; + case GC_ROOT_KIND_RESOLVED_EPHEMERONS: + gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit, + heap, worker); + break; + case GC_ROOT_KIND_EDGE: + tracer_visit(root.edge, heap, worker); + break; + default: + GC_CRASH(); + } +} + +static void 
wait_for_mutators_to_stop(struct gc_heap *heap) { + heap->paused_mutator_count++; + while (!all_mutators_stopped(heap)) + pthread_cond_wait(&heap->collector_cond, &heap->lock); +} + +void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size, + struct gc_edge edge, struct gc_ref new_val) { +} + +static void +pause_mutator_for_collection(struct gc_heap *heap, + struct gc_mutator *mut) GC_NEVER_INLINE; +static void +pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) { + GC_ASSERT(mutators_are_stopping(heap)); + GC_ASSERT(!all_mutators_stopped(heap)); + MUTATOR_EVENT(mut, mutator_stopped); + heap->paused_mutator_count++; + if (all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + + do { + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + } while (mutators_are_stopping(heap)); + heap->paused_mutator_count--; + + MUTATOR_EVENT(mut, mutator_restarted); +} + +static void +pause_mutator_for_collection_with_lock(struct gc_mutator *mut) GC_NEVER_INLINE; +static void +pause_mutator_for_collection_with_lock(struct gc_mutator *mut) { + struct gc_heap *heap = mutator_heap(mut); + GC_ASSERT(mutators_are_stopping(heap)); + MUTATOR_EVENT(mut, mutator_stopping); + pause_mutator_for_collection(heap, mut); +} + +static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) GC_NEVER_INLINE; +static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) { + struct gc_heap *heap = mutator_heap(mut); + GC_ASSERT(mutators_are_stopping(heap)); + MUTATOR_EVENT(mut, mutator_stopping); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); + heap_lock(heap); + pause_mutator_for_collection(heap, mut); + heap_unlock(heap); +} + +static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) { + while (mutators_are_stopping(mutator_heap(mut))) + pause_mutator_for_collection_without_lock(mut); +} + +static int maybe_grow_heap(struct gc_heap *heap) { + return 0; +} + +static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap, + void *unused) { + gc_tracer_add_root(&heap->tracer, gc_root_edge(edge)); +} + +static void add_roots(struct gc_heap *heap) { + for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next) + gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut)); + gc_tracer_add_root(&heap->tracer, gc_root_heap(heap)); + gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL); +} + +static void resolve_ephemerons_lazily(struct gc_heap *heap) { + heap->check_pending_ephemerons = 0; +} + +static void resolve_ephemerons_eagerly(struct gc_heap *heap) { + heap->check_pending_ephemerons = 1; + gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); +} + +static void trace_resolved_ephemerons(struct gc_heap *heap) { + for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); + resolved; + resolved = gc_pop_resolved_ephemerons(heap)) { + gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved)); + gc_tracer_trace(&heap->tracer); + } +} + +static void resolve_finalizers(struct gc_heap *heap) { + for (size_t priority = 0; + priority < gc_finalizer_priority_count(); + priority++) { + if (gc_resolve_finalizers(heap->finalizer_state, priority, + visit_root_edge, heap, NULL)) { + gc_tracer_trace(&heap->tracer); + trace_resolved_ephemerons(heap); + } + } + gc_notify_finalizers(heap->finalizer_state, heap); +} + +static void sweep_ephemerons(struct gc_heap *heap) { + return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1); +} + 
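+// Overview of the collection sequence implemented by collect() below:
+// request a stop and wait for all mutators to pause; flip the copy
+// space (the former copy reserve becomes the new active region); trace
+// roots and then the transitive object graph, evacuating objects as
+// they are reached; resolve ephemerons and finalizers; sweep
+// unresolved ephemerons; release memory if the heap shrank; and
+// finally let the mutators continue.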
+static void collect(struct gc_mutator *mut) GC_NEVER_INLINE; +static void collect(struct gc_mutator *mut) { + struct gc_heap *heap = mutator_heap(mut); + struct copy_space *copy_space = heap_copy_space(heap); + struct large_object_space *lospace = heap_large_object_space(heap); + struct gc_extern_space *exspace = heap_extern_space(heap); + MUTATOR_EVENT(mut, mutator_cause_gc); + DEBUG("start collect #%ld:\n", heap->count); + HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING); + large_object_space_start_gc(lospace, 0); + gc_extern_space_start_gc(exspace, 0); + resolve_ephemerons_lazily(heap); + HEAP_EVENT(heap, requesting_stop); + request_mutators_to_stop(heap); + HEAP_EVENT(heap, waiting_for_stop); + wait_for_mutators_to_stop(heap); + HEAP_EVENT(heap, mutators_stopped); + copy_space_flip(copy_space); + gc_tracer_prepare(&heap->tracer); + add_roots(heap); + HEAP_EVENT(heap, roots_traced); + gc_tracer_trace(&heap->tracer); + HEAP_EVENT(heap, heap_traced); + resolve_ephemerons_eagerly(heap); + trace_resolved_ephemerons(heap); + HEAP_EVENT(heap, ephemerons_traced); + resolve_finalizers(heap); + HEAP_EVENT(heap, finalizers_traced); + sweep_ephemerons(heap); + gc_tracer_release(&heap->tracer); + copy_space_finish_gc(copy_space); + large_object_space_finish_gc(lospace, 0); + gc_extern_space_finish_gc(exspace, 0); + heap->count++; + heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection); + size_t live_size = (copy_space->allocated_bytes_at_last_gc + + large_object_space_size_at_last_collection(lospace)); + HEAP_EVENT(heap, live_data_size, live_size); + maybe_grow_heap(heap); + if (!copy_space_page_out_blocks_until_memory_released(copy_space)) { + fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n", + heap->size, copy_space->nslabs); + GC_CRASH(); + } + HEAP_EVENT(heap, restarting_mutators); + allow_mutators_to_continue(heap); +} + +static void trigger_collection(struct gc_mutator *mut) { + struct gc_heap *heap = mutator_heap(mut); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); + heap_lock(heap); + long epoch = heap->count; + while (mutators_are_stopping(heap)) + pause_mutator_for_collection_with_lock(mut); + if (epoch == heap->count) + collect(mut); + heap_unlock(heap); +} + +void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) { + trigger_collection(mut); +} + +static void* allocate_large(struct gc_mutator *mut, size_t size) { + struct gc_heap *heap = mutator_heap(mut); + struct large_object_space *space = heap_large_object_space(heap); + + size_t npages = large_object_space_npages(space, size); + + copy_space_request_release_memory(heap_copy_space(heap), + npages << space->page_size_log2); + while (!copy_space_page_out_blocks_until_memory_released(heap_copy_space(heap))) + trigger_collection(mut); + atomic_fetch_add(&heap->large_object_pages, npages); + + void *ret = large_object_space_alloc(space, npages); + if (!ret) + ret = large_object_space_obtain_and_alloc(space, npages); + + if (!ret) { + perror("weird: we have the space but mmap didn't work"); + GC_CRASH(); + } + + return ret; +} + +static void get_more_empty_blocks_for_mutator(void *mut) { + trigger_collection(mut); +} + +void* gc_allocate_slow(struct gc_mutator *mut, size_t size) { + GC_ASSERT(size > 0); // allocating 0 bytes would be silly + + if (size > gc_allocator_large_threshold()) + return allocate_large(mut, size); + + struct gc_ref ret = copy_space_allocate(&mut->allocator, + heap_copy_space(mutator_heap(mut)), + size, + 
get_more_empty_blocks_for_mutator, + mut); + gc_clear_fresh_allocation(ret, size); + return gc_ref_heap_object(ret); +} + +void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) { + return gc_allocate(mut, size); +} + +struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) { + return gc_allocate(mut, gc_ephemeron_size()); +} + +void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { + gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value); +} + +struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) { + return heap->pending_ephemerons; +} + +unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { + return heap->count; +} + +struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) { + return gc_allocate(mut, gc_finalizer_size()); +} + +void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, + unsigned priority, struct gc_ref object, + struct gc_ref closure) { + gc_finalizer_init_internal(finalizer, object, closure); + gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state, + finalizer, priority); + // No write barrier. +} + +struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) { + return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state); +} + +void gc_set_finalizer_callback(struct gc_heap *heap, + gc_finalizer_callback callback) { + gc_finalizer_state_set_callback(heap->finalizer_state, callback); +} + +static int heap_prepare_pending_ephemerons(struct gc_heap *heap) { + struct gc_pending_ephemerons *cur = heap->pending_ephemerons; + size_t target = heap->size * heap->pending_ephemerons_size_factor; + double slop = heap->pending_ephemerons_size_slop; + + heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop); + + return !!heap->pending_ephemerons; +} + +struct gc_options { + struct gc_common_options common; +}; +int gc_option_from_string(const char *str) { + return gc_common_option_from_string(str); +} +struct gc_options* gc_allocate_options(void) { + struct gc_options *ret = malloc(sizeof(struct gc_options)); + gc_init_common_options(&ret->common); + return ret; +} +int gc_options_set_int(struct gc_options *options, int option, int value) { + return gc_common_options_set_int(&options->common, option, value); +} +int gc_options_set_size(struct gc_options *options, int option, + size_t value) { + return gc_common_options_set_size(&options->common, option, value); +} +int gc_options_set_double(struct gc_options *options, int option, + double value) { + return gc_common_options_set_double(&options->common, option, value); +} +int gc_options_parse_and_set(struct gc_options *options, int option, + const char *value) { + return gc_common_options_parse_and_set(&options->common, option, value); +} + +static int heap_init(struct gc_heap *heap, const struct gc_options *options) { + // *heap is already initialized to 0. 
+ + pthread_mutex_init(&heap->lock, NULL); + pthread_cond_init(&heap->mutator_cond, NULL); + pthread_cond_init(&heap->collector_cond, NULL); + heap->size = options->common.heap_size; + + if (options->common.parallelism != 1) + fprintf(stderr, "warning: parallelism unimplemented in semispace copying collector\n"); + + if (!gc_tracer_init(&heap->tracer, heap, 1)) + GC_CRASH(); + + heap->pending_ephemerons_size_factor = 0.005; + heap->pending_ephemerons_size_slop = 0.5; + + if (!heap_prepare_pending_ephemerons(heap)) + GC_CRASH(); + + heap->finalizer_state = gc_make_finalizer_state(); + if (!heap->finalizer_state) + GC_CRASH(); + + return 1; +} + +int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, + struct gc_heap **heap, struct gc_mutator **mut, + struct gc_event_listener event_listener, + void *event_listener_data) { + GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT); + GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD); + GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator)); + GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), + offsetof(struct copy_space_allocator, hp)); + GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), + offsetof(struct copy_space_allocator, limit)); + + if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { + fprintf(stderr, "fixed heap size is currently required\n"); + return 0; + } + + *heap = calloc(1, sizeof(struct gc_heap)); + if (!*heap) GC_CRASH(); + + if (!heap_init(*heap, options)) + GC_CRASH(); + + (*heap)->event_listener = event_listener; + (*heap)->event_listener_data = event_listener_data; + HEAP_EVENT(*heap, init, (*heap)->size); + + struct copy_space *space = heap_copy_space(*heap); + if (!copy_space_init(space, (*heap)->size)) { + free(*heap); + *heap = NULL; + return 0; + } + + if (!large_object_space_init(heap_large_object_space(*heap), *heap)) + GC_CRASH(); + + *mut = calloc(1, sizeof(struct gc_mutator)); + if (!*mut) GC_CRASH(); + add_mutator(*heap, *mut); + return 1; +} + +struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base, + struct gc_heap *heap) { + struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator)); + if (!ret) + GC_CRASH(); + add_mutator(heap, ret); + return ret; +} + +void gc_finish_for_thread(struct gc_mutator *mut) { + remove_mutator(mutator_heap(mut), mut); + free(mut); +} + +static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + GC_ASSERT(mut->next == NULL); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); + heap_lock(heap); + heap->inactive_mutator_count++; + if (all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + heap_unlock(heap); +} + +static void reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + heap_lock(heap); + while (mutators_are_stopping(heap)) + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + heap->inactive_mutator_count--; + heap_unlock(heap); +} + +void* gc_call_without_gc(struct gc_mutator *mut, + void* (*f)(void*), + void *data) { + struct gc_heap *heap = mutator_heap(mut); + deactivate_mutator(heap, mut); + void *ret = f(data); + reactivate_mutator(heap, mut); + return ret; +}
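Finally, stepping outside the patch itself: to situate `scc` in an embedder, here is a rough bring-up sketch against the API surface visible above. It is not code from the Whippet tree. `GC_OPTION_HEAP_SIZE` is an assumed option name, the zero-initialized event listener is a placeholder (the collector invokes listener callbacks unconditionally, so a real embedder must supply them), and a real embedder must also implement the `gc-embedder-api` hooks (`gc_trace_object`, root tracing, forwarding) that `scc` relies on.

```c
// Hypothetical embedder bring-up; build as embed.mk suggests, i.e. the
// scc stem compiled with -DGC_PRECISE_ROOTS=1.
#include "gc-api.h"

int main(void) {
  // scc currently requires a fixed heap size (see the check in gc_init).
  // GC_OPTION_HEAP_SIZE is an assumed name, for illustration only.
  struct gc_options *options = gc_allocate_options();
  gc_options_set_size(options, GC_OPTION_HEAP_SIZE, 64 * 1024 * 1024);

  struct gc_heap *heap;
  struct gc_mutator *mut;
  // Placeholder: a real embedder supplies callbacks here, which fire at
  // the HEAP_EVENT / MUTATOR_EVENT sites seen in scc.c.
  struct gc_event_listener listener = {0};
  if (!gc_init(options, NULL, &heap, &mut, listener, NULL))
    return 1;

  // Fast-path allocation bumps the mutator-local pointer; the slow path
  // (gc_allocate_slow) fetches a fresh block or triggers a collection.
  void *obj = gc_allocate(mut, 16);
  (void)obj;
  return 0;
}
```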