diff --git a/Makefile b/Makefile
index 2a1fded30..30d84ff71 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,6 @@ TESTS = quads mt-gcbench ephemerons finalizers
 COLLECTORS = \
 	bdw \
 	semi \
-	scc \
 	pcc \
 	\
 	mmc \
@@ -64,9 +63,6 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
 
-GC_STEM_scc = scc
-GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
-
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
 
diff --git a/README.md b/README.md
index 601646cc5..52e98e77b 100644
--- a/README.md
+++ b/README.md
@@ -18,11 +18,10 @@ See the [documentation](./doc/README.md).
 - Finalization (supporting resuscitation)
 - Ephemerons (except on `bdw`, which has a polyfill)
 - Conservative roots (optionally with `mmc` or always with `bdw`)
-- Precise roots (optionally with `mmc` or always with `semi` / `pcc` /
-  `scc`)
+- Precise roots (optionally with `mmc` or always with `semi` / `pcc`)
 - Precise embedder-parameterized heap tracing (except with `bdw`)
 - Conservative heap tracing (optionally with `mmc`, always with `bdw`)
-- Parallel tracing (except `semi` and `scc`)
+- Parallel tracing (except `semi`)
 - Parallel mutators (except `semi`)
 - Inline allocation / write barrier fast paths (supporting JIT)
 - One unified API with no-overhead abstraction: switch collectors when
@@ -36,8 +35,8 @@ See the [documentation](./doc/README.md).
 * [src/](./src/): The actual GC implementation, containing a number
   of collector implementations.  The embedder chooses which collector
   to use at compile-time.  See the [documentation](./doc/collectors.md)
-  for more on the different collectors (`semi`, `bdw`, `scc`, `pcc`,
-  and the different flavors of `mmc`).
+  for more on the different collectors (`semi`, `bdw`, `pcc`, and the
+  different flavors of `mmc`).
 * [benchmarks/](./benchmarks/): Benchmarks.  A work in progress.
 * [test/](./test/): A dusty attic of minimal testing.
diff --git a/api/scc-attrs.h b/api/scc-attrs.h
deleted file mode 100644
index 4db408cad..000000000
--- a/api/scc-attrs.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef SCC_ATTRS_H
-#define SCC_ATTRS_H
-
-#include "gc-config.h"
-#include "gc-assert.h"
-#include "gc-attrs.h"
-
-static const uintptr_t GC_ALIGNMENT = 8;
-static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
-
-static inline enum gc_allocator_kind gc_allocator_kind(void) {
-  return GC_ALLOCATOR_INLINE_BUMP_POINTER;
-}
-static inline size_t gc_allocator_small_granule_size(void) {
-  return GC_ALIGNMENT;
-}
-static inline size_t gc_allocator_large_threshold(void) {
-  return GC_LARGE_OBJECT_THRESHOLD;
-}
-
-static inline size_t gc_allocator_allocation_pointer_offset(void) {
-  return sizeof(uintptr_t) * 0;
-}
-static inline size_t gc_allocator_allocation_limit_offset(void) {
-  return sizeof(uintptr_t) * 1;
-}
-
-static inline size_t gc_allocator_freelist_offset(size_t size) {
-  GC_CRASH();
-}
-
-static inline size_t gc_allocator_alloc_table_alignment(void) {
-  return 0;
-}
-static inline uint8_t gc_allocator_alloc_table_begin_pattern(void) {
-  GC_CRASH();
-}
-static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
-  GC_CRASH();
-}
-
-static inline int gc_allocator_needs_clear(void) {
-  return 0;
-}
-
-static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
-  return GC_WRITE_BARRIER_NONE;
-}
-static inline size_t gc_write_barrier_card_table_alignment(void) {
-  GC_CRASH();
-}
-static inline size_t gc_write_barrier_card_size(void) {
-  GC_CRASH();
-}
-
-static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
-  return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
-}
-
-#endif // SCC_ATTRS_H
diff --git a/doc/collector-pcc.md b/doc/collector-pcc.md
index f2f3ff390..64af6c769 100644
--- a/doc/collector-pcc.md
+++ b/doc/collector-pcc.md
@@ -1,38 +1,84 @@
 # Parallel copying collector
 
-Whippet's `pcc` collector is a copying collector, exactly like
-[`scc`](./collector-scc.md), but supporting multiple tracing threads.
-See the discussion of `scc` for a general overview.
+Whippet's `pcc` collector is a copying collector, like the simpler
+[`semi`](./collector-semi.md), but supporting multiple mutator threads,
+multiple tracing threads, and using an external FIFO worklist instead of
+a Cheney worklist.
 
-Also like `scc` and `semi`, `pcc` is not generational yet.  If and when
-`pcc` grows a young generation, it would be a great collector.
+Like `semi`, `pcc` traces by evacuation: it moves all live objects on
+every collection.  (Exception: objects larger than 8192 bytes are
+placed into a partitioned space which traces by marking in place instead
+of copying.)  Evacuation requires precise roots, so if your embedder
+does not support precise roots, `pcc` is not for you.
+
+Again like `semi`, `pcc` generally requires a heap size at least twice
+as large as the maximum live heap size, and performs best with ample
+heap sizes; between 3× and 5× is best.
+
+Overall, `pcc` is a better version of `semi`.  It should have broadly
+the same performance characteristics with a single mutator and with
+parallelism disabled, additionally allowing multiple mutators, and
+scaling better with multiple tracing threads.
+
+Also like `semi`, `pcc` is not generational yet.  If and when `pcc`
+grows a young generation, it would be a great collector.
 
 ## Implementation notes
 
+Unlike `semi` which has a single global bump-pointer allocation region,
+`pcc` structures the heap into 64-kB blocks.  In this way it supports
+multiple mutator threads: mutators do local bump-pointer allocation into
+their own block, and when their block is full, they fetch another from
+the global store.
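+
+As a concrete illustration, the mutator-side fast path can be sketched
+as follows.  This is a hypothetical rendering, not `pcc`'s actual API:
+the real inline fast path is driven by the allocator attributes the
+embedder gets from [the API headers](../api/), notably the allocation
+pointer and limit offsets.
+
+```c
+#include <stddef.h>
+#include <stdint.h>
+
+/* Sketch: the active region of a mutator's current 64-kB block. */
+struct block { uintptr_t hp, limit; };
+struct mutator { struct block block; };
+
+/* Hypothetical slow path: fetch a fresh block from the global store,
+   possibly triggering a collection if none is available. */
+void mutator_refill_block(struct mutator *mut);
+
+static inline void *try_alloc(struct block *b, size_t bytes) {
+  uintptr_t new_hp = b->hp + bytes;  /* bytes is 8-byte aligned */
+  if (new_hp > b->limit)
+    return NULL;                     /* active region exhausted */
+  void *obj = (void *)b->hp;
+  b->hp = new_hp;
+  return obj;
+}
+
+void *alloc(struct mutator *mut, size_t bytes) {
+  void *obj;
+  while (!(obj = try_alloc(&mut->block, bytes)))
+    mutator_refill_block(mut);       /* fetch another block */
+  return obj;
+}
+```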
+
+The block size is 64 kB, but really it's 128 kB, because each block has
+two halves: the active region and the copy reserve.  Dividing each block
+in two allows the collector to easily grow and shrink the heap while
+ensuring there is always enough reserve space.
+
+Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
+a slab.  The first block in a slab is used by the collector itself, to
+keep metadata for the rest of the blocks, for example a chain pointer
+allowing blocks to be collected in lists, a saved allocation pointer for
+partially-filled blocks, whether the block is paged in or out, and so
+on.
+
 `pcc` supports tracing in parallel.  This mechanism works somewhat like
 allocation, in which multiple trace workers compete to evacuate objects
 into their local allocation buffers; when an allocation buffer is full,
 the trace worker grabs another, just like mutators do.
 
-To maintain a queue of objects to trace, `pcc` uses the [fine-grained
-work-stealing parallel tracer](../src/parallel-tracer.h) originally
-developed for [Whippet's Immix-like collector](./collector-whippet.md).
-Each trace worker maintains a [local queue of objects that need
-tracing](../src/local-worklist.h), which currently has 1024 entries.  If
-the local queue becomes full, the worker will publish 3/4 of those
-entries to the worker's [shared worklist](../src/shared-worklist.h).
-When a worker runs out of local work, it will first try to remove work
-from its own shared worklist, then will try to steal from other workers.
+Unlike the simple semi-space collector which uses a Cheney grey
+worklist, `pcc` uses an external worklist.  If parallelism is disabled
+at compile-time, it uses a [simple first-in, first-out queue of objects
+to be traced](../src/simple-worklist.h) originally developed for
+[Whippet's Immix-like collector](./collector-whippet.md).  Like a Cheney
+worklist, this should result in objects being copied in breadth-first
+order.  The literature would suggest that depth-first is generally
+better for locality, but that preserving allocation order is generally
+best.  This is something to experiment with in the future.
 
-If only one tracing thread is enabled (`parallelism=1`), `pcc` uses
+If parallelism is enabled, as it is by default, `pcc` uses the
+[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h)
+originally developed for [Whippet's Immix-like
+collector](./collector-whippet.md).  Each trace worker maintains a
+[local queue of objects that need tracing](../src/local-worklist.h),
+which currently has 1024 entries.  If the local queue becomes full, the
+worker will publish 3/4 of those entries to the worker's [shared
+worklist](../src/shared-worklist.h).  When a worker runs out of local
+work, it will first try to remove work from its own shared worklist,
+then will try to steal from other workers.
+
+If only one tracing thread is enabled at run-time (`parallelism=1`), or
+if parallelism is disabled at compile-time, `pcc` will evacuate by
 non-atomic forwarding, but if multiple threads compete to evacuate
 objects, `pcc` uses [atomic compare-and-swap instead of simple
 forwarding pointer updates](./manual.md#forwarding-objects).  This
 imposes around a ~30% performance penalty but having multiple tracing
 threads is generally worth it, unless the object graph is itself
 serial.
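+
+To make the atomic-forwarding cost concrete, the two strategies can be
+sketched as below.  The helper names and the header-word encoding are
+hypothetical; the real implementations are `copy_space_forward_atomic`
+and `copy_space_forward_nonatomic` in
+[src/copy-space.h](../src/copy-space.h).
+
+```c
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* Sketch: an object's first word doubles as its forwarding slot; a set
+   low bit means "forwarded" and the rest of the word is the new
+   address. */
+void *forward_nonatomic(uintptr_t *slot, void *obj, size_t size,
+                        void *(*evacuate)(void *, size_t)) {
+  if (*slot & 1)                     /* already forwarded */
+    return (void *)(*slot & ~(uintptr_t)1);
+  void *copy = evacuate(obj, size);  /* bump-allocate and memcpy */
+  *slot = (uintptr_t)copy | 1;       /* plain store: only one tracer */
+  return copy;
+}
+
+void *forward_atomic(_Atomic uintptr_t *slot, void *obj, size_t size,
+                     void *(*evacuate)(void *, size_t)) {
+  uintptr_t old = atomic_load(slot);
+  if (old & 1)
+    return (void *)(old & ~(uintptr_t)1);
+  void *copy = evacuate(obj, size);
+  /* Racing tracers may both copy obj; the compare-and-swap on the
+     forwarding slot picks the winner. */
+  if (atomic_compare_exchange_strong(slot, &old, (uintptr_t)copy | 1))
+    return copy;
+  /* We lost: `old` now holds the winner's tagged word.  A real
+     collector would also un-bump our tentative copy. */
+  return (void *)(old & ~(uintptr_t)1);
+}
+```
+
+The atomic version pays for the synchronized updates and for the
+occasional wasted copy, which is where the performance penalty
+mentioned above comes from.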
 
-As with `scc`, the memory used for the external worklist is dynamically
-allocated from the OS and is not currently counted as contributing to
-the heap size.  If you are targetting a microcontroller or something,
-probably you need to choose a different kind of collector that never
-dynamically allocates, such as `semi`.
+The memory used for the external worklist is dynamically allocated from
+the OS and is not currently counted as contributing to the heap size.
+If you are targeting a microcontroller or something, you probably need
+to choose a different kind of collector that never dynamically
+allocates, such as `semi`.
diff --git a/doc/collector-scc.md b/doc/collector-scc.md
deleted file mode 100644
index 2512bb9fd..000000000
--- a/doc/collector-scc.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# Serial copying collector
-
-Whippet's `scc` collector is a copying collector, like the more simple
-[`semi`](./collector-semi.md), but supporting multiple mutator threads,
-and using an external FIFO worklist instead of a Cheney worklist.
-
-Like `semi`, `scc` traces by evacuation: it moves all live objects on
-every collection.  (Exception: objects larger than 8192 bytes are
-placed into a partitioned space which traces by marking in place instead
-of copying.)  Evacuation requires precise roots, so if your embedder
-does not support precise roots, `scc` is not for you.
-
-Again like `semi`, `scc` generally requires a heap size at least twice
-as large as the maximum live heap size, and performs best with ample
-heap sizes; between 3× and 5× is best.
-
-Overall, `scc` is most useful for isolating the performance implications
-of using a block-structured heap and of using an external worklist
-rather than a Cheney worklist as `semi` does.  It also supports multiple
-mutator threads, so it is generally more useful than `semi`.  Also,
-compared to `pcc`, we can measure the overhead that `pcc` imposes to
-atomically forward objects.
-
-But given a choice, you probably want `pcc`; though it's slower with
-only one tracing thread, once you have more than once tracing thread
-it's a win over `scc`.
-
-## Implementation notes
-
-Unlike `semi` which has a single global bump-pointer allocation region,
-`scc` structures the heap into 64-kB blocks.  In this way it supports
-multiple mutator threads: mutators do local bump-pointer allocation into
-their own block, and when their block is full, they fetch another from
-the global store.
-
-The block size is 64 kB, but really it's 128 kB, because each block has
-two halves: the active region and the copy reserve.  Dividing each block
-in two allows the collector to easily grow and shrink the heap while
-ensuring there is always enough reserve space.
-
-Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
-a slab.  The first block in a slab is used by the collector itself, to
-keep metadata for the rest of the blocks, for example a chain pointer
-allowing blocks to be collected in lists, a saved allocation pointer for
-partially-filled blocks, whether the block is paged in or out, and so
-on.
-
-Unlike the simple semi-space collector which uses a Cheney grey
-worklist, `scc` uses a [simple first-in, first-out queue of objects to
-be traced](../src/simple-worklist.h) originally developed for [Whippet's
-Immix-like collector](./collector-whippet.md).  Like a Cheney worklist,
-this should result in objects being copied in breadth-first order.  The
-literature would suggest that depth-first is generally better for
-locality, but that preserving allocation order is generally best.  This
-is something to experiment with in the future.
-
-The memory used for the external worklist is dynamically allocated from
-the OS and is not currently counted as contributing to the heap size.
-If you are targetting a microcontroller or something, probably you need
-to choose a different kind of collector that never dynamically
-allocates, such as `semi`.
-
diff --git a/doc/collectors.md b/doc/collectors.md
index 1c23f3e9d..ccb95ef76 100644
--- a/doc/collectors.md
+++ b/doc/collectors.md
@@ -2,11 +2,9 @@
-Whippet has five collectors currently:
+Whippet has four collectors currently:
 
 - [Semi-space collector (`semi`)](./collector-semi.md): For
   single-threaded embedders who are not too tight on memory.
-- [Serial copying collector (`scc`)](./collector-scc.md): Like `semi`,
-  but with support for multiple mutator threads.
-- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `scc`,
-  but with support for multiple tracing threads.
+- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `semi`,
+  but with support for multiple mutator and tracing threads.
 - [Mostly marking collector (`mmc`)](./collector-mmc.md):
   Immix-inspired collector.  Optionally parallel, conservative (stack
   and/or heap), and/or generational.
@@ -26,8 +24,6 @@ out mutator/embedder bugs.  Then if memory is tight, switch to
 If you are aiming for maximum simplicity and minimal code size (ten
 kilobytes or so), use `semi`.
 
-Only use `scc` if you are investigating GC internals.
-
 If you are writing a new project, you have a choice as to whether to
 pay the development cost of precise roots or not.  If you choose to not
 have precise roots, then go for `stack-conservative-parallel-mmc` directly.
diff --git a/embed.mk b/embed.mk
index fef8de8b1..d0608b345 100644
--- a/embed.mk
+++ b/embed.mk
@@ -42,9 +42,6 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
 
-GC_STEM_scc = scc
-GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
-
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
 
diff --git a/src/copy-space.h b/src/copy-space.h
index f125a08e1..ba26444bb 100644
--- a/src/copy-space.h
+++ b/src/copy-space.h
@@ -523,7 +523,7 @@ static inline int
 copy_space_forward(struct copy_space *space, struct gc_edge edge,
                    struct gc_ref old_ref,
                    struct copy_space_allocator *alloc) {
-  if (space->atomic_forward)
+  if (GC_PARALLEL && space->atomic_forward)
     return copy_space_forward_atomic(space, edge, old_ref, alloc);
   return copy_space_forward_nonatomic(space, edge, old_ref, alloc);
 }
@@ -531,7 +531,7 @@ copy_space_forward(struct copy_space *space, struct gc_edge edge,
 static inline int
 copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
                              struct gc_ref old_ref) {
-  if (space->atomic_forward)
+  if (GC_PARALLEL && space->atomic_forward)
     return copy_space_forward_if_traced_atomic(space, edge, old_ref);
   return copy_space_forward_if_traced_nonatomic(space, edge, old_ref);
 }
diff --git a/src/pcc.c b/src/pcc.c
index 403bfe51f..0f5d84abd 100644
--- a/src/pcc.c
+++ b/src/pcc.c
@@ -15,7 +15,11 @@
 #include "gc-inline.h"
 #include "gc-trace.h"
 #include "large-object-space.h"
+#if GC_PARALLEL
 #include "parallel-tracer.h"
+#else
+#include "serial-tracer.h"
+#endif
 #include "spin.h"
 #include "pcc-attrs.h"
diff --git a/src/scc.c b/src/scc.c
deleted file mode 100644
index 46ab98560..000000000
--- a/src/scc.c
+++ /dev/null
@@ -1,670 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "gc-api.h"
-
-#define GC_IMPL 1
-#include "gc-internal.h"
-
-#include "copy-space.h"
-#include "debug.h"
-#include "gc-align.h"
-#include "gc-inline.h"
-#include "gc-trace.h"
-#include "large-object-space.h"
-#include "serial-tracer.h"
-#include "spin.h"
-#include "scc-attrs.h"
-
-struct gc_heap {
-  struct copy_space copy_space;
-  struct large_object_space large_object_space;
-  struct gc_extern_space *extern_space;
-  size_t large_object_pages;
-  pthread_mutex_t lock;
-  pthread_cond_t collector_cond;
-  pthread_cond_t mutator_cond;
-  size_t size;
-  int collecting;
-  int check_pending_ephemerons;
-  struct gc_pending_ephemerons *pending_ephemerons;
-  struct gc_finalizer_state *finalizer_state;
-  size_t mutator_count;
-  size_t paused_mutator_count;
-  size_t inactive_mutator_count;
-  struct gc_heap_roots *roots;
-  struct gc_mutator *mutators;
-  long count;
-  struct gc_tracer tracer;
-  double pending_ephemerons_size_factor;
-  double pending_ephemerons_size_slop;
-  struct gc_event_listener event_listener;
-  void *event_listener_data;
-};
-
-#define HEAP_EVENT(heap, event, ...) \
-  (heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__)
-#define MUTATOR_EVENT(mut, event, ...) \
-  (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
-
-struct gc_mutator {
-  struct copy_space_allocator allocator;
-  struct gc_heap *heap;
-  struct gc_mutator_roots *roots;
-  void *event_listener_data;
-  struct gc_mutator *next;
-  struct gc_mutator *prev;
-};
-
-struct gc_trace_worker_data {
-  struct copy_space_allocator allocator;
-};
-
-static inline struct copy_space* heap_copy_space(struct gc_heap *heap) {
-  return &heap->copy_space;
-}
-static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) {
-  return &heap->large_object_space;
-}
-static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) {
-  return heap->extern_space;
-}
-static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
-  return mutator->heap;
-}
-
-static void
-gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
-                                         struct gc_heap *heap,
-                                         struct gc_trace_worker *worker,
-                                         struct gc_trace_worker_data *data),
-                               struct gc_tracer *tracer,
-                               struct gc_heap *heap,
-                               struct gc_trace_worker *worker) {
-  struct gc_trace_worker_data data;
-  copy_space_allocator_init(&data.allocator);
-  f(tracer, heap, worker, &data);
-  copy_space_allocator_finish(&data.allocator, heap_copy_space(heap));
-}
-
-static inline int do_trace(struct gc_heap *heap, struct gc_edge edge,
-                           struct gc_ref ref,
-                           struct gc_trace_worker_data *data) {
-  if (!gc_ref_is_heap_object(ref))
-    return 0;
-  if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
-    return copy_space_forward_nonatomic(heap_copy_space(heap), edge, ref,
-                                        &data->allocator);
-  else if (large_object_space_contains(heap_large_object_space(heap), ref))
-    return large_object_space_mark_object(heap_large_object_space(heap), ref);
-  else
-    return gc_extern_space_visit(heap_extern_space(heap), edge, ref);
-}
-
-static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge,
-                             struct gc_trace_worker *worker) {
-  struct gc_ref ref = gc_edge_ref(edge);
-  struct gc_trace_worker_data *data = gc_trace_worker_data(worker);
-  int is_new = do_trace(heap, edge, ref, data);
-
-  if (is_new && heap->check_pending_ephemerons)
-    gc_resolve_pending_ephemerons(ref, heap);
-
-  return is_new;
-}
-
-int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
-  struct gc_ref ref = gc_edge_ref(edge);
-  if (!gc_ref_is_heap_object(ref))
-    return 0;
-  if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
-    return copy_space_forward_if_traced_nonatomic(heap_copy_space(heap), edge,
-                                                  ref);
-  if (large_object_space_contains(heap_large_object_space(heap), ref))
-    return large_object_space_is_copied(heap_large_object_space(heap), ref);
-  GC_CRASH();
-}
-
-static int mutators_are_stopping(struct gc_heap *heap) {
-  return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
-}
-
-static inline void heap_lock(struct gc_heap *heap) {
-  pthread_mutex_lock(&heap->lock);
-}
-static inline void heap_unlock(struct gc_heap *heap) {
-  pthread_mutex_unlock(&heap->lock);
-}
-
-// with heap lock
-static inline int all_mutators_stopped(struct gc_heap *heap) {
-  return heap->mutator_count ==
-    heap->paused_mutator_count + heap->inactive_mutator_count;
-}
-
-static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
-  mut->heap = heap;
-  mut->event_listener_data =
-    heap->event_listener.mutator_added(heap->event_listener_data);
-  copy_space_allocator_init(&mut->allocator);
-  heap_lock(heap);
-  // We have no roots.  If there is a GC currently in progress, we have
-  // nothing to add.  Just wait until it's done.
-  while (mutators_are_stopping(heap))
-    pthread_cond_wait(&heap->mutator_cond, &heap->lock);
-  mut->next = mut->prev = NULL;
-  struct gc_mutator *tail = heap->mutators;
-  if (tail) {
-    mut->next = tail;
-    tail->prev = mut;
-  }
-  heap->mutators = mut;
-  heap->mutator_count++;
-  heap_unlock(heap);
-}
-
-static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
-  MUTATOR_EVENT(mut, mutator_removed);
-  mut->heap = NULL;
-  copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
-  heap_lock(heap);
-  heap->mutator_count--;
-  if (mut->next)
-    mut->next->prev = mut->prev;
-  if (mut->prev)
-    mut->prev->next = mut->next;
-  else
-    heap->mutators = mut->next;
-  // We have no roots.  If there is a GC stop currently in progress,
-  // maybe tell the controller it can continue.
-  if (mutators_are_stopping(heap) && all_mutators_stopped(heap))
-    pthread_cond_signal(&heap->collector_cond);
-  heap_unlock(heap);
-}
-
-static void request_mutators_to_stop(struct gc_heap *heap) {
-  GC_ASSERT(!mutators_are_stopping(heap));
-  atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed);
-}
-
-static void allow_mutators_to_continue(struct gc_heap *heap) {
-  GC_ASSERT(mutators_are_stopping(heap));
-  GC_ASSERT(all_mutators_stopped(heap));
-  heap->paused_mutator_count--;
-  atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed);
-  GC_ASSERT(!mutators_are_stopping(heap));
-  pthread_cond_broadcast(&heap->mutator_cond);
-}
-
-static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) {
-  size_t previous = heap->large_object_pages;
-  heap->large_object_pages = npages;
-  GC_ASSERT(npages <= previous);
-  size_t bytes = (previous - npages) <<
-    heap_large_object_space(heap)->page_size_log2;
-  copy_space_reacquire_memory(heap_copy_space(heap), bytes);
-}
-
-void gc_mutator_set_roots(struct gc_mutator *mut,
-                          struct gc_mutator_roots *roots) {
-  mut->roots = roots;
-}
-void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) {
-  heap->roots = roots;
-}
-void gc_heap_set_extern_space(struct gc_heap *heap,
-                              struct gc_extern_space *space) {
-  heap->extern_space = space;
-}
-
-static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap,
-                                void *trace_data) GC_ALWAYS_INLINE;
-static inline void
-tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
-  struct gc_trace_worker *worker = trace_data;
-  if (trace_edge(heap, edge, worker))
-    gc_trace_worker_enqueue(worker, gc_edge_ref(edge));
-}
-
-static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
-                             struct gc_trace_worker *worker) {
-#ifdef DEBUG
-  if (copy_space_contains(heap_copy_space(heap), ref))
-    GC_ASSERT(copy_space_object_region(ref) == heap_copy_space(heap)->active_region);
-#endif
-  gc_trace_object(ref, tracer_visit, heap, worker, NULL);
-}
-
-static inline void trace_root(struct gc_root root, struct gc_heap *heap,
-                              struct gc_trace_worker *worker) {
-  switch (root.kind) {
-  case GC_ROOT_KIND_HEAP:
-    gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker);
-    break;
-  case GC_ROOT_KIND_MUTATOR:
-    gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker);
-    break;
-  case GC_ROOT_KIND_RESOLVED_EPHEMERONS:
-    gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit,
-                                 heap, worker);
-    break;
-  case GC_ROOT_KIND_EDGE:
-    tracer_visit(root.edge, heap, worker);
-    break;
-  default:
-    GC_CRASH();
-  }
-}
-
-static void
-wait_for_mutators_to_stop(struct gc_heap *heap) {
-  heap->paused_mutator_count++;
-  while (!all_mutators_stopped(heap))
-    pthread_cond_wait(&heap->collector_cond, &heap->lock);
-}
-
-void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size,
-                             struct gc_edge edge, struct gc_ref new_val) {
-}
-
-static void
-pause_mutator_for_collection(struct gc_heap *heap,
-                             struct gc_mutator *mut) GC_NEVER_INLINE;
-static void
-pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) {
-  GC_ASSERT(mutators_are_stopping(heap));
-  GC_ASSERT(!all_mutators_stopped(heap));
-  MUTATOR_EVENT(mut, mutator_stopped);
-  heap->paused_mutator_count++;
-  if (all_mutators_stopped(heap))
-    pthread_cond_signal(&heap->collector_cond);
-
-  do {
-    pthread_cond_wait(&heap->mutator_cond, &heap->lock);
-  } while (mutators_are_stopping(heap));
-  heap->paused_mutator_count--;
-
-  MUTATOR_EVENT(mut, mutator_restarted);
-}
-
-static void
-pause_mutator_for_collection_with_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
-static void
-pause_mutator_for_collection_with_lock(struct gc_mutator *mut) {
-  struct gc_heap *heap = mutator_heap(mut);
-  GC_ASSERT(mutators_are_stopping(heap));
-  MUTATOR_EVENT(mut, mutator_stopping);
-  pause_mutator_for_collection(heap, mut);
-}
-
-static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
-static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
-  struct gc_heap *heap = mutator_heap(mut);
-  GC_ASSERT(mutators_are_stopping(heap));
-  MUTATOR_EVENT(mut, mutator_stopping);
-  copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
-  heap_lock(heap);
-  pause_mutator_for_collection(heap, mut);
-  heap_unlock(heap);
-}
-
-static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) {
-  while (mutators_are_stopping(mutator_heap(mut)))
-    pause_mutator_for_collection_without_lock(mut);
-}
-
-static int maybe_grow_heap(struct gc_heap *heap) {
-  return 0;
-}
-
-static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap,
-                            void *unused) {
-  gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
-}
-
-static void add_roots(struct gc_heap *heap) {
-  for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next)
-    gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
-  gc_tracer_add_root(&heap->tracer, gc_root_heap(heap));
-  gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL);
-}
-
-static void resolve_ephemerons_lazily(struct gc_heap *heap) {
-  heap->check_pending_ephemerons = 0;
-}
-
-static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
-  heap->check_pending_ephemerons = 1;
-  gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
-}
-
-static void trace_resolved_ephemerons(struct gc_heap *heap) {
-  for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
-       resolved;
-       resolved = gc_pop_resolved_ephemerons(heap)) {
-    gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved));
-    gc_tracer_trace(&heap->tracer);
-  }
-}
-
-static void resolve_finalizers(struct gc_heap *heap) {
-  for (size_t priority = 0;
-       priority < gc_finalizer_priority_count();
-       priority++) {
-    if (gc_resolve_finalizers(heap->finalizer_state, priority,
-                              visit_root_edge, heap, NULL)) {
-      gc_tracer_trace(&heap->tracer);
-      trace_resolved_ephemerons(heap);
-    }
-  }
-  gc_notify_finalizers(heap->finalizer_state, heap);
-}
-
-static void sweep_ephemerons(struct gc_heap *heap) {
-  return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
-}
-
-static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
-static void collect(struct gc_mutator *mut) {
-  struct gc_heap *heap = mutator_heap(mut);
-  struct copy_space *copy_space = heap_copy_space(heap);
-  struct large_object_space *lospace = heap_large_object_space(heap);
-  struct gc_extern_space *exspace = heap_extern_space(heap);
-  MUTATOR_EVENT(mut, mutator_cause_gc);
-  DEBUG("start collect #%ld:\n", heap->count);
-  large_object_space_start_gc(lospace, 0);
-  gc_extern_space_start_gc(exspace, 0);
-  resolve_ephemerons_lazily(heap);
-  HEAP_EVENT(heap, requesting_stop);
-  request_mutators_to_stop(heap);
-  HEAP_EVENT(heap, waiting_for_stop);
-  wait_for_mutators_to_stop(heap);
-  HEAP_EVENT(heap, mutators_stopped);
-  HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING);
-  copy_space_flip(copy_space);
-  gc_tracer_prepare(&heap->tracer);
-  add_roots(heap);
-  HEAP_EVENT(heap, roots_traced);
-  gc_tracer_trace(&heap->tracer);
-  HEAP_EVENT(heap, heap_traced);
-  resolve_ephemerons_eagerly(heap);
-  trace_resolved_ephemerons(heap);
-  HEAP_EVENT(heap, ephemerons_traced);
-  resolve_finalizers(heap);
-  HEAP_EVENT(heap, finalizers_traced);
-  sweep_ephemerons(heap);
-  gc_tracer_release(&heap->tracer);
-  copy_space_finish_gc(copy_space);
-  large_object_space_finish_gc(lospace, 0);
-  gc_extern_space_finish_gc(exspace, 0);
-  heap->count++;
-  heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
-  size_t live_size = (copy_space->allocated_bytes_at_last_gc +
-                      large_object_space_size_at_last_collection(lospace));
-  HEAP_EVENT(heap, live_data_size, live_size);
-  maybe_grow_heap(heap);
-  if (!copy_space_page_out_blocks_until_memory_released(copy_space)) {
-    fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
-            heap->size, copy_space->nslabs);
-    GC_CRASH();
-  }
-  HEAP_EVENT(heap, restarting_mutators);
-  allow_mutators_to_continue(heap);
-}
-
-static void trigger_collection(struct gc_mutator *mut) {
-  struct gc_heap *heap = mutator_heap(mut);
-  copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
-  heap_lock(heap);
-  long epoch = heap->count;
-  while (mutators_are_stopping(heap))
-    pause_mutator_for_collection_with_lock(mut);
-  if (epoch == heap->count)
-    collect(mut);
-  heap_unlock(heap);
-}
-
-void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) {
-  trigger_collection(mut);
-}
-
-static void* allocate_large(struct gc_mutator *mut, size_t size) {
-  struct gc_heap *heap = mutator_heap(mut);
-  struct large_object_space *space = heap_large_object_space(heap);
-
-  size_t npages = large_object_space_npages(space, size);
-
-  copy_space_request_release_memory(heap_copy_space(heap),
-                                    npages << space->page_size_log2);
-  while (!copy_space_page_out_blocks_until_memory_released(heap_copy_space(heap)))
-    trigger_collection(mut);
-  atomic_fetch_add(&heap->large_object_pages, npages);
-
-  void *ret = large_object_space_alloc(space, npages);
-  if (!ret)
-    ret = large_object_space_obtain_and_alloc(space, npages);
-
-  if (!ret) {
-    perror("weird: we have the space but mmap didn't work");
-    GC_CRASH();
-  }
-
-  return ret;
-}
-
-static void get_more_empty_blocks_for_mutator(void *mut) {
-  trigger_collection(mut);
-}
-
-void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
-  GC_ASSERT(size > 0); // allocating 0 bytes would be silly
-
-  if (size > gc_allocator_large_threshold())
-    return allocate_large(mut, size);
-
-  struct gc_ref ret = copy_space_allocate(&mut->allocator,
-                                          heap_copy_space(mutator_heap(mut)),
-                                          size,
-                                          get_more_empty_blocks_for_mutator,
-                                          mut);
-  gc_clear_fresh_allocation(ret, size);
-  return gc_ref_heap_object(ret);
-}
-
-void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
-  return gc_allocate(mut, size);
-}
-
-struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) {
-  return gc_allocate(mut, gc_ephemeron_size());
-}
-
-void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron,
-                       struct gc_ref key, struct gc_ref value) {
-  gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value);
-}
-
-struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) {
-  return heap->pending_ephemerons;
-}
-
-unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) {
-  return heap->count;
-}
-
-struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) {
-  return gc_allocate(mut, gc_finalizer_size());
-}
-
-void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer,
-                         unsigned priority, struct gc_ref object,
-                         struct gc_ref closure) {
-  gc_finalizer_init_internal(finalizer, object, closure);
-  gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state,
-                               finalizer, priority);
-  // No write barrier.
-}
-
-struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) {
-  return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state);
-}
-
-void gc_set_finalizer_callback(struct gc_heap *heap,
-                               gc_finalizer_callback callback) {
-  gc_finalizer_state_set_callback(heap->finalizer_state, callback);
-}
-
-static int heap_prepare_pending_ephemerons(struct gc_heap *heap) {
-  struct gc_pending_ephemerons *cur = heap->pending_ephemerons;
-  size_t target = heap->size * heap->pending_ephemerons_size_factor;
-  double slop = heap->pending_ephemerons_size_slop;
-
-  heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop);
-
-  return !!heap->pending_ephemerons;
-}
-
-struct gc_options {
-  struct gc_common_options common;
-};
-int gc_option_from_string(const char *str) {
-  return gc_common_option_from_string(str);
-}
-struct gc_options* gc_allocate_options(void) {
-  struct gc_options *ret = malloc(sizeof(struct gc_options));
-  gc_init_common_options(&ret->common);
-  return ret;
-}
-int gc_options_set_int(struct gc_options *options, int option, int value) {
-  return gc_common_options_set_int(&options->common, option, value);
-}
-int gc_options_set_size(struct gc_options *options, int option,
-                        size_t value) {
-  return gc_common_options_set_size(&options->common, option, value);
-}
-int gc_options_set_double(struct gc_options *options, int option,
-                          double value) {
-  return gc_common_options_set_double(&options->common, option, value);
-}
-int gc_options_parse_and_set(struct gc_options *options, int option,
-                             const char *value) {
-  return gc_common_options_parse_and_set(&options->common, option, value);
-}
-
-static int heap_init(struct gc_heap *heap, const struct gc_options *options) {
-  // *heap is already initialized to 0.
-
-  pthread_mutex_init(&heap->lock, NULL);
-  pthread_cond_init(&heap->mutator_cond, NULL);
-  pthread_cond_init(&heap->collector_cond, NULL);
-  heap->size = options->common.heap_size;
-
-  if (options->common.parallelism != 1)
-    fprintf(stderr, "warning: parallelism unimplemented in semispace copying collector\n");
-
-  if (!gc_tracer_init(&heap->tracer, heap, 1))
-    GC_CRASH();
-
-  heap->pending_ephemerons_size_factor = 0.005;
-  heap->pending_ephemerons_size_slop = 0.5;
-
-  if (!heap_prepare_pending_ephemerons(heap))
-    GC_CRASH();
-
-  heap->finalizer_state = gc_make_finalizer_state();
-  if (!heap->finalizer_state)
-    GC_CRASH();
-
-  return 1;
-}
-
-int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
-            struct gc_heap **heap, struct gc_mutator **mut,
-            struct gc_event_listener event_listener,
-            void *event_listener_data) {
-  GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
-  GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
-  GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
-  GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
-               offsetof(struct copy_space_allocator, hp));
-  GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
-               offsetof(struct copy_space_allocator, limit));
-
-  if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
-    fprintf(stderr, "fixed heap size is currently required\n");
-    return 0;
-  }
-
-  *heap = calloc(1, sizeof(struct gc_heap));
-  if (!*heap) GC_CRASH();
-
-  if (!heap_init(*heap, options))
-    GC_CRASH();
-
-  (*heap)->event_listener = event_listener;
-  (*heap)->event_listener_data = event_listener_data;
-  HEAP_EVENT(*heap, init, (*heap)->size);
-
-  struct copy_space *space = heap_copy_space(*heap);
-  int atomic_forward = 0;
-  if (!copy_space_init(space, (*heap)->size, atomic_forward)) {
-    free(*heap);
-    *heap = NULL;
-    return 0;
-  }
-
-  if (!large_object_space_init(heap_large_object_space(*heap), *heap))
-    GC_CRASH();
-
-  *mut = calloc(1, sizeof(struct gc_mutator));
-  if (!*mut) GC_CRASH();
-  add_mutator(*heap, *mut);
-  return 1;
-}
-
-struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base,
-                                      struct gc_heap *heap) {
-  struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator));
-  if (!ret)
-    GC_CRASH();
-  add_mutator(heap, ret);
-  return ret;
-}
-
-void gc_finish_for_thread(struct gc_mutator *mut) {
-  remove_mutator(mutator_heap(mut), mut);
-  free(mut);
-}
-
-static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
-  GC_ASSERT(mut->next == NULL);
-  copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
-  heap_lock(heap);
-  heap->inactive_mutator_count++;
-  if (all_mutators_stopped(heap))
-    pthread_cond_signal(&heap->collector_cond);
-  heap_unlock(heap);
-}
-
-static void reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
-  heap_lock(heap);
-  while (mutators_are_stopping(heap))
-    pthread_cond_wait(&heap->mutator_cond, &heap->lock);
-  heap->inactive_mutator_count--;
-  heap_unlock(heap);
-}
-
-void* gc_call_without_gc(struct gc_mutator *mut,
-                         void* (*f)(void*),
-                         void *data) {
-  struct gc_heap *heap = mutator_heap(mut);
-  deactivate_mutator(heap, mut);
-  void *ret = f(data);
-  reactivate_mutator(heap, mut);
-  return ret;
-}