
Remove scc

PCC with GC_PARALLEL=0 is exactly equivalent to SCC.  Also now that PCC
will dynamically avoid atomic forwarding if parallelism is disabled at
run-time, there is no need to keep SCC around.
Andy Wingo 2024-09-10 11:06:40 +02:00
parent 6545b34073
commit 1ff082705e
10 changed files with 78 additions and 832 deletions


@@ -2,7 +2,6 @@ TESTS = quads mt-gcbench ephemerons finalizers
COLLECTORS = \
bdw \
semi \
scc \
pcc \
\
mmc \
@@ -64,9 +63,6 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_STEM_scc = scc
GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1


@@ -18,11 +18,10 @@ See the [documentation](./doc/README.md).
- Finalization (supporting resuscitation)
- Ephemerons (except on `bdw`, which has a polyfill)
- Conservative roots (optionally with `mmc` or always with `bdw`)
- Precise roots (optionally with `mmc` or always with `semi` / `pcc` /
`scc`)
- Precise roots (optionally with `mmc` or always with `semi` / `pcc`)
- Precise embedder-parameterized heap tracing (except with `bdw`)
- Conservative heap tracing (optionally with `mmc`, always with `bdw`)
- Parallel tracing (except `semi` and `scc`)
- Parallel tracing (except `semi`)
- Parallel mutators (except `semi`)
- Inline allocation / write barrier fast paths (supporting JIT)
- One unified API with no-overhead abstraction: switch collectors when
@@ -36,8 +35,8 @@ See the [documentation](./doc/README.md).
* [src/](./src/): The actual GC implementation, containing a number of
collector implementations. The embedder chooses which collector to
use at compile-time. See the [documentation](./doc/collectors.md)
for more on the different collectors (`semi`, `bdw`, `scc`, `pcc`,
and the different flavors of `mmc`).
for more on the different collectors (`semi`, `bdw`, `pcc`, and the
different flavors of `mmc`).
* [benchmarks/](./benchmarks/): Benchmarks. A work in progress.
* [test/](./test/): A dusty attic of minimal testing.


@@ -1,60 +0,0 @@
#ifndef SCC_ATTRS_H
#define SCC_ATTRS_H
#include "gc-config.h"
#include "gc-assert.h"
#include "gc-attrs.h"
static const uintptr_t GC_ALIGNMENT = 8;
static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
static inline enum gc_allocator_kind gc_allocator_kind(void) {
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
}
static inline size_t gc_allocator_small_granule_size(void) {
return GC_ALIGNMENT;
}
static inline size_t gc_allocator_large_threshold(void) {
return GC_LARGE_OBJECT_THRESHOLD;
}
static inline size_t gc_allocator_allocation_pointer_offset(void) {
return sizeof(uintptr_t) * 0;
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
return sizeof(uintptr_t) * 1;
}
static inline size_t gc_allocator_freelist_offset(size_t size) {
GC_CRASH();
}
static inline size_t gc_allocator_alloc_table_alignment(void) {
return 0;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(void) {
GC_CRASH();
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
GC_CRASH();
}
static inline int gc_allocator_needs_clear(void) {
return 0;
}
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
return GC_WRITE_BARRIER_NONE;
}
static inline size_t gc_write_barrier_card_table_alignment(void) {
GC_CRASH();
}
static inline size_t gc_write_barrier_card_size(void) {
GC_CRASH();
}
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
}
#endif // SCC_ATTRS_H


@@ -1,38 +1,84 @@
# Parallel copying collector
Whippet's `pcc` collector is a copying collector, exactly like
[`scc`](./collector-scc.md), but supporting multiple tracing threads.
See the discussion of `scc` for a general overview.
Whippet's `pcc` collector is a copying collector, like the simpler
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
multiple tracing threads, and using an external FIFO worklist instead of
a Cheney worklist.
Also like `scc` and `semi`, `pcc` is not generational yet. If and when
`pcc` grows a young generation, it would be a great collector.
Like `semi`, `pcc` traces by evacuation: it moves all live objects on
every collection. (Exception: objects larger than 8192 bytes are
placed into a partitioned space which traces by marking in place instead
of copying.) Evacuation requires precise roots, so if your embedder
does not support precise roots, `pcc` is not for you.
Again like `semi`, `pcc` generally requires a heap size at least twice
as large as the maximum live heap size, and performs best with ample
heap sizes; between 3× and 5× is best.
Overall, `pcc` is a better version of `semi`. It should have broadly
the same performance characteristics with a single mutator and with
parallelism disabled, additionally allowing multiple mutators, and
scaling better with multiple tracing threads.
Also like `semi`, `pcc` is not generational yet. If and when `pcc`
grows a young generation, it would be a great collector.
## Implementation notes
Unlike `semi` which has a single global bump-pointer allocation region,
`pcc` structures the heap into 64-kB blocks. In this way it supports
multiple mutator threads: mutators do local bump-pointer allocation into
their own block, and when their block is full, they fetch another from
the global store.
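To make that concrete, here is a minimal sketch of a per-mutator
bump-pointer fast path, assuming the `hp`/`limit` allocator layout and
8-byte granules that the allocator attributes describe; the names below
are illustrative, not Whippet's actual API.

```c
#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch only: a bump-pointer fast path over a block.  The
   real fast path is generated by the embedder from gc-attrs.h, and these
   identifiers are assumptions, not Whippet's. */
struct bump_allocator { uintptr_t hp, limit; };  /* allocation pointer and limit */

static inline void* try_allocate_fast(struct bump_allocator *a, size_t size) {
  size = (size + 7) & ~(size_t)7;   /* round up to 8-byte granules */
  if (a->limit - a->hp < size)
    return NULL;        /* block full: slow path fetches a fresh block */
  void *obj = (void*)a->hp;
  a->hp += size;
  return obj;
}
```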
The block size is 64 kB, but really it's 128 kB, because each block has
two halves: the active region and the copy reserve. Dividing each block
in two allows the collector to easily grow and shrink the heap while
ensuring there is always enough reserve space.
Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
a slab. The first block in a slab is used by the collector itself, to
keep metadata for the rest of the blocks, for example a chain pointer
allowing blocks to be collected in lists, a saved allocation pointer for
partially-filled blocks, whether the block is paged in or out, and so
on.
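As a rough sketch (with hypothetical names, not the actual
`src/copy-space.h` identifiers), the geometry and per-block metadata
described above look like this:

```c
#include <stdint.h>

/* Sketch of the block/slab geometry described above; names are illustrative. */
#define REGION_SIZE      (64 * 1024)              /* active half or copy reserve */
#define BLOCK_SIZE       (2 * REGION_SIZE)        /* 128 kB: both halves */
#define SLAB_SIZE        (64 * 1024 * 1024)       /* slabs are 64-MB aligned */
#define BLOCKS_PER_SLAB  (SLAB_SIZE / BLOCK_SIZE) /* 512 blocks per slab */

struct block_summary {           /* kept in the slab's first block */
  struct block_summary *next;    /* chain pointer for collecting blocks in lists */
  uintptr_t saved_alloc_pointer; /* resume point for partially-filled blocks */
  uint8_t paged_out;             /* whether the block is currently paged out */
};
```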
`pcc` supports tracing in parallel. This mechanism works somewhat like
allocation, in which multiple trace workers compete to evacuate objects
into their local allocation buffers; when an allocation buffer is full,
the trace worker grabs another, just like mutators do.
To maintain a queue of objects to trace, `pcc` uses the [fine-grained
work-stealing parallel tracer](../src/parallel-tracer.h) originally
developed for [Whippet's Immix-like collector](./collector-whippet.md).
Each trace worker maintains a [local queue of objects that need
tracing](../src/local-worklist.h), which currently has 1024 entries. If
the local queue becomes full, the worker will publish 3/4 of those
entries to the worker's [shared worklist](../src/shared-worklist.h).
When a worker runs out of local work, it will first try to remove work
from its own shared worklist, then will try to steal from other workers.
Unlike the simple semi-space collector which uses a Cheney grey
worklist, `pcc` uses an external worklist. If parallelism is disabled
at compile-time, it uses a [simple first-in, first-out queue of objects
to be traced](../src/simple-worklist.h) originally developed for
[Whippet's Immix-like collector](./collector-whippet.md). Like a Cheney
worklist, this should result in objects being copied in breadth-first
order. The literature would suggest that depth-first is generally
better for locality, but that preserving allocation order is generally
best. This is something to experiment with in the future.
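For illustration, such a FIFO worklist might look like the following
sketch; the capacity is fixed here for brevity, whereas the real
`simple-worklist.h` grows its buffer, and all names are stand-ins.

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of a first-in, first-out grey-object queue.  `struct gc_ref`
   is a stand-in for the real type. */
struct gc_ref { uintptr_t value; };

#define WORKLIST_CAPACITY (1u << 16)

struct fifo_worklist {
  struct gc_ref buf[WORKLIST_CAPACITY];
  size_t read, write;   /* monotonically increasing; indexed modulo capacity */
};

static void fifo_push(struct fifo_worklist *q, struct gc_ref obj) {
  assert(q->write - q->read < WORKLIST_CAPACITY);  /* the real one grows here */
  q->buf[q->write++ % WORKLIST_CAPACITY] = obj;
}

static int fifo_pop(struct fifo_worklist *q, struct gc_ref *obj) {
  if (q->read == q->write)
    return 0;                                      /* empty: tracing is done */
  *obj = q->buf[q->read++ % WORKLIST_CAPACITY];    /* oldest first, giving
                                                      breadth-first copy order */
  return 1;
}
```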
If only one tracing thread is enabled (`parallelism=1`), `pcc` uses
If parallelism is enabled, as it is by default, `pcc` uses the
[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h)
originally developed for [Whippet's Immix-like
collector](./collector-whippet.md). Each trace worker maintains a
[local queue of objects that need tracing](../src/local-worklist.h),
which currently has 1024 entries. If the local queue becomes full, the
worker will publish 3/4 of those entries to the worker's [shared
worklist](../src/shared-worklist.h). When a worker runs out of local
work, it will first try to remove work from its own shared worklist,
then will try to steal from other workers.
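In outline, the per-worker discipline is something like the sketch
below; every identifier is a hypothetical stand-in for the actual
`local-worklist.h`, `shared-worklist.h`, and `parallel-tracer.h`
interfaces.

```c
#include <stddef.h>
#include <stdint.h>

/* Sketch only: how a trace worker balances its private queue against a
   shared, stealable worklist.  All names here are assumptions. */
struct gc_ref { uintptr_t value; };

#define LOCAL_WORKLIST_SIZE 1024

struct local_worklist {               /* private ring of up to 1024 entries */
  struct gc_ref entries[LOCAL_WORKLIST_SIZE];
  size_t count;
};

struct shared_worklist;               /* lock-free deque; stealable by others */
void shared_worklist_push_many(struct shared_worklist*, struct gc_ref*, size_t);
int  shared_worklist_pop(struct shared_worklist*, struct gc_ref*);
int  shared_worklist_steal_for(struct shared_worklist *mine, struct gc_ref*);

struct trace_worker {
  struct local_worklist local;
  struct shared_worklist *shared;
};

static void worker_enqueue(struct trace_worker *w, struct gc_ref obj) {
  if (w->local.count == LOCAL_WORKLIST_SIZE) {
    /* overflow: publish 3/4 of the local entries so idle workers can steal */
    size_t spill = 3 * LOCAL_WORKLIST_SIZE / 4;
    shared_worklist_push_many(w->shared, w->local.entries, spill);
    for (size_t i = 0; i < LOCAL_WORKLIST_SIZE - spill; i++)
      w->local.entries[i] = w->local.entries[spill + i];
    w->local.count -= spill;
  }
  w->local.entries[w->local.count++] = obj;
}

static int worker_dequeue(struct trace_worker *w, struct gc_ref *obj) {
  if (w->local.count) { *obj = w->local.entries[--w->local.count]; return 1; }
  if (shared_worklist_pop(w->shared, obj)) return 1;   /* own shared list first */
  return shared_worklist_steal_for(w->shared, obj);    /* then steal from others */
}
```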
If only one tracing thread is enabled at run-time (`parallelism=1`), or
if parallelism is disabled at compile-time, `pcc` will evacuate by
non-atomic forwarding; but if multiple threads compete to evacuate
objects, `pcc` uses [atomic compare-and-swap instead of simple
forwarding pointer updates](./manual.md#forwarding-objects). This
imposes a performance penalty of around 30%, but having multiple
tracing threads is generally worth it, unless the object graph is
itself serial.
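The difference between the two modes is essentially whether the
forwarding pointer is installed with a plain store or with a
compare-and-swap; the sketch below uses C11 atomics and hypothetical
helpers, and omits the busy/forwarded state machine of the real
`copy-space.h`.

```c
#include <stdatomic.h>
#include <stdint.h>

/* Sketch only: in the atomic case, the tracer that loses the race adopts
   the winner's copy.  Names and the "0 means not forwarded" convention
   are assumptions, not the actual copy-space.h routines. */
static void forward_nonatomic(uintptr_t *fwd_word, uintptr_t *edge,
                              uintptr_t new_copy) {
  *fwd_word = new_copy;   /* only one tracer: a plain store cannot race */
  *edge = new_copy;
}

static void forward_atomic(_Atomic uintptr_t *fwd_word, uintptr_t *edge,
                           uintptr_t new_copy) {
  uintptr_t expected = 0;                       /* 0: not yet forwarded */
  if (atomic_compare_exchange_strong(fwd_word, &expected, new_copy))
    *edge = new_copy;                           /* we won the race */
  else
    *edge = expected;                           /* another tracer already copied it */
}
```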
As with `scc`, the memory used for the external worklist is dynamically
allocated from the OS and is not currently counted as contributing to
the heap size. If you are targetting a microcontroller or something,
probably you need to choose a different kind of collector that never
dynamically allocates, such as `semi`.
The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you are targeting a microcontroller or something similarly
constrained, you probably need to choose a different kind of collector
that never dynamically allocates, such as `semi`.


@@ -1,62 +0,0 @@
# Serial copying collector
Whippet's `scc` collector is a copying collector, like the more simple
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
and using an external FIFO worklist instead of a Cheney worklist.
Like `semi`, `scc` traces by evacuation: it moves all live objects on
every collection. (Exception: objects larger than 8192 bytes are
placed into a partitioned space which traces by marking in place instead
of copying.) Evacuation requires precise roots, so if your embedder
does not support precise roots, `scc` is not for you.
Again like `semi`, `scc` generally requires a heap size at least twice
as large as the maximum live heap size, and performs best with ample
heap sizes; between 3× and 5× is best.
Overall, `scc` is most useful for isolating the performance implications
of using a block-structured heap and of using an external worklist
rather than a Cheney worklist as `semi` does. It also supports multiple
mutator threads, so it is generally more useful than `semi`. Also,
compared to `pcc`, we can measure the overhead that `pcc` imposes to
atomically forward objects.
But given a choice, you probably want `pcc`; though it's slower with
only one tracing thread, once you have more than once tracing thread
it's a win over `scc`.
## Implementation notes
Unlike `semi` which has a single global bump-pointer allocation region,
`scc` structures the heap into 64-kB blocks. In this way it supports
multiple mutator threads: mutators do local bump-pointer allocation into
their own block, and when their block is full, they fetch another from
the global store.
The block size is 64 kB, but really it's 128 kB, because each block has
two halves: the active region and the copy reserve. Dividing each block
in two allows the collector to easily grow and shrink the heap while
ensuring there is always enough reserve space.
Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
a slab. The first block in a slab is used by the collector itself, to
keep metadata for the rest of the blocks, for example a chain pointer
allowing blocks to be collected in lists, a saved allocation pointer for
partially-filled blocks, whether the block is paged in or out, and so
on.
Unlike the simple semi-space collector which uses a Cheney grey
worklist, `scc` uses a [simple first-in, first-out queue of objects to
be traced](../src/simple-worklist.h) originally developed for [Whippet's
Immix-like collector](./collector-whippet.md). Like a Cheney worklist,
this should result in objects being copied in breadth-first order. The
literature would suggest that depth-first is generally better for
locality, but that preserving allocation order is generally best. This
is something to experiment with in the future.
The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you are targetting a microcontroller or something, probably you need
to choose a different kind of collector that never dynamically
allocates, such as `semi`.


@@ -3,10 +3,8 @@
Whippet has five collectors currently:
- [Semi-space collector (`semi`)](./collector-semi.md): For
single-threaded embedders who are not too tight on memory.
- [Serial copying collector (`scc`)](./collector-scc.md): Like `semi`,
but with support for multiple mutator threads.
- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `scc`,
but with support for multiple tracing threads.
- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `semi`,
but with support for multiple mutator and tracing threads.
- [Mostly marking collector (`mmc`)](./collector-mmc.md):
Immix-inspired collector. Optionally parallel, conservative (stack
and/or heap), and/or generational.
@@ -26,8 +24,6 @@ out mutator/embedder bugs. Then if memory is tight, switch to
If you are aiming for maximum simplicity and minimal code size (ten
kilobytes or so), use `semi`.
Only use `scc` if you are investigating GC internals.
If you are writing a new project, you have a choice as to whether to pay
the development cost of precise roots or not. If you choose to not have
precise roots, then go for `stack-conservative-parallel-mmc` directly.


@@ -42,9 +42,6 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_STEM_scc = scc
GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1


@@ -523,7 +523,7 @@ static inline int
copy_space_forward(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref,
struct copy_space_allocator *alloc) {
if (space->atomic_forward)
if (GC_PARALLEL && space->atomic_forward)
return copy_space_forward_atomic(space, edge, old_ref, alloc);
return copy_space_forward_nonatomic(space, edge, old_ref, alloc);
}
@@ -531,7 +531,7 @@ copy_space_forward(struct copy_space *space, struct gc_edge edge,
static inline int
copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref) {
if (space->atomic_forward)
if (GC_PARALLEL && space->atomic_forward)
return copy_space_forward_if_traced_atomic(space, edge, old_ref);
return copy_space_forward_if_traced_nonatomic(space, edge, old_ref);
}
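For context on why this check lets `pcc` subsume `scc`: `GC_PARALLEL`
is a compile-time constant, so the extra test folds away in
non-parallel builds, while `atomic_forward` is chosen at run time from
the requested parallelism. A sketch of how the two knobs might compose
(the function name and the exact condition are assumptions, not the
actual initialization code):

```c
/* Sketch only: with GC_PARALLEL=0, or with parallelism=1 at run time,
   every forward takes the non-atomic path, which is what scc did. */
static void copy_space_set_forwarding_mode(struct copy_space *space,
                                           int parallelism) {
  space->atomic_forward = GC_PARALLEL && parallelism > 1;
}
```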


@@ -15,7 +15,11 @@
#include "gc-inline.h"
#include "gc-trace.h"
#include "large-object-space.h"
#if GC_PARALLEL
#include "parallel-tracer.h"
#else
#include "serial-tracer.h"
#endif
#include "spin.h"
#include "pcc-attrs.h"

src/scc.c (670 deletions)

@@ -1,670 +0,0 @@
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <string.h>
#include <unistd.h>
#include "gc-api.h"
#define GC_IMPL 1
#include "gc-internal.h"
#include "copy-space.h"
#include "debug.h"
#include "gc-align.h"
#include "gc-inline.h"
#include "gc-trace.h"
#include "large-object-space.h"
#include "serial-tracer.h"
#include "spin.h"
#include "scc-attrs.h"
struct gc_heap {
struct copy_space copy_space;
struct large_object_space large_object_space;
struct gc_extern_space *extern_space;
size_t large_object_pages;
pthread_mutex_t lock;
pthread_cond_t collector_cond;
pthread_cond_t mutator_cond;
size_t size;
int collecting;
int check_pending_ephemerons;
struct gc_pending_ephemerons *pending_ephemerons;
struct gc_finalizer_state *finalizer_state;
size_t mutator_count;
size_t paused_mutator_count;
size_t inactive_mutator_count;
struct gc_heap_roots *roots;
struct gc_mutator *mutators;
long count;
struct gc_tracer tracer;
double pending_ephemerons_size_factor;
double pending_ephemerons_size_slop;
struct gc_event_listener event_listener;
void *event_listener_data;
};
#define HEAP_EVENT(heap, event, ...) \
(heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__)
#define MUTATOR_EVENT(mut, event, ...) \
(mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
struct gc_mutator {
struct copy_space_allocator allocator;
struct gc_heap *heap;
struct gc_mutator_roots *roots;
void *event_listener_data;
struct gc_mutator *next;
struct gc_mutator *prev;
};
struct gc_trace_worker_data {
struct copy_space_allocator allocator;
};
static inline struct copy_space* heap_copy_space(struct gc_heap *heap) {
return &heap->copy_space;
}
static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) {
return &heap->large_object_space;
}
static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) {
return heap->extern_space;
}
static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
return mutator->heap;
}
static void
gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker,
struct gc_trace_worker_data *data),
struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker) {
struct gc_trace_worker_data data;
copy_space_allocator_init(&data.allocator);
f(tracer, heap, worker, &data);
copy_space_allocator_finish(&data.allocator, heap_copy_space(heap));
}
static inline int do_trace(struct gc_heap *heap, struct gc_edge edge,
struct gc_ref ref,
struct gc_trace_worker_data *data) {
if (!gc_ref_is_heap_object(ref))
return 0;
if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
return copy_space_forward_nonatomic(heap_copy_space(heap), edge, ref,
&data->allocator);
else if (large_object_space_contains(heap_large_object_space(heap), ref))
return large_object_space_mark_object(heap_large_object_space(heap), ref);
else
return gc_extern_space_visit(heap_extern_space(heap), edge, ref);
}
static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge,
struct gc_trace_worker *worker) {
struct gc_ref ref = gc_edge_ref(edge);
struct gc_trace_worker_data *data = gc_trace_worker_data(worker);
int is_new = do_trace(heap, edge, ref, data);
if (is_new && heap->check_pending_ephemerons)
gc_resolve_pending_ephemerons(ref, heap);
return is_new;
}
int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
struct gc_ref ref = gc_edge_ref(edge);
if (!gc_ref_is_heap_object(ref))
return 0;
if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
return copy_space_forward_if_traced_nonatomic(heap_copy_space(heap), edge,
ref);
if (large_object_space_contains(heap_large_object_space(heap), ref))
return large_object_space_is_copied(heap_large_object_space(heap), ref);
GC_CRASH();
}
static int mutators_are_stopping(struct gc_heap *heap) {
return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
}
static inline void heap_lock(struct gc_heap *heap) {
pthread_mutex_lock(&heap->lock);
}
static inline void heap_unlock(struct gc_heap *heap) {
pthread_mutex_unlock(&heap->lock);
}
// with heap lock
static inline int all_mutators_stopped(struct gc_heap *heap) {
return heap->mutator_count ==
heap->paused_mutator_count + heap->inactive_mutator_count;
}
static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
mut->heap = heap;
mut->event_listener_data =
heap->event_listener.mutator_added(heap->event_listener_data);
copy_space_allocator_init(&mut->allocator);
heap_lock(heap);
// We have no roots. If there is a GC currently in progress, we have
// nothing to add. Just wait until it's done.
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
mut->next = mut->prev = NULL;
struct gc_mutator *tail = heap->mutators;
if (tail) {
mut->next = tail;
tail->prev = mut;
}
heap->mutators = mut;
heap->mutator_count++;
heap_unlock(heap);
}
static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
MUTATOR_EVENT(mut, mutator_removed);
mut->heap = NULL;
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
heap->mutator_count--;
if (mut->next)
mut->next->prev = mut->prev;
if (mut->prev)
mut->prev->next = mut->next;
else
heap->mutators = mut->next;
// We have no roots. If there is a GC stop currently in progress,
// maybe tell the controller it can continue.
if (mutators_are_stopping(heap) && all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void request_mutators_to_stop(struct gc_heap *heap) {
GC_ASSERT(!mutators_are_stopping(heap));
atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed);
}
static void allow_mutators_to_continue(struct gc_heap *heap) {
GC_ASSERT(mutators_are_stopping(heap));
GC_ASSERT(all_mutators_stopped(heap));
heap->paused_mutator_count--;
atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed);
GC_ASSERT(!mutators_are_stopping(heap));
pthread_cond_broadcast(&heap->mutator_cond);
}
static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) {
size_t previous = heap->large_object_pages;
heap->large_object_pages = npages;
GC_ASSERT(npages <= previous);
size_t bytes = (previous - npages) <<
heap_large_object_space(heap)->page_size_log2;
copy_space_reacquire_memory(heap_copy_space(heap), bytes);
}
void gc_mutator_set_roots(struct gc_mutator *mut,
struct gc_mutator_roots *roots) {
mut->roots = roots;
}
void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) {
heap->roots = roots;
}
void gc_heap_set_extern_space(struct gc_heap *heap,
struct gc_extern_space *space) {
heap->extern_space = space;
}
static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap,
void *trace_data) GC_ALWAYS_INLINE;
static inline void
tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
struct gc_trace_worker *worker = trace_data;
if (trace_edge(heap, edge, worker))
gc_trace_worker_enqueue(worker, gc_edge_ref(edge));
}
static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
struct gc_trace_worker *worker) {
#ifdef DEBUG
if (copy_space_contains(heap_copy_space(heap), ref))
GC_ASSERT(copy_space_object_region(ref) == heap_copy_space(heap)->active_region);
#endif
gc_trace_object(ref, tracer_visit, heap, worker, NULL);
}
static inline void trace_root(struct gc_root root, struct gc_heap *heap,
struct gc_trace_worker *worker) {
switch (root.kind) {
case GC_ROOT_KIND_HEAP:
gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker);
break;
case GC_ROOT_KIND_MUTATOR:
gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker);
break;
case GC_ROOT_KIND_RESOLVED_EPHEMERONS:
gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit,
heap, worker);
break;
case GC_ROOT_KIND_EDGE:
tracer_visit(root.edge, heap, worker);
break;
default:
GC_CRASH();
}
}
static void wait_for_mutators_to_stop(struct gc_heap *heap) {
heap->paused_mutator_count++;
while (!all_mutators_stopped(heap))
pthread_cond_wait(&heap->collector_cond, &heap->lock);
}
void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size,
struct gc_edge edge, struct gc_ref new_val) {
}
static void
pause_mutator_for_collection(struct gc_heap *heap,
struct gc_mutator *mut) GC_NEVER_INLINE;
static void
pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mutators_are_stopping(heap));
GC_ASSERT(!all_mutators_stopped(heap));
MUTATOR_EVENT(mut, mutator_stopped);
heap->paused_mutator_count++;
if (all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
do {
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
} while (mutators_are_stopping(heap));
heap->paused_mutator_count--;
MUTATOR_EVENT(mut, mutator_restarted);
}
static void
pause_mutator_for_collection_with_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
static void
pause_mutator_for_collection_with_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping);
pause_mutator_for_collection(heap, mut);
}
static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
pause_mutator_for_collection(heap, mut);
heap_unlock(heap);
}
static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) {
while (mutators_are_stopping(mutator_heap(mut)))
pause_mutator_for_collection_without_lock(mut);
}
static int maybe_grow_heap(struct gc_heap *heap) {
return 0;
}
static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap,
void *unused) {
gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
}
static void add_roots(struct gc_heap *heap) {
for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next)
gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
gc_tracer_add_root(&heap->tracer, gc_root_heap(heap));
gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL);
}
static void resolve_ephemerons_lazily(struct gc_heap *heap) {
heap->check_pending_ephemerons = 0;
}
static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
heap->check_pending_ephemerons = 1;
gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
}
static void trace_resolved_ephemerons(struct gc_heap *heap) {
for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
resolved;
resolved = gc_pop_resolved_ephemerons(heap)) {
gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved));
gc_tracer_trace(&heap->tracer);
}
}
static void resolve_finalizers(struct gc_heap *heap) {
for (size_t priority = 0;
priority < gc_finalizer_priority_count();
priority++) {
if (gc_resolve_finalizers(heap->finalizer_state, priority,
visit_root_edge, heap, NULL)) {
gc_tracer_trace(&heap->tracer);
trace_resolved_ephemerons(heap);
}
}
gc_notify_finalizers(heap->finalizer_state, heap);
}
static void sweep_ephemerons(struct gc_heap *heap) {
return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
}
static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
static void collect(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
struct copy_space *copy_space = heap_copy_space(heap);
struct large_object_space *lospace = heap_large_object_space(heap);
struct gc_extern_space *exspace = heap_extern_space(heap);
MUTATOR_EVENT(mut, mutator_cause_gc);
DEBUG("start collect #%ld:\n", heap->count);
large_object_space_start_gc(lospace, 0);
gc_extern_space_start_gc(exspace, 0);
resolve_ephemerons_lazily(heap);
HEAP_EVENT(heap, requesting_stop);
request_mutators_to_stop(heap);
HEAP_EVENT(heap, waiting_for_stop);
wait_for_mutators_to_stop(heap);
HEAP_EVENT(heap, mutators_stopped);
HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING);
copy_space_flip(copy_space);
gc_tracer_prepare(&heap->tracer);
add_roots(heap);
HEAP_EVENT(heap, roots_traced);
gc_tracer_trace(&heap->tracer);
HEAP_EVENT(heap, heap_traced);
resolve_ephemerons_eagerly(heap);
trace_resolved_ephemerons(heap);
HEAP_EVENT(heap, ephemerons_traced);
resolve_finalizers(heap);
HEAP_EVENT(heap, finalizers_traced);
sweep_ephemerons(heap);
gc_tracer_release(&heap->tracer);
copy_space_finish_gc(copy_space);
large_object_space_finish_gc(lospace, 0);
gc_extern_space_finish_gc(exspace, 0);
heap->count++;
heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
size_t live_size = (copy_space->allocated_bytes_at_last_gc +
large_object_space_size_at_last_collection(lospace));
HEAP_EVENT(heap, live_data_size, live_size);
maybe_grow_heap(heap);
if (!copy_space_page_out_blocks_until_memory_released(copy_space)) {
fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
heap->size, copy_space->nslabs);
GC_CRASH();
}
HEAP_EVENT(heap, restarting_mutators);
allow_mutators_to_continue(heap);
}
static void trigger_collection(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
long epoch = heap->count;
while (mutators_are_stopping(heap))
pause_mutator_for_collection_with_lock(mut);
if (epoch == heap->count)
collect(mut);
heap_unlock(heap);
}
void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) {
trigger_collection(mut);
}
static void* allocate_large(struct gc_mutator *mut, size_t size) {
struct gc_heap *heap = mutator_heap(mut);
struct large_object_space *space = heap_large_object_space(heap);
size_t npages = large_object_space_npages(space, size);
copy_space_request_release_memory(heap_copy_space(heap),
npages << space->page_size_log2);
while (!copy_space_page_out_blocks_until_memory_released(heap_copy_space(heap)))
trigger_collection(mut);
atomic_fetch_add(&heap->large_object_pages, npages);
void *ret = large_object_space_alloc(space, npages);
if (!ret)
ret = large_object_space_obtain_and_alloc(space, npages);
if (!ret) {
perror("weird: we have the space but mmap didn't work");
GC_CRASH();
}
return ret;
}
static void get_more_empty_blocks_for_mutator(void *mut) {
trigger_collection(mut);
}
void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
GC_ASSERT(size > 0); // allocating 0 bytes would be silly
if (size > gc_allocator_large_threshold())
return allocate_large(mut, size);
struct gc_ref ret = copy_space_allocate(&mut->allocator,
heap_copy_space(mutator_heap(mut)),
size,
get_more_empty_blocks_for_mutator,
mut);
gc_clear_fresh_allocation(ret, size);
return gc_ref_heap_object(ret);
}
void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
return gc_allocate(mut, size);
}
struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) {
return gc_allocate(mut, gc_ephemeron_size());
}
void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron,
struct gc_ref key, struct gc_ref value) {
gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value);
}
struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) {
return heap->pending_ephemerons;
}
unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) {
return heap->count;
}
struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) {
return gc_allocate(mut, gc_finalizer_size());
}
void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer,
unsigned priority, struct gc_ref object,
struct gc_ref closure) {
gc_finalizer_init_internal(finalizer, object, closure);
gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state,
finalizer, priority);
// No write barrier.
}
struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) {
return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state);
}
void gc_set_finalizer_callback(struct gc_heap *heap,
gc_finalizer_callback callback) {
gc_finalizer_state_set_callback(heap->finalizer_state, callback);
}
static int heap_prepare_pending_ephemerons(struct gc_heap *heap) {
struct gc_pending_ephemerons *cur = heap->pending_ephemerons;
size_t target = heap->size * heap->pending_ephemerons_size_factor;
double slop = heap->pending_ephemerons_size_slop;
heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop);
return !!heap->pending_ephemerons;
}
struct gc_options {
struct gc_common_options common;
};
int gc_option_from_string(const char *str) {
return gc_common_option_from_string(str);
}
struct gc_options* gc_allocate_options(void) {
struct gc_options *ret = malloc(sizeof(struct gc_options));
gc_init_common_options(&ret->common);
return ret;
}
int gc_options_set_int(struct gc_options *options, int option, int value) {
return gc_common_options_set_int(&options->common, option, value);
}
int gc_options_set_size(struct gc_options *options, int option,
size_t value) {
return gc_common_options_set_size(&options->common, option, value);
}
int gc_options_set_double(struct gc_options *options, int option,
double value) {
return gc_common_options_set_double(&options->common, option, value);
}
int gc_options_parse_and_set(struct gc_options *options, int option,
const char *value) {
return gc_common_options_parse_and_set(&options->common, option, value);
}
static int heap_init(struct gc_heap *heap, const struct gc_options *options) {
// *heap is already initialized to 0.
pthread_mutex_init(&heap->lock, NULL);
pthread_cond_init(&heap->mutator_cond, NULL);
pthread_cond_init(&heap->collector_cond, NULL);
heap->size = options->common.heap_size;
if (options->common.parallelism != 1)
fprintf(stderr, "warning: parallelism unimplemented in semispace copying collector\n");
if (!gc_tracer_init(&heap->tracer, heap, 1))
GC_CRASH();
heap->pending_ephemerons_size_factor = 0.005;
heap->pending_ephemerons_size_slop = 0.5;
if (!heap_prepare_pending_ephemerons(heap))
GC_CRASH();
heap->finalizer_state = gc_make_finalizer_state();
if (!heap->finalizer_state)
GC_CRASH();
return 1;
}
int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
struct gc_heap **heap, struct gc_mutator **mut,
struct gc_event_listener event_listener,
void *event_listener_data) {
GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
offsetof(struct copy_space_allocator, hp));
GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
offsetof(struct copy_space_allocator, limit));
if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
fprintf(stderr, "fixed heap size is currently required\n");
return 0;
}
*heap = calloc(1, sizeof(struct gc_heap));
if (!*heap) GC_CRASH();
if (!heap_init(*heap, options))
GC_CRASH();
(*heap)->event_listener = event_listener;
(*heap)->event_listener_data = event_listener_data;
HEAP_EVENT(*heap, init, (*heap)->size);
struct copy_space *space = heap_copy_space(*heap);
int atomic_forward = 0;
if (!copy_space_init(space, (*heap)->size, atomic_forward)) {
free(*heap);
*heap = NULL;
return 0;
}
if (!large_object_space_init(heap_large_object_space(*heap), *heap))
GC_CRASH();
*mut = calloc(1, sizeof(struct gc_mutator));
if (!*mut) GC_CRASH();
add_mutator(*heap, *mut);
return 1;
}
struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base,
struct gc_heap *heap) {
struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator));
if (!ret)
GC_CRASH();
add_mutator(heap, ret);
return ret;
}
void gc_finish_for_thread(struct gc_mutator *mut) {
remove_mutator(mutator_heap(mut), mut);
free(mut);
}
static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mut->next == NULL);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
heap->inactive_mutator_count++;
if (all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
heap_lock(heap);
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
heap->inactive_mutator_count--;
heap_unlock(heap);
}
void* gc_call_without_gc(struct gc_mutator *mut,
void* (*f)(void*),
void *data) {
struct gc_heap *heap = mutator_heap(mut);
deactivate_mutator(heap, mut);
void *ret = f(data);
reactivate_mutator(heap, mut);
return ret;
}