
Add serial copying collector

Andy Wingo 2024-08-05 14:41:04 +02:00
parent 0ee58abb46
commit 37e57f8c8d
7 changed files with 802 additions and 52 deletions


@@ -2,6 +2,7 @@ TESTS = quads mt-gcbench ephemerons finalizers
 COLLECTORS = \
 bdw \
 semi \
+scc \
 pcc \
 \
 whippet \
@@ -63,6 +64,9 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1

+GC_STEM_scc = scc
+GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
+
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1


@@ -1,49 +1,23 @@
 # Parallel copying collector

-Whippet's `pcc` collector is a copying collector, like the more simple
-[`semi`](./collector-semi.md), but supporting multiple mutator threads
-and multiple tracing threads.
+Whippet's `pcc` collector is a copying collector, exactly like
+[`scc`](./collector-scc.md), but supporting multiple tracing threads.
+See the discussion of `scc` for a general overview.

-Like `semi`, `pcc` traces by evacuation: it moves all live objects on
-every collection. (Exception: objects larger than 8192 bytes are
-placed into a partitioned space which traces by marking in place instead
-of copying.) Evacuation requires precise roots, so if your embedder
-does not support precise roots, `pcc` is not for you.
-
-Again like `semi`, `pcc` generally requires a heap size at least twice
-as large as the maximum live heap size, and performs best with ample
-heap sizes; between 3× and 5× is best.
+Also like `scc` and `semi`, `pcc` is not generational yet. If and when
+`pcc` grows a young generation, it would be a great collector.

 ## Implementation notes

-Unlike `semi` which has a single global bump-pointer allocation region,
-`pcc` structures the heap into 64-kB blocks. In this way it supports
-multiple mutator threads: mutators do local bump-pointer allocation into
-their own block, and when their block is full, they fetch another from
-the global store.
-
-The block size is 64 kB, but really it's 128 kB, because each block has
-two halves: the active region and the copy reserve. Dividing each block
-in two allows the collector to easily grow and shrink the heap while
-ensuring there is always enough reserve space.
-
-Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
-a slab. The first block in a slab is used by the collector itself, to
-keep metadata for the rest of the blocks, for example a chain pointer
-allowing blocks to be collected in lists, a saved allocation pointer for
-partially-filled blocks, whether the block is paged in or out, and so
-on.
-
 `pcc` supports tracing in parallel. This mechanism works somewhat like
 allocation, in which multiple trace workers compete to evacuate objects
 into their local allocation buffers; when an allocation buffer is full,
 the trace worker grabs another, just like mutators do.

-However, unlike the simple semi-space collector which uses a Cheney grey
-worklist, `pcc` uses the [fine-grained work-stealing parallel
-tracer](../src/parallel-tracer.h) originally developed for [Whippet's
-Immix-like collector](./collector-whippet.md). Each trace worker
-maintains a [local queue of objects that need
+To maintain a queue of objects to trace, `pcc` uses the [fine-grained
+work-stealing parallel tracer](../src/parallel-tracer.h) originally
+developed for [Whippet's Immix-like collector](./collector-whippet.md).
+Each trace worker maintains a [local queue of objects that need
 tracing](../src/local-worklist.h), which currently has 1024 entries. If
 the local queue becomes full, the worker will publish 3/4 of those
 entries to the worker's [shared worklist](../src/shared-worklist.h).
@@ -53,12 +27,11 @@ from its own shared worklist, then will try to steal from other workers.
 Because threads compete to evacuate objects, `pcc` uses [atomic
 compare-and-swap instead of simple forwarding pointer
 updates](./manual.md#forwarding-objects), which imposes around a ~30%
-performance penalty. `pcc` generally starts to outperform `semi` when
-it can trace with 2 threads, and gets better with each additional
-thread.
+performance penalty. `pcc` generally starts to outperform `scc` when it
+can trace with 2 threads, and gets better with each additional thread.

-The memory used for the external worklist is dynamically allocated from
-the OS and is not currently counted as contributing to the heap size.
-If you are targetting a microcontroller or something, probably you need
-to choose a different kind of collector that never dynamically
-allocates, such as `semi`.
+As with `scc`, the memory used for the external worklist is dynamically
+allocated from the OS and is not currently counted as contributing to
+the heap size. If you are targetting a microcontroller or something,
+probably you need to choose a different kind of collector that never
+dynamically allocates, such as `semi`.
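
To make that ~30% atomic-forwarding cost concrete, here is an editorial sketch (not the actual code in src/copy-space.h; the one-word forwarding scheme and all names are invented) contrasting the plain forwarding that `scc` can use, since only one tracer ever touches an object, with the compare-and-swap that `pcc` needs when several tracers race to copy the same object:

#include <stdatomic.h>
#include <stdint.h>

/* Invented scheme: a forwarding word at the start of each object, where an
   odd value means "already copied to (value & ~1)". */
struct obj { _Atomic uintptr_t fwd; };

/* scc-style: a single tracing thread, so a plain load and store suffice. */
static uintptr_t forward_serial(struct obj *o, uintptr_t copy_addr) {
  uintptr_t w = atomic_load_explicit(&o->fwd, memory_order_relaxed);
  if (w & 1) return w & ~(uintptr_t)1;         /* already copied */
  /* ... copy the object's bytes to copy_addr here ... */
  atomic_store_explicit(&o->fwd, copy_addr | 1, memory_order_relaxed);
  return copy_addr;
}

/* pcc-style: several tracers may race to copy the same object, so the copy
   is published with a compare-and-swap; the loser throws its copy away and
   uses the winner's address.  The CAS and its ordering are the source of
   the ~30% penalty mentioned above. */
static uintptr_t forward_parallel(struct obj *o, uintptr_t copy_addr) {
  uintptr_t expected = atomic_load_explicit(&o->fwd, memory_order_acquire);
  if (expected & 1) return expected & ~(uintptr_t)1;
  /* ... speculatively copy the object's bytes to copy_addr here ... */
  if (atomic_compare_exchange_strong(&o->fwd, &expected, copy_addr | 1))
    return copy_addr;                          /* we won the race */
  return expected & ~(uintptr_t)1;             /* someone else forwarded it */
}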

doc/collector-scc.md (new file, 62 lines)

@@ -0,0 +1,62 @@
# Serial copying collector

Whippet's `scc` collector is a copying collector, like the more simple
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
and using an external FIFO worklist instead of a Cheney worklist.

Like `semi`, `scc` traces by evacuation: it moves all live objects on
every collection. (Exception: objects larger than 8192 bytes are
placed into a partitioned space which traces by marking in place instead
of copying.) Evacuation requires precise roots, so if your embedder
does not support precise roots, `scc` is not for you.
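
"Precise roots" means the embedder enumerates every off-heap slot that holds a reference, so the collector can update those slots when it moves objects. A hypothetical embedder-side sketch follows; the `struct gc_mutator_roots` layout and the root-stack idea are invented, the `gc_trace_mutator_roots()` hook and its signature are inferred from how src/scc.c below calls it, and `gc_edge()` is assumed to be the edge constructor from gc-edge.h.

#include "gc-api.h"
#include "gc-embedder-api.h"   /* the embedder implements the hooks below */

/* Hypothetical embedder: a fixed-size stack of pinned local references. */
#define MAX_ROOTS 16
struct gc_mutator_roots {
  size_t count;
  void *refs[MAX_ROOTS];       /* each slot may point into the GC heap */
};

/* Called by the collector for each mutator at collection time.  Visiting
   every slot as an edge lets the copying collector evacuate the object
   and write its new address back into the slot. */
void gc_trace_mutator_roots(struct gc_mutator_roots *roots,
                            void (*trace_edge)(struct gc_edge edge,
                                               struct gc_heap *heap,
                                               void *trace_data),
                            struct gc_heap *heap, void *trace_data) {
  for (size_t i = 0; i < roots->count; i++)
    trace_edge(gc_edge(&roots->refs[i]), heap, trace_data);
}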

Again like `semi`, `scc` generally requires a heap size at least twice
as large as the maximum live heap size, and performs best with ample
heap sizes; between 3× and 5× is best.
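
To make that sizing rule concrete with an editorial example: a program whose live data peaks around 100 MB needs at least a 200 MB heap and is comfortable around 300-500 MB. Using the options API that src/scc.c defines further down this page, and assuming the `GC_OPTION_HEAP_SIZE` and `GC_OPTION_HEAP_SIZE_POLICY` keys from gc-options.h (an assumption here, not shown in this commit), that could look like:

#include <stddef.h>
#include "gc-api.h"   /* gc_allocate_options() and the setters used below */

static struct gc_options* make_scc_options(size_t max_live_bytes) {
  struct gc_options *opts = gc_allocate_options();
  /* scc currently requires a fixed heap size (see gc_init below). */
  gc_options_set_int(opts, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
  /* At least 2x the maximum live size; 3x-5x performs best, so pick 4x:
     100 MB live -> 400 MB heap. */
  gc_options_set_size(opts, GC_OPTION_HEAP_SIZE, max_live_bytes * 4);
  return opts;
}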

Overall, `scc` is most useful for isolating the performance implications
of using a block-structured heap and of using an external worklist
rather than a Cheney worklist as `semi` does. It also supports multiple
mutator threads, so it is generally more useful than `semi`. Also,
compared to `pcc`, we can measure the overhead that `pcc` incurs to
atomically forward objects.

But given a choice, you probably want `pcc`; though it's slower with
only one tracing thread, once you have more than one tracing thread
it's a win over `scc`.

## Implementation notes

Unlike `semi` which has a single global bump-pointer allocation region,
`scc` structures the heap into 64-kB blocks. In this way it supports
multiple mutator threads: mutators do local bump-pointer allocation into
their own block, and when their block is full, they fetch another from
the global store.

The block size is 64 kB, but really it's 128 kB, because each block has
two halves: the active region and the copy reserve. Dividing each block
in two allows the collector to easily grow and shrink the heap while
ensuring there is always enough reserve space.
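
An editorial sketch of the allocation fast path (simplified, with invented names; the real code is `copy_space_allocate()` and the allocator struct in src/copy-space.h):

#include <stdint.h>
#include <stddef.h>

#define BLOCK_SIZE (64 * 1024)   /* one active region; its copy reserve
                                    is the other 64 kB half of the block */

/* Invented names: each mutator bump-allocates within its current block.
   Allocations reaching this path are at most 8192 bytes; larger objects
   go to the large-object space instead. */
struct block_allocator {
  uintptr_t hp;      /* next free byte in the active region */
  uintptr_t limit;   /* end of the active region */
};

static void* block_bump_alloc(struct block_allocator *a, size_t bytes,
                              uintptr_t (*get_block_slow)(void)) {
  bytes = (bytes + 7) & ~(size_t)7;            /* 8-byte alignment */
  if (a->hp + bytes > a->limit) {
    /* Block exhausted: fetch a fresh block from the global store, which
       may page in memory or trigger a collection. */
    a->hp = get_block_slow();
    a->limit = a->hp + BLOCK_SIZE;
  }
  void *ret = (void*)a->hp;
  a->hp += bytes;
  return ret;
}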

Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
a slab. The first block in a slab is used by the collector itself, to
keep metadata for the rest of the blocks, for example a chain pointer
allowing blocks to be collected in lists, a saved allocation pointer for
partially-filled blocks, whether the block is paged in or out, and so
on.
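
In other words, a slab looks roughly like the sketch below (an illustration with invented field names; the real layout is in src/copy-space.h):

#include <stdint.h>

#define SLAB_SIZE       (64UL * 1024 * 1024)       /* 64 MB, 64 MB-aligned */
#define BLOCK_SIZE      (128UL * 1024)             /* active half + copy reserve */
#define BLOCKS_PER_SLAB (SLAB_SIZE / BLOCK_SIZE)   /* = 512 */

/* Invented names: per-block bookkeeping, kept in the slab's first block. */
struct block_summary {
  struct block_summary *next;   /* chain pointer for collecting blocks in lists */
  uintptr_t saved_hp;           /* allocation pointer of a partially-filled block */
  uint8_t paged_out;            /* has the block been returned to the OS? */
};

struct slab {
  union {
    struct block_summary summaries[BLOCKS_PER_SLAB];
    uint8_t padding[BLOCK_SIZE];                   /* block 0, padded to one block */
  } meta;
  uint8_t blocks[BLOCKS_PER_SLAB - 1][BLOCK_SIZE]; /* blocks 1..511 hold objects */
};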

Unlike the simple semi-space collector which uses a Cheney grey
worklist, `scc` uses a [simple first-in, first-out queue of objects to
be traced](../src/simple-worklist.h) originally developed for [Whippet's
Immix-like collector](./collector-whippet.md). Like a Cheney worklist,
this should result in objects being copied in breadth-first order. The
literature would suggest that depth-first is generally better for
locality, but that preserving allocation order is generally best. This
is something to experiment with in the future.
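
For contrast (editorial sketch, not the code in simple-worklist.h): Cheney's worklist is implicit, namely the region of to-space between a scan pointer and the allocation pointer, while an explicit FIFO like the one below reaches the same breadth-first order by popping from the front and pushing newly copied children at the back.

#include <stdint.h>
#include <stdlib.h>

/* Invented, minimal FIFO of object addresses.  The real simple-worklist.h
   grows its buffer with mmap, which is why the paragraph below notes that
   this memory is not counted toward the heap size. */
struct fifo_worklist {
  uintptr_t *items;
  size_t read, write, capacity;
};

static void fifo_push(struct fifo_worklist *q, uintptr_t obj) {
  if (q->write == q->capacity) {
    q->capacity = q->capacity ? q->capacity * 2 : 1024;
    q->items = realloc(q->items, q->capacity * sizeof(uintptr_t));
    if (!q->items) abort();
  }
  q->items[q->write++] = obj;
}

/* Returns 0 when the queue is empty. */
static uintptr_t fifo_pop(struct fifo_worklist *q) {
  return q->read < q->write ? q->items[q->read++] : 0;
}

/* Trace loop: pop an object, trace its fields, push any children that get
   copied as a result; first-in, first-out order makes the copy breadth-first,
   like Cheney's scan pointer chasing the allocation pointer. */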

The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you are targetting a microcontroller or something, probably you need
to choose a different kind of collector that never dynamically
allocates, such as `semi`.


@@ -1,15 +1,17 @@
 # Whippet collectors

-Whippet has four collectors currently:
+Whippet has five collectors currently:

-- [Semi-space collector (semi)](./collector-semi.md): For
+- [Semi-space collector (`semi`)](./collector-semi.md): For
   single-threaded embedders who are not too tight on memory.
-- [Parallel copying collector (pcc)](./collector-pcc.md): Like semi,
-  but with support for multiple mutator threads. Faster than semi if
+- [Serial copying collector (`scc`)](./collector-scc.md): Like `semi`,
+  but with support for multiple mutator threads.
+- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `scc`,
+  but with support for multiple tracing threads. Faster than `scc` if
   multiple cores are available at collection-time.
-- [Whippet collector (whippet)](./collector-whippet.md):
+- [Whippet collector (`whippet`)](./collector-whippet.md):
   Immix-inspired collector. Optionally parallel, conservative (stack
   and/or heap), and/or generational.
-- [Boehm-Demers-Weiser collector (bdw)](./collector-bdw.md):
+- [Boehm-Demers-Weiser collector (`bdw`)](./collector-bdw.md):
   Conservative mark-sweep collector, implemented by
   Boehm-Demers-Weiser library.
@@ -18,11 +20,13 @@ Whippet has four collectors currently:
 If you are migrating an embedder off BDW-GC, then it could be reasonable
 to first go to `bdw`, then `stack-conservative-parallel-whippet`.

-If you have an embedder with precise roots, use `semi` if
-single-threaded, or `pcc` if multi-threaded. That will shake out
-mutator/embedder bugs. Then if memory is tight, switch to
+If you have an embedder with precise roots, use `pcc`. That will shake
+out mutator/embedder bugs. Then if memory is tight, switch to
 `parallel-whippet`, possibly `parallel-generational-whippet`.

+If you are aiming for maximum simplicity and minimal code size (ten
+kilobytes or so), use `semi`.
+
 If you are writing a new project, you have a choice as to whether to pay
 the development cost of precise roots or not. If you choose to not have
 precise roots, then go for `stack-conservative-parallel-whippet`


@@ -42,6 +42,9 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1

+GC_STEM_scc = scc
+GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
+
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1


@@ -484,6 +484,41 @@ copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
   }
 }

+static inline int
+copy_space_forward_nonatomic(struct copy_space *space, struct gc_edge edge,
+                             struct gc_ref old_ref,
+                             struct copy_space_allocator *alloc) {
+  GC_ASSERT(copy_space_object_region(old_ref) != space->active_region);
+  uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
+  if (forwarded) {
+    gc_edge_update(edge, gc_ref(forwarded));
+    return 0;
+  } else {
+    size_t size;
+    gc_trace_object(old_ref, NULL, NULL, NULL, &size);
+    struct gc_ref new_ref =
+      copy_space_allocate(alloc, space, size,
+                          copy_space_gc_during_evacuation, NULL);
+    memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), size);
+    gc_object_forward_nonatomic(old_ref, new_ref);
+    gc_edge_update(edge, new_ref);
+    return 1;
+  }
+}
+
+static int
+copy_space_forward_if_traced_nonatomic(struct copy_space *space,
+                                       struct gc_edge edge,
+                                       struct gc_ref old_ref) {
+  GC_ASSERT(copy_space_object_region(old_ref) != space->active_region);
+  uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
+  if (forwarded) {
+    gc_edge_update(edge, gc_ref(forwarded));
+    return 1;
+  }
+  return 0;
+}
+
 static inline int
 copy_space_contains(struct copy_space *space, struct gc_ref ref) {
   for (size_t i = 0; i < space->nextents; i++)

src/scc.c (new file, 669 lines)

@@ -0,0 +1,669 @@
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "gc-api.h"
#define GC_IMPL 1
#include "gc-internal.h"
#include "copy-space.h"
#include "debug.h"
#include "gc-align.h"
#include "gc-inline.h"
#include "gc-trace.h"
#include "large-object-space.h"
#include "serial-tracer.h"
#include "spin.h"
#include "scc-attrs.h"
struct gc_heap {
struct copy_space copy_space;
struct large_object_space large_object_space;
struct gc_extern_space *extern_space;
size_t large_object_pages;
pthread_mutex_t lock;
pthread_cond_t collector_cond;
pthread_cond_t mutator_cond;
size_t size;
int collecting;
int check_pending_ephemerons;
struct gc_pending_ephemerons *pending_ephemerons;
struct gc_finalizer_state *finalizer_state;
size_t mutator_count;
size_t paused_mutator_count;
size_t inactive_mutator_count;
struct gc_heap_roots *roots;
struct gc_mutator *mutators;
long count;
struct gc_tracer tracer;
double pending_ephemerons_size_factor;
double pending_ephemerons_size_slop;
struct gc_event_listener event_listener;
void *event_listener_data;
};
#define HEAP_EVENT(heap, event, ...) \
(heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__)
#define MUTATOR_EVENT(mut, event, ...) \
(mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
struct gc_mutator {
struct copy_space_allocator allocator;
struct gc_heap *heap;
struct gc_mutator_roots *roots;
void *event_listener_data;
struct gc_mutator *next;
struct gc_mutator *prev;
};
struct gc_trace_worker_data {
struct copy_space_allocator allocator;
};
static inline struct copy_space* heap_copy_space(struct gc_heap *heap) {
return &heap->copy_space;
}
static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) {
return &heap->large_object_space;
}
static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) {
return heap->extern_space;
}
static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
return mutator->heap;
}
static void
gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker,
struct gc_trace_worker_data *data),
struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker) {
struct gc_trace_worker_data data;
copy_space_allocator_init(&data.allocator, heap_copy_space(heap));
f(tracer, heap, worker, &data);
copy_space_allocator_finish(&data.allocator, heap_copy_space(heap));
}
static inline int do_trace(struct gc_heap *heap, struct gc_edge edge,
struct gc_ref ref,
struct gc_trace_worker_data *data) {
if (!gc_ref_is_heap_object(ref))
return 0;
if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
return copy_space_forward_nonatomic(heap_copy_space(heap), edge, ref,
&data->allocator);
else if (large_object_space_contains(heap_large_object_space(heap), ref))
return large_object_space_mark_object(heap_large_object_space(heap), ref);
else
return gc_extern_space_visit(heap_extern_space(heap), edge, ref);
}
static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge,
struct gc_trace_worker *worker) {
struct gc_ref ref = gc_edge_ref(edge);
struct gc_trace_worker_data *data = gc_trace_worker_data(worker);
int is_new = do_trace(heap, edge, ref, data);
if (is_new && heap->check_pending_ephemerons)
gc_resolve_pending_ephemerons(ref, heap);
return is_new;
}
int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
struct gc_ref ref = gc_edge_ref(edge);
if (!gc_ref_is_heap_object(ref))
return 0;
if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
return copy_space_forward_if_traced_nonatomic(heap_copy_space(heap), edge,
ref);
if (large_object_space_contains(heap_large_object_space(heap), ref))
return large_object_space_is_copied(heap_large_object_space(heap), ref);
GC_CRASH();
}
static int mutators_are_stopping(struct gc_heap *heap) {
return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
}
static inline void heap_lock(struct gc_heap *heap) {
pthread_mutex_lock(&heap->lock);
}
static inline void heap_unlock(struct gc_heap *heap) {
pthread_mutex_unlock(&heap->lock);
}
// with heap lock
static inline int all_mutators_stopped(struct gc_heap *heap) {
return heap->mutator_count ==
heap->paused_mutator_count + heap->inactive_mutator_count;
}
static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
mut->heap = heap;
mut->event_listener_data =
heap->event_listener.mutator_added(heap->event_listener_data);
copy_space_allocator_init(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
// We have no roots. If there is a GC currently in progress, we have
// nothing to add. Just wait until it's done.
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
mut->next = mut->prev = NULL;
struct gc_mutator *tail = heap->mutators;
if (tail) {
mut->next = tail;
tail->prev = mut;
}
heap->mutators = mut;
heap->mutator_count++;
heap_unlock(heap);
}
static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
MUTATOR_EVENT(mut, mutator_removed);
mut->heap = NULL;
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
heap->mutator_count--;
if (mut->next)
mut->next->prev = mut->prev;
if (mut->prev)
mut->prev->next = mut->next;
else
heap->mutators = mut->next;
// We have no roots. If there is a GC stop currently in progress,
// maybe tell the controller it can continue.
if (mutators_are_stopping(heap) && all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void request_mutators_to_stop(struct gc_heap *heap) {
GC_ASSERT(!mutators_are_stopping(heap));
atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed);
}
static void allow_mutators_to_continue(struct gc_heap *heap) {
GC_ASSERT(mutators_are_stopping(heap));
GC_ASSERT(all_mutators_stopped(heap));
heap->paused_mutator_count--;
atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed);
GC_ASSERT(!mutators_are_stopping(heap));
pthread_cond_broadcast(&heap->mutator_cond);
}
static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) {
size_t previous = heap->large_object_pages;
heap->large_object_pages = npages;
GC_ASSERT(npages <= previous);
size_t bytes = (previous - npages) <<
heap_large_object_space(heap)->page_size_log2;
copy_space_reacquire_memory(heap_copy_space(heap), bytes);
}
void gc_mutator_set_roots(struct gc_mutator *mut,
struct gc_mutator_roots *roots) {
mut->roots = roots;
}
void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) {
heap->roots = roots;
}
void gc_heap_set_extern_space(struct gc_heap *heap,
struct gc_extern_space *space) {
heap->extern_space = space;
}
static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap,
void *trace_data) GC_ALWAYS_INLINE;
static inline void
tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
struct gc_trace_worker *worker = trace_data;
if (trace_edge(heap, edge, worker))
gc_trace_worker_enqueue(worker, gc_edge_ref(edge));
}
static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
struct gc_trace_worker *worker) {
#ifdef DEBUG
if (copy_space_contains(heap_copy_space(heap), ref))
GC_ASSERT(copy_space_object_region(ref) == heap_copy_space(heap)->active_region);
#endif
gc_trace_object(ref, tracer_visit, heap, worker, NULL);
}
static inline void trace_root(struct gc_root root, struct gc_heap *heap,
struct gc_trace_worker *worker) {
switch (root.kind) {
case GC_ROOT_KIND_HEAP:
gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker);
break;
case GC_ROOT_KIND_MUTATOR:
gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker);
break;
case GC_ROOT_KIND_RESOLVED_EPHEMERONS:
gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit,
heap, worker);
break;
case GC_ROOT_KIND_EDGE:
tracer_visit(root.edge, heap, worker);
break;
default:
GC_CRASH();
}
}
static void wait_for_mutators_to_stop(struct gc_heap *heap) {
heap->paused_mutator_count++;
while (!all_mutators_stopped(heap))
pthread_cond_wait(&heap->collector_cond, &heap->lock);
}
void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size,
struct gc_edge edge, struct gc_ref new_val) {
}
static void
pause_mutator_for_collection(struct gc_heap *heap,
struct gc_mutator *mut) GC_NEVER_INLINE;
static void
pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mutators_are_stopping(heap));
GC_ASSERT(!all_mutators_stopped(heap));
MUTATOR_EVENT(mut, mutator_stopped);
heap->paused_mutator_count++;
if (all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
do {
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
} while (mutators_are_stopping(heap));
heap->paused_mutator_count--;
MUTATOR_EVENT(mut, mutator_restarted);
}
static void
pause_mutator_for_collection_with_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
static void
pause_mutator_for_collection_with_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping);
pause_mutator_for_collection(heap, mut);
}
static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
pause_mutator_for_collection(heap, mut);
heap_unlock(heap);
}
static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) {
while (mutators_are_stopping(mutator_heap(mut)))
pause_mutator_for_collection_without_lock(mut);
}
static int maybe_grow_heap(struct gc_heap *heap) {
return 0;
}
static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap,
void *unused) {
gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
}
static void add_roots(struct gc_heap *heap) {
for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next)
gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
gc_tracer_add_root(&heap->tracer, gc_root_heap(heap));
gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL);
}
static void resolve_ephemerons_lazily(struct gc_heap *heap) {
heap->check_pending_ephemerons = 0;
}
static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
heap->check_pending_ephemerons = 1;
gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
}
static void trace_resolved_ephemerons(struct gc_heap *heap) {
for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
resolved;
resolved = gc_pop_resolved_ephemerons(heap)) {
gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved));
gc_tracer_trace(&heap->tracer);
}
}
static void resolve_finalizers(struct gc_heap *heap) {
for (size_t priority = 0;
priority < gc_finalizer_priority_count();
priority++) {
if (gc_resolve_finalizers(heap->finalizer_state, priority,
visit_root_edge, heap, NULL)) {
gc_tracer_trace(&heap->tracer);
trace_resolved_ephemerons(heap);
}
}
gc_notify_finalizers(heap->finalizer_state, heap);
}
static void sweep_ephemerons(struct gc_heap *heap) {
return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
}
static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
static void collect(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
struct copy_space *copy_space = heap_copy_space(heap);
struct large_object_space *lospace = heap_large_object_space(heap);
struct gc_extern_space *exspace = heap_extern_space(heap);
MUTATOR_EVENT(mut, mutator_cause_gc);
DEBUG("start collect #%ld:\n", heap->count);
HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING);
large_object_space_start_gc(lospace, 0);
gc_extern_space_start_gc(exspace, 0);
resolve_ephemerons_lazily(heap);
HEAP_EVENT(heap, requesting_stop);
request_mutators_to_stop(heap);
HEAP_EVENT(heap, waiting_for_stop);
wait_for_mutators_to_stop(heap);
HEAP_EVENT(heap, mutators_stopped);
copy_space_flip(copy_space);
gc_tracer_prepare(&heap->tracer);
add_roots(heap);
HEAP_EVENT(heap, roots_traced);
gc_tracer_trace(&heap->tracer);
HEAP_EVENT(heap, heap_traced);
resolve_ephemerons_eagerly(heap);
trace_resolved_ephemerons(heap);
HEAP_EVENT(heap, ephemerons_traced);
resolve_finalizers(heap);
HEAP_EVENT(heap, finalizers_traced);
sweep_ephemerons(heap);
gc_tracer_release(&heap->tracer);
copy_space_finish_gc(copy_space);
large_object_space_finish_gc(lospace, 0);
gc_extern_space_finish_gc(exspace, 0);
heap->count++;
heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
size_t live_size = (copy_space->allocated_bytes_at_last_gc +
large_object_space_size_at_last_collection(lospace));
HEAP_EVENT(heap, live_data_size, live_size);
maybe_grow_heap(heap);
if (!copy_space_page_out_blocks_until_memory_released(copy_space)) {
fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
heap->size, copy_space->nslabs);
GC_CRASH();
}
HEAP_EVENT(heap, restarting_mutators);
allow_mutators_to_continue(heap);
}
static void trigger_collection(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
long epoch = heap->count;
while (mutators_are_stopping(heap))
pause_mutator_for_collection_with_lock(mut);
if (epoch == heap->count)
collect(mut);
heap_unlock(heap);
}
void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) {
trigger_collection(mut);
}
static void* allocate_large(struct gc_mutator *mut, size_t size) {
struct gc_heap *heap = mutator_heap(mut);
struct large_object_space *space = heap_large_object_space(heap);
size_t npages = large_object_space_npages(space, size);
copy_space_request_release_memory(heap_copy_space(heap),
npages << space->page_size_log2);
while (!copy_space_page_out_blocks_until_memory_released(heap_copy_space(heap)))
trigger_collection(mut);
atomic_fetch_add(&heap->large_object_pages, npages);
void *ret = large_object_space_alloc(space, npages);
if (!ret)
ret = large_object_space_obtain_and_alloc(space, npages);
if (!ret) {
perror("weird: we have the space but mmap didn't work");
GC_CRASH();
}
return ret;
}
static void get_more_empty_blocks_for_mutator(void *mut) {
trigger_collection(mut);
}
void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
GC_ASSERT(size > 0); // allocating 0 bytes would be silly
if (size > gc_allocator_large_threshold())
return allocate_large(mut, size);
struct gc_ref ret = copy_space_allocate(&mut->allocator,
heap_copy_space(mutator_heap(mut)),
size,
get_more_empty_blocks_for_mutator,
mut);
gc_clear_fresh_allocation(ret, size);
return gc_ref_heap_object(ret);
}
void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
return gc_allocate(mut, size);
}
struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) {
return gc_allocate(mut, gc_ephemeron_size());
}
void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron,
struct gc_ref key, struct gc_ref value) {
gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value);
}
struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) {
return heap->pending_ephemerons;
}
unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) {
return heap->count;
}
struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) {
return gc_allocate(mut, gc_finalizer_size());
}
void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer,
unsigned priority, struct gc_ref object,
struct gc_ref closure) {
gc_finalizer_init_internal(finalizer, object, closure);
gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state,
finalizer, priority);
// No write barrier.
}
struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) {
return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state);
}
void gc_set_finalizer_callback(struct gc_heap *heap,
gc_finalizer_callback callback) {
gc_finalizer_state_set_callback(heap->finalizer_state, callback);
}
static int heap_prepare_pending_ephemerons(struct gc_heap *heap) {
struct gc_pending_ephemerons *cur = heap->pending_ephemerons;
size_t target = heap->size * heap->pending_ephemerons_size_factor;
double slop = heap->pending_ephemerons_size_slop;
heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop);
return !!heap->pending_ephemerons;
}
struct gc_options {
struct gc_common_options common;
};
int gc_option_from_string(const char *str) {
return gc_common_option_from_string(str);
}
struct gc_options* gc_allocate_options(void) {
struct gc_options *ret = malloc(sizeof(struct gc_options));
gc_init_common_options(&ret->common);
return ret;
}
int gc_options_set_int(struct gc_options *options, int option, int value) {
return gc_common_options_set_int(&options->common, option, value);
}
int gc_options_set_size(struct gc_options *options, int option,
size_t value) {
return gc_common_options_set_size(&options->common, option, value);
}
int gc_options_set_double(struct gc_options *options, int option,
double value) {
return gc_common_options_set_double(&options->common, option, value);
}
int gc_options_parse_and_set(struct gc_options *options, int option,
const char *value) {
return gc_common_options_parse_and_set(&options->common, option, value);
}
static int heap_init(struct gc_heap *heap, const struct gc_options *options) {
// *heap is already initialized to 0.
pthread_mutex_init(&heap->lock, NULL);
pthread_cond_init(&heap->mutator_cond, NULL);
pthread_cond_init(&heap->collector_cond, NULL);
heap->size = options->common.heap_size;
if (options->common.parallelism != 1)
fprintf(stderr, "warning: parallelism unimplemented in semispace copying collector\n");
if (!gc_tracer_init(&heap->tracer, heap, 1))
GC_CRASH();
heap->pending_ephemerons_size_factor = 0.005;
heap->pending_ephemerons_size_slop = 0.5;
if (!heap_prepare_pending_ephemerons(heap))
GC_CRASH();
heap->finalizer_state = gc_make_finalizer_state();
if (!heap->finalizer_state)
GC_CRASH();
return 1;
}
int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
struct gc_heap **heap, struct gc_mutator **mut,
struct gc_event_listener event_listener,
void *event_listener_data) {
GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
offsetof(struct copy_space_allocator, hp));
GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
offsetof(struct copy_space_allocator, limit));
if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
fprintf(stderr, "fixed heap size is currently required\n");
return 0;
}
*heap = calloc(1, sizeof(struct gc_heap));
if (!*heap) GC_CRASH();
if (!heap_init(*heap, options))
GC_CRASH();
(*heap)->event_listener = event_listener;
(*heap)->event_listener_data = event_listener_data;
HEAP_EVENT(*heap, init, (*heap)->size);
struct copy_space *space = heap_copy_space(*heap);
if (!copy_space_init(space, (*heap)->size)) {
free(*heap);
*heap = NULL;
return 0;
}
if (!large_object_space_init(heap_large_object_space(*heap), *heap))
GC_CRASH();
*mut = calloc(1, sizeof(struct gc_mutator));
if (!*mut) GC_CRASH();
add_mutator(*heap, *mut);
return 1;
}
struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base,
struct gc_heap *heap) {
struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator));
if (!ret)
GC_CRASH();
add_mutator(heap, ret);
return ret;
}
void gc_finish_for_thread(struct gc_mutator *mut) {
remove_mutator(mutator_heap(mut), mut);
free(mut);
}
static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mut->next == NULL);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
heap->inactive_mutator_count++;
if (all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
heap_lock(heap);
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
heap->inactive_mutator_count--;
heap_unlock(heap);
}
void* gc_call_without_gc(struct gc_mutator *mut,
void* (*f)(void*),
void *data) {
struct gc_heap *heap = mutator_heap(mut);
deactivate_mutator(heap, mut);
void *ret = f(data);
reactivate_mutator(heap, mut);
return ret;
}
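
A closing usage note on that last function: `gc_call_without_gc()` is how an embedder wraps a call that may block, so that collections requested by other mutators need not wait for this thread. A hypothetical example (the `read()`-wrapping helper is invented; only the `gc_call_without_gc()` signature is taken from the file above):

#include <unistd.h>
#include "gc-api.h"

struct read_request { int fd; void *buf; size_t len; ssize_t result; };

static void* do_blocking_read(void *data) {
  struct read_request *req = data;
  req->result = read(req->fd, req->buf, req->len);   /* may block for a while */
  return NULL;
}

/* While do_blocking_read runs, this mutator counts as inactive, so a
   stop-the-world collection triggered by other threads can proceed
   without waiting for it. */
static ssize_t read_without_blocking_gc(struct gc_mutator *mut, int fd,
                                        void *buf, size_t len) {
  struct read_request req = { fd, buf, len, 0 };
  gc_call_without_gc(mut, do_blocking_read, &req);
  return req.result;
}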