
Remove scc

PCC with GC_PARALLEL=0 is exactly equivalent to SCC.  Also now that PCC
will dynamically avoid atomic forwarding if parallelism is disabled at
run-time, there is no need to keep SCC around.
Andy Wingo 2024-09-10 11:06:40 +02:00
parent 6545b34073
commit 1ff082705e
10 changed files with 78 additions and 832 deletions


@@ -2,7 +2,6 @@ TESTS = quads mt-gcbench ephemerons finalizers
COLLECTORS = \
bdw \
semi \
scc \
pcc \
\
mmc \
@@ -64,9 +63,6 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_STEM_scc = scc
GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1


@@ -18,11 +18,10 @@ See the [documentation](./doc/README.md).
- Finalization (supporting resuscitation)
- Ephemerons (except on `bdw`, which has a polyfill)
- Conservative roots (optionally with `mmc` or always with `bdw`)
- Precise roots (optionally with `mmc` or always with `semi` / `pcc` /
`scc`)
- Precise roots (optionally with `mmc` or always with `semi` / `pcc`)
- Precise embedder-parameterized heap tracing (except with `bdw`)
- Conservative heap tracing (optionally with `mmc`, always with `bdw`)
- Parallel tracing (except `semi` and `scc`)
- Parallel tracing (except `semi`)
- Parallel mutators (except `semi`)
- Inline allocation / write barrier fast paths (supporting JIT)
- One unified API with no-overhead abstraction: switch collectors when
@@ -36,8 +35,8 @@ See the [documentation](./doc/README.md).
* [src/](./src/): The actual GC implementation, containing a number of
collector implementations. The embedder chooses which collector to
use at compile-time. See the [documentation](./doc/collectors.md)
for more on the different collectors (`semi`, `bdw`, `scc`, `pcc`,
and the different flavors of `mmc`).
for more on the different collectors (`semi`, `bdw`, `pcc`, and the
different flavors of `mmc`).
* [benchmarks/](./benchmarks/): Benchmarks. A work in progress.
* [test/](./test/): A dusty attic of minimal testing.


@@ -1,60 +0,0 @@
#ifndef SCC_ATTRS_H
#define SCC_ATTRS_H
#include "gc-config.h"
#include "gc-assert.h"
#include "gc-attrs.h"
static const uintptr_t GC_ALIGNMENT = 8;
static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
static inline enum gc_allocator_kind gc_allocator_kind(void) {
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
}
static inline size_t gc_allocator_small_granule_size(void) {
return GC_ALIGNMENT;
}
static inline size_t gc_allocator_large_threshold(void) {
return GC_LARGE_OBJECT_THRESHOLD;
}
static inline size_t gc_allocator_allocation_pointer_offset(void) {
return sizeof(uintptr_t) * 0;
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
return sizeof(uintptr_t) * 1;
}
static inline size_t gc_allocator_freelist_offset(size_t size) {
GC_CRASH();
}
static inline size_t gc_allocator_alloc_table_alignment(void) {
return 0;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(void) {
GC_CRASH();
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
GC_CRASH();
}
static inline int gc_allocator_needs_clear(void) {
return 0;
}
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
return GC_WRITE_BARRIER_NONE;
}
static inline size_t gc_write_barrier_card_table_alignment(void) {
GC_CRASH();
}
static inline size_t gc_write_barrier_card_size(void) {
GC_CRASH();
}
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
}
#endif // SCC_ATTRS_H


@@ -1,38 +1,84 @@
# Parallel copying collector
Whippet's `pcc` collector is a copying collector, exactly like
[`scc`](./collector-scc.md), but supporting multiple tracing threads.
See the discussion of `scc` for a general overview.
Whippet's `pcc` collector is a copying collector, like the simpler
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
multiple tracing threads, and using an external FIFO worklist instead of
a Cheney worklist.
Also like `scc` and `semi`, `pcc` is not generational yet. If and when
`pcc` grows a young generation, it would be a great collector.
Like `semi`, `pcc` traces by evacuation: it moves all live objects on
every collection. (Exception: objects larger than 8192 bytes are
placed into a partitioned space which traces by marking in place instead
of copying.) Evacuation requires precise roots, so if your embedder
does not support precise roots, `pcc` is not for you.
Again like `semi`, `pcc` generally requires a heap size at least twice
as large as the maximum live heap size, and performs best with ample
heap sizes; between 3× and 5× is best.
Overall, `pcc` is a better version of `semi`. It should have broadly
the same performance characteristics with a single mutator and with
parallelism disabled, additionally allowing multiple mutators, and
scaling better with multiple tracing threads.
Also like `semi`, `pcc` is not generational yet. If and when `pcc`
grows a young generation, it would be a great collector.
## Implementation notes
Unlike `semi` which has a single global bump-pointer allocation region,
`pcc` structures the heap into 64-kB blocks. In this way it supports
multiple mutator threads: mutators do local bump-pointer allocation into
their own block, and when their block is full, they fetch another from
the global store.
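To make that concrete, here is a minimal sketch of a per-mutator
bump-pointer fast path, assuming the `hp`/`limit` allocator layout and
8-byte granules that the allocator attributes describe; the names below
are illustrative, not Whippet's actual API.

```c
#include <stddef.h>
#include <stdint.h>

/* Illustrative sketch only: a bump-pointer fast path over a block.  The
   real fast path is generated by the embedder from gc-attrs.h, and these
   identifiers are assumptions, not Whippet's. */
struct bump_allocator { uintptr_t hp, limit; };  /* allocation pointer and limit */

static inline void* try_allocate_fast(struct bump_allocator *a, size_t size) {
  size = (size + 7) & ~(size_t)7;   /* round up to 8-byte granules */
  if (a->limit - a->hp < size)
    return NULL;        /* block full: slow path fetches a fresh block */
  void *obj = (void*)a->hp;
  a->hp += size;
  return obj;
}
```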
The block size is 64 kB, but really it's 128 kB, because each block has
two halves: the active region and the copy reserve. Dividing each block
in two allows the collector to easily grow and shrink the heap while
ensuring there is always enough reserve space.
Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
a slab. The first block in a slab is used by the collector itself, to
keep metadata for the rest of the blocks, for example a chain pointer
allowing blocks to be collected in lists, a saved allocation pointer for
partially-filled blocks, whether the block is paged in or out, and so
on.
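As a rough sketch (with hypothetical names, not the actual
`src/copy-space.h` identifiers), the geometry and per-block metadata
described above look like this:

```c
#include <stdint.h>

/* Sketch of the block/slab geometry described above; names are illustrative. */
#define REGION_SIZE      (64 * 1024)              /* active half or copy reserve */
#define BLOCK_SIZE       (2 * REGION_SIZE)        /* 128 kB: both halves */
#define SLAB_SIZE        (64 * 1024 * 1024)       /* slabs are 64-MB aligned */
#define BLOCKS_PER_SLAB  (SLAB_SIZE / BLOCK_SIZE) /* 512 blocks per slab */

struct block_summary {           /* kept in the slab's first block */
  struct block_summary *next;    /* chain pointer for collecting blocks in lists */
  uintptr_t saved_alloc_pointer; /* resume point for partially-filled blocks */
  uint8_t paged_out;             /* whether the block is currently paged out */
};
```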
`pcc` supports tracing in parallel. This mechanism works somewhat like
allocation, in which multiple trace workers compete to evacuate objects
into their local allocation buffers; when an allocation buffer is full,
the trace worker grabs another, just like mutators do.
To maintain a queue of objects to trace, `pcc` uses the [fine-grained
work-stealing parallel tracer](../src/parallel-tracer.h) originally
developed for [Whippet's Immix-like collector](./collector-whippet.md).
Each trace worker maintains a [local queue of objects that need
tracing](../src/local-worklist.h), which currently has 1024 entries. If
the local queue becomes full, the worker will publish 3/4 of those
entries to the worker's [shared worklist](../src/shared-worklist.h).
When a worker runs out of local work, it will first try to remove work
from its own shared worklist, then will try to steal from other workers.
Unlike the simple semi-space collector which uses a Cheney grey
worklist, `pcc` uses an external worklist. If parallelism is disabled
at compile-time, it uses a [simple first-in, first-out queue of objects
to be traced](../src/simple-worklist.h) originally developed for
[Whippet's Immix-like collector](./collector-whippet.md). Like a Cheney
worklist, this should result in objects being copied in breadth-first
order. The literature would suggest that depth-first is generally
better for locality, but that preserving allocation order is generally
best. This is something to experiment with in the future.
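For illustration, such a FIFO worklist might look like the following
sketch; the capacity is fixed here for brevity, whereas the real
`simple-worklist.h` grows its buffer, and all names are stand-ins.

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of a first-in, first-out grey-object queue.  `struct gc_ref`
   is a stand-in for the real type. */
struct gc_ref { uintptr_t value; };

#define WORKLIST_CAPACITY (1u << 16)

struct fifo_worklist {
  struct gc_ref buf[WORKLIST_CAPACITY];
  size_t read, write;   /* monotonically increasing; indexed modulo capacity */
};

static void fifo_push(struct fifo_worklist *q, struct gc_ref obj) {
  assert(q->write - q->read < WORKLIST_CAPACITY);  /* the real one grows here */
  q->buf[q->write++ % WORKLIST_CAPACITY] = obj;
}

static int fifo_pop(struct fifo_worklist *q, struct gc_ref *obj) {
  if (q->read == q->write)
    return 0;                                      /* empty: tracing is done */
  *obj = q->buf[q->read++ % WORKLIST_CAPACITY];    /* oldest first, giving
                                                      breadth-first copy order */
  return 1;
}
```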
If only one tracing thread is enabled (`parallelism=1`), `pcc` uses
If parallelism is enabled, as it is by default, `pcc` uses the
[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h)
originally developed for [Whippet's Immix-like
collector](./collector-whippet.md). Each trace worker maintains a
[local queue of objects that need tracing](../src/local-worklist.h),
which currently has 1024 entries. If the local queue becomes full, the
worker will publish 3/4 of those entries to the worker's [shared
worklist](../src/shared-worklist.h). When a worker runs out of local
work, it will first try to remove work from its own shared worklist,
then will try to steal from other workers.
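In outline, the per-worker discipline is something like the sketch
below; every identifier is a hypothetical stand-in for the actual
`local-worklist.h`, `shared-worklist.h`, and `parallel-tracer.h`
interfaces.

```c
#include <stddef.h>
#include <stdint.h>

/* Sketch only: how a trace worker balances its private queue against a
   shared, stealable worklist.  All names here are assumptions. */
struct gc_ref { uintptr_t value; };

#define LOCAL_WORKLIST_SIZE 1024

struct local_worklist {               /* private ring of up to 1024 entries */
  struct gc_ref entries[LOCAL_WORKLIST_SIZE];
  size_t count;
};

struct shared_worklist;               /* lock-free deque; stealable by others */
void shared_worklist_push_many(struct shared_worklist*, struct gc_ref*, size_t);
int  shared_worklist_pop(struct shared_worklist*, struct gc_ref*);
int  shared_worklist_steal_for(struct shared_worklist *mine, struct gc_ref*);

struct trace_worker {
  struct local_worklist local;
  struct shared_worklist *shared;
};

static void worker_enqueue(struct trace_worker *w, struct gc_ref obj) {
  if (w->local.count == LOCAL_WORKLIST_SIZE) {
    /* overflow: publish 3/4 of the local entries so idle workers can steal */
    size_t spill = 3 * LOCAL_WORKLIST_SIZE / 4;
    shared_worklist_push_many(w->shared, w->local.entries, spill);
    for (size_t i = 0; i < LOCAL_WORKLIST_SIZE - spill; i++)
      w->local.entries[i] = w->local.entries[spill + i];
    w->local.count -= spill;
  }
  w->local.entries[w->local.count++] = obj;
}

static int worker_dequeue(struct trace_worker *w, struct gc_ref *obj) {
  if (w->local.count) { *obj = w->local.entries[--w->local.count]; return 1; }
  if (shared_worklist_pop(w->shared, obj)) return 1;   /* own shared list first */
  return shared_worklist_steal_for(w->shared, obj);    /* then steal from others */
}
```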
If only one tracing thread is enabled at run-time (`parallelism=1`), or
if parallelism is disabled at compile-time, `pcc` will evacuate by
non-atomic forwarding; but if multiple threads compete to evacuate
objects, `pcc` uses [atomic compare-and-swap instead of simple
forwarding pointer updates](./manual.md#forwarding-objects). This
imposes a performance penalty of around 30%, but having multiple
tracing threads is generally worth it, unless the object graph is
itself serial.
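The difference between the two modes is essentially whether the
forwarding pointer is installed with a plain store or with a
compare-and-swap; the sketch below uses C11 atomics and hypothetical
helpers, and omits the busy/forwarded state machine of the real
`copy-space.h`.

```c
#include <stdatomic.h>
#include <stdint.h>

/* Sketch only: in the atomic case, the tracer that loses the race adopts
   the winner's copy.  Names and the "0 means not forwarded" convention
   are assumptions, not the actual copy-space.h routines. */
static void forward_nonatomic(uintptr_t *fwd_word, uintptr_t *edge,
                              uintptr_t new_copy) {
  *fwd_word = new_copy;   /* only one tracer: a plain store cannot race */
  *edge = new_copy;
}

static void forward_atomic(_Atomic uintptr_t *fwd_word, uintptr_t *edge,
                           uintptr_t new_copy) {
  uintptr_t expected = 0;                       /* 0: not yet forwarded */
  if (atomic_compare_exchange_strong(fwd_word, &expected, new_copy))
    *edge = new_copy;                           /* we won the race */
  else
    *edge = expected;                           /* another tracer already copied it */
}
```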
As with `scc`, the memory used for the external worklist is dynamically
allocated from the OS and is not currently counted as contributing to
the heap size. If you are targetting a microcontroller or something,
probably you need to choose a different kind of collector that never
dynamically allocates, such as `semi`.
The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you are targeting a microcontroller or something similarly
constrained, you probably need to choose a different kind of collector
that never dynamically allocates, such as `semi`.


@@ -1,62 +0,0 @@
# Serial copying collector
Whippet's `scc` collector is a copying collector, like the more simple
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
and using an external FIFO worklist instead of a Cheney worklist.
Like `semi`, `scc` traces by evacuation: it moves all live objects on
every collection. (Exception: objects larger than 8192 bytes are
placed into a partitioned space which traces by marking in place instead
of copying.) Evacuation requires precise roots, so if your embedder
does not support precise roots, `scc` is not for you.
Again like `semi`, `scc` generally requires a heap size at least twice
as large as the maximum live heap size, and performs best with ample
heap sizes; between 3× and 5× is best.
Overall, `scc` is most useful for isolating the performance implications
of using a block-structured heap and of using an external worklist
rather than a Cheney worklist as `semi` does. It also supports multiple
mutator threads, so it is generally more useful than `semi`. Also,
compared to `pcc`, we can measure the overhead that `pcc` imposes to
atomically forward objects.
But given a choice, you probably want `pcc`; though it's slower with
only one tracing thread, once you have more than once tracing thread
it's a win over `scc`.
## Implementation notes
Unlike `semi` which has a single global bump-pointer allocation region,
`scc` structures the heap into 64-kB blocks. In this way it supports
multiple mutator threads: mutators do local bump-pointer allocation into
their own block, and when their block is full, they fetch another from
the global store.
The block size is 64 kB, but really it's 128 kB, because each block has
two halves: the active region and the copy reserve. Dividing each block
in two allows the collector to easily grow and shrink the heap while
ensuring there is always enough reserve space.
Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
a slab. The first block in a slab is used by the collector itself, to
keep metadata for the rest of the blocks, for example a chain pointer
allowing blocks to be collected in lists, a saved allocation pointer for
partially-filled blocks, whether the block is paged in or out, and so
on.
Unlike the simple semi-space collector which uses a Cheney grey
worklist, `scc` uses a [simple first-in, first-out queue of objects to
be traced](../src/simple-worklist.h) originally developed for [Whippet's
Immix-like collector](./collector-whippet.md). Like a Cheney worklist,
this should result in objects being copied in breadth-first order. The
literature would suggest that depth-first is generally better for
locality, but that preserving allocation order is generally best. This
is something to experiment with in the future.
The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you are targetting a microcontroller or something, probably you need
to choose a different kind of collector that never dynamically
allocates, such as `semi`.


@@ -3,10 +3,8 @@
Whippet has five collectors currently:
- [Semi-space collector (`semi`)](./collector-semi.md): For
single-threaded embedders who are not too tight on memory.
- [Serial copying collector (`scc`)](./collector-scc.md): Like `semi`,
but with support for multiple mutator threads.
- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `scc`,
but with support for multiple tracing threads.
- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like `semi`,
but with support for multiple mutator and tracing threads.
- [Mostly marking collector (`mmc`)](./collector-mmc.md):
Immix-inspired collector. Optionally parallel, conservative (stack
and/or heap), and/or generational.
@@ -26,8 +24,6 @@ out mutator/embedder bugs. Then if memory is tight, switch to
If you are aiming for maximum simplicity and minimal code size (ten
kilobytes or so), use `semi`.
Only use `scc` if you are investigating GC internals.
If you are writing a new project, you have a choice as to whether to pay
the development cost of precise roots or not. If you choose to not have
precise roots, then go for `stack-conservative-parallel-mmc` directly.


@@ -42,9 +42,6 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_STEM_scc = scc
GC_CFLAGS_scc = -DGC_PRECISE_ROOTS=1
GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1


@@ -523,7 +523,7 @@ static inline int
copy_space_forward(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref,
struct copy_space_allocator *alloc) {
if (space->atomic_forward)
if (GC_PARALLEL && space->atomic_forward)
return copy_space_forward_atomic(space, edge, old_ref, alloc);
return copy_space_forward_nonatomic(space, edge, old_ref, alloc);
}
@@ -531,7 +531,7 @@ copy_space_forward(struct copy_space *space, struct gc_edge edge,
static inline int
copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref) {
if (space->atomic_forward)
if (GC_PARALLEL && space->atomic_forward)
return copy_space_forward_if_traced_atomic(space, edge, old_ref);
return copy_space_forward_if_traced_nonatomic(space, edge, old_ref);
}
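For context on why this check lets `pcc` subsume `scc`: `GC_PARALLEL`
is a compile-time constant, so the extra test folds away in
non-parallel builds, while `atomic_forward` is chosen at run time from
the requested parallelism. A sketch of how the two knobs might compose
(the function name and the exact condition are assumptions, not the
actual initialization code):

```c
/* Sketch only: with GC_PARALLEL=0, or with parallelism=1 at run time,
   every forward takes the non-atomic path, which is what scc did. */
static void copy_space_set_forwarding_mode(struct copy_space *space,
                                           int parallelism) {
  space->atomic_forward = GC_PARALLEL && parallelism > 1;
}
```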


@@ -15,7 +15,11 @@
#include "gc-inline.h"
#include "gc-trace.h"
#include "large-object-space.h"
#if GC_PARALLEL
#include "parallel-tracer.h"
#else
#include "serial-tracer.h"
#endif
#include "spin.h"
#include "pcc-attrs.h"

src/scc.c (670 deletions)

@@ -1,670 +0,0 @@
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <string.h>
#include <unistd.h>
#include "gc-api.h"
#define GC_IMPL 1
#include "gc-internal.h"
#include "copy-space.h"
#include "debug.h"
#include "gc-align.h"
#include "gc-inline.h"
#include "gc-trace.h"
#include "large-object-space.h"
#include "serial-tracer.h"
#include "spin.h"
#include "scc-attrs.h"
struct gc_heap {
struct copy_space copy_space;
struct large_object_space large_object_space;
struct gc_extern_space *extern_space;
size_t large_object_pages;
pthread_mutex_t lock;
pthread_cond_t collector_cond;
pthread_cond_t mutator_cond;
size_t size;
int collecting;
int check_pending_ephemerons;
struct gc_pending_ephemerons *pending_ephemerons;
struct gc_finalizer_state *finalizer_state;
size_t mutator_count;
size_t paused_mutator_count;
size_t inactive_mutator_count;
struct gc_heap_roots *roots;
struct gc_mutator *mutators;
long count;
struct gc_tracer tracer;
double pending_ephemerons_size_factor;
double pending_ephemerons_size_slop;
struct gc_event_listener event_listener;
void *event_listener_data;
};
#define HEAP_EVENT(heap, event, ...) \
(heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__)
#define MUTATOR_EVENT(mut, event, ...) \
(mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
struct gc_mutator {
struct copy_space_allocator allocator;
struct gc_heap *heap;
struct gc_mutator_roots *roots;
void *event_listener_data;
struct gc_mutator *next;
struct gc_mutator *prev;
};
struct gc_trace_worker_data {
struct copy_space_allocator allocator;
};
static inline struct copy_space* heap_copy_space(struct gc_heap *heap) {
return &heap->copy_space;
}
static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) {
return &heap->large_object_space;
}
static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) {
return heap->extern_space;
}
static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
return mutator->heap;
}
static void
gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker,
struct gc_trace_worker_data *data),
struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker) {
struct gc_trace_worker_data data;
copy_space_allocator_init(&data.allocator);
f(tracer, heap, worker, &data);
copy_space_allocator_finish(&data.allocator, heap_copy_space(heap));
}
static inline int do_trace(struct gc_heap *heap, struct gc_edge edge,
struct gc_ref ref,
struct gc_trace_worker_data *data) {
if (!gc_ref_is_heap_object(ref))
return 0;
if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
return copy_space_forward_nonatomic(heap_copy_space(heap), edge, ref,
&data->allocator);
else if (large_object_space_contains(heap_large_object_space(heap), ref))
return large_object_space_mark_object(heap_large_object_space(heap), ref);
else
return gc_extern_space_visit(heap_extern_space(heap), edge, ref);
}
static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge,
struct gc_trace_worker *worker) {
struct gc_ref ref = gc_edge_ref(edge);
struct gc_trace_worker_data *data = gc_trace_worker_data(worker);
int is_new = do_trace(heap, edge, ref, data);
if (is_new && heap->check_pending_ephemerons)
gc_resolve_pending_ephemerons(ref, heap);
return is_new;
}
int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
struct gc_ref ref = gc_edge_ref(edge);
if (!gc_ref_is_heap_object(ref))
return 0;
if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref)))
return copy_space_forward_if_traced_nonatomic(heap_copy_space(heap), edge,
ref);
if (large_object_space_contains(heap_large_object_space(heap), ref))
return large_object_space_is_copied(heap_large_object_space(heap), ref);
GC_CRASH();
}
static int mutators_are_stopping(struct gc_heap *heap) {
return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
}
static inline void heap_lock(struct gc_heap *heap) {
pthread_mutex_lock(&heap->lock);
}
static inline void heap_unlock(struct gc_heap *heap) {
pthread_mutex_unlock(&heap->lock);
}
// with heap lock
static inline int all_mutators_stopped(struct gc_heap *heap) {
return heap->mutator_count ==
heap->paused_mutator_count + heap->inactive_mutator_count;
}
static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
mut->heap = heap;
mut->event_listener_data =
heap->event_listener.mutator_added(heap->event_listener_data);
copy_space_allocator_init(&mut->allocator);
heap_lock(heap);
// We have no roots. If there is a GC currently in progress, we have
// nothing to add. Just wait until it's done.
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
mut->next = mut->prev = NULL;
struct gc_mutator *tail = heap->mutators;
if (tail) {
mut->next = tail;
tail->prev = mut;
}
heap->mutators = mut;
heap->mutator_count++;
heap_unlock(heap);
}
static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
MUTATOR_EVENT(mut, mutator_removed);
mut->heap = NULL;
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
heap->mutator_count--;
if (mut->next)
mut->next->prev = mut->prev;
if (mut->prev)
mut->prev->next = mut->next;
else
heap->mutators = mut->next;
// We have no roots. If there is a GC stop currently in progress,
// maybe tell the controller it can continue.
if (mutators_are_stopping(heap) && all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void request_mutators_to_stop(struct gc_heap *heap) {
GC_ASSERT(!mutators_are_stopping(heap));
atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed);
}
static void allow_mutators_to_continue(struct gc_heap *heap) {
GC_ASSERT(mutators_are_stopping(heap));
GC_ASSERT(all_mutators_stopped(heap));
heap->paused_mutator_count--;
atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed);
GC_ASSERT(!mutators_are_stopping(heap));
pthread_cond_broadcast(&heap->mutator_cond);
}
static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) {
size_t previous = heap->large_object_pages;
heap->large_object_pages = npages;
GC_ASSERT(npages <= previous);
size_t bytes = (previous - npages) <<
heap_large_object_space(heap)->page_size_log2;
copy_space_reacquire_memory(heap_copy_space(heap), bytes);
}
void gc_mutator_set_roots(struct gc_mutator *mut,
struct gc_mutator_roots *roots) {
mut->roots = roots;
}
void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) {
heap->roots = roots;
}
void gc_heap_set_extern_space(struct gc_heap *heap,
struct gc_extern_space *space) {
heap->extern_space = space;
}
static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap,
void *trace_data) GC_ALWAYS_INLINE;
static inline void
tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) {
struct gc_trace_worker *worker = trace_data;
if (trace_edge(heap, edge, worker))
gc_trace_worker_enqueue(worker, gc_edge_ref(edge));
}
static inline void trace_one(struct gc_ref ref, struct gc_heap *heap,
struct gc_trace_worker *worker) {
#ifdef DEBUG
if (copy_space_contains(heap_copy_space(heap), ref))
GC_ASSERT(copy_space_object_region(ref) == heap_copy_space(heap)->active_region);
#endif
gc_trace_object(ref, tracer_visit, heap, worker, NULL);
}
static inline void trace_root(struct gc_root root, struct gc_heap *heap,
struct gc_trace_worker *worker) {
switch (root.kind) {
case GC_ROOT_KIND_HEAP:
gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker);
break;
case GC_ROOT_KIND_MUTATOR:
gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker);
break;
case GC_ROOT_KIND_RESOLVED_EPHEMERONS:
gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit,
heap, worker);
break;
case GC_ROOT_KIND_EDGE:
tracer_visit(root.edge, heap, worker);
break;
default:
GC_CRASH();
}
}
static void wait_for_mutators_to_stop(struct gc_heap *heap) {
heap->paused_mutator_count++;
while (!all_mutators_stopped(heap))
pthread_cond_wait(&heap->collector_cond, &heap->lock);
}
void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size,
struct gc_edge edge, struct gc_ref new_val) {
}
static void
pause_mutator_for_collection(struct gc_heap *heap,
struct gc_mutator *mut) GC_NEVER_INLINE;
static void
pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mutators_are_stopping(heap));
GC_ASSERT(!all_mutators_stopped(heap));
MUTATOR_EVENT(mut, mutator_stopped);
heap->paused_mutator_count++;
if (all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
do {
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
} while (mutators_are_stopping(heap));
heap->paused_mutator_count--;
MUTATOR_EVENT(mut, mutator_restarted);
}
static void
pause_mutator_for_collection_with_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
static void
pause_mutator_for_collection_with_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping);
pause_mutator_for_collection(heap, mut);
}
static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) GC_NEVER_INLINE;
static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
pause_mutator_for_collection(heap, mut);
heap_unlock(heap);
}
static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) {
while (mutators_are_stopping(mutator_heap(mut)))
pause_mutator_for_collection_without_lock(mut);
}
static int maybe_grow_heap(struct gc_heap *heap) {
return 0;
}
static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap,
void *unused) {
gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
}
static void add_roots(struct gc_heap *heap) {
for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next)
gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
gc_tracer_add_root(&heap->tracer, gc_root_heap(heap));
gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL);
}
static void resolve_ephemerons_lazily(struct gc_heap *heap) {
heap->check_pending_ephemerons = 0;
}
static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
heap->check_pending_ephemerons = 1;
gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1);
}
static void trace_resolved_ephemerons(struct gc_heap *heap) {
for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
resolved;
resolved = gc_pop_resolved_ephemerons(heap)) {
gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved));
gc_tracer_trace(&heap->tracer);
}
}
static void resolve_finalizers(struct gc_heap *heap) {
for (size_t priority = 0;
priority < gc_finalizer_priority_count();
priority++) {
if (gc_resolve_finalizers(heap->finalizer_state, priority,
visit_root_edge, heap, NULL)) {
gc_tracer_trace(&heap->tracer);
trace_resolved_ephemerons(heap);
}
}
gc_notify_finalizers(heap->finalizer_state, heap);
}
static void sweep_ephemerons(struct gc_heap *heap) {
return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
}
static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
static void collect(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
struct copy_space *copy_space = heap_copy_space(heap);
struct large_object_space *lospace = heap_large_object_space(heap);
struct gc_extern_space *exspace = heap_extern_space(heap);
MUTATOR_EVENT(mut, mutator_cause_gc);
DEBUG("start collect #%ld:\n", heap->count);
large_object_space_start_gc(lospace, 0);
gc_extern_space_start_gc(exspace, 0);
resolve_ephemerons_lazily(heap);
HEAP_EVENT(heap, requesting_stop);
request_mutators_to_stop(heap);
HEAP_EVENT(heap, waiting_for_stop);
wait_for_mutators_to_stop(heap);
HEAP_EVENT(heap, mutators_stopped);
HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING);
copy_space_flip(copy_space);
gc_tracer_prepare(&heap->tracer);
add_roots(heap);
HEAP_EVENT(heap, roots_traced);
gc_tracer_trace(&heap->tracer);
HEAP_EVENT(heap, heap_traced);
resolve_ephemerons_eagerly(heap);
trace_resolved_ephemerons(heap);
HEAP_EVENT(heap, ephemerons_traced);
resolve_finalizers(heap);
HEAP_EVENT(heap, finalizers_traced);
sweep_ephemerons(heap);
gc_tracer_release(&heap->tracer);
copy_space_finish_gc(copy_space);
large_object_space_finish_gc(lospace, 0);
gc_extern_space_finish_gc(exspace, 0);
heap->count++;
heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
size_t live_size = (copy_space->allocated_bytes_at_last_gc +
large_object_space_size_at_last_collection(lospace));
HEAP_EVENT(heap, live_data_size, live_size);
maybe_grow_heap(heap);
if (!copy_space_page_out_blocks_until_memory_released(copy_space)) {
fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
heap->size, copy_space->nslabs);
GC_CRASH();
}
HEAP_EVENT(heap, restarting_mutators);
allow_mutators_to_continue(heap);
}
static void trigger_collection(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
long epoch = heap->count;
while (mutators_are_stopping(heap))
pause_mutator_for_collection_with_lock(mut);
if (epoch == heap->count)
collect(mut);
heap_unlock(heap);
}
void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) {
trigger_collection(mut);
}
static void* allocate_large(struct gc_mutator *mut, size_t size) {
struct gc_heap *heap = mutator_heap(mut);
struct large_object_space *space = heap_large_object_space(heap);
size_t npages = large_object_space_npages(space, size);
copy_space_request_release_memory(heap_copy_space(heap),
npages << space->page_size_log2);
while (!copy_space_page_out_blocks_until_memory_released(heap_copy_space(heap)))
trigger_collection(mut);
atomic_fetch_add(&heap->large_object_pages, npages);
void *ret = large_object_space_alloc(space, npages);
if (!ret)
ret = large_object_space_obtain_and_alloc(space, npages);
if (!ret) {
perror("weird: we have the space but mmap didn't work");
GC_CRASH();
}
return ret;
}
static void get_more_empty_blocks_for_mutator(void *mut) {
trigger_collection(mut);
}
void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
GC_ASSERT(size > 0); // allocating 0 bytes would be silly
if (size > gc_allocator_large_threshold())
return allocate_large(mut, size);
struct gc_ref ret = copy_space_allocate(&mut->allocator,
heap_copy_space(mutator_heap(mut)),
size,
get_more_empty_blocks_for_mutator,
mut);
gc_clear_fresh_allocation(ret, size);
return gc_ref_heap_object(ret);
}
void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
return gc_allocate(mut, size);
}
struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) {
return gc_allocate(mut, gc_ephemeron_size());
}
void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron,
struct gc_ref key, struct gc_ref value) {
gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value);
}
struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) {
return heap->pending_ephemerons;
}
unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) {
return heap->count;
}
struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) {
return gc_allocate(mut, gc_finalizer_size());
}
void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer,
unsigned priority, struct gc_ref object,
struct gc_ref closure) {
gc_finalizer_init_internal(finalizer, object, closure);
gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state,
finalizer, priority);
// No write barrier.
}
struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) {
return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state);
}
void gc_set_finalizer_callback(struct gc_heap *heap,
gc_finalizer_callback callback) {
gc_finalizer_state_set_callback(heap->finalizer_state, callback);
}
static int heap_prepare_pending_ephemerons(struct gc_heap *heap) {
struct gc_pending_ephemerons *cur = heap->pending_ephemerons;
size_t target = heap->size * heap->pending_ephemerons_size_factor;
double slop = heap->pending_ephemerons_size_slop;
heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop);
return !!heap->pending_ephemerons;
}
struct gc_options {
struct gc_common_options common;
};
int gc_option_from_string(const char *str) {
return gc_common_option_from_string(str);
}
struct gc_options* gc_allocate_options(void) {
struct gc_options *ret = malloc(sizeof(struct gc_options));
gc_init_common_options(&ret->common);
return ret;
}
int gc_options_set_int(struct gc_options *options, int option, int value) {
return gc_common_options_set_int(&options->common, option, value);
}
int gc_options_set_size(struct gc_options *options, int option,
size_t value) {
return gc_common_options_set_size(&options->common, option, value);
}
int gc_options_set_double(struct gc_options *options, int option,
double value) {
return gc_common_options_set_double(&options->common, option, value);
}
int gc_options_parse_and_set(struct gc_options *options, int option,
const char *value) {
return gc_common_options_parse_and_set(&options->common, option, value);
}
static int heap_init(struct gc_heap *heap, const struct gc_options *options) {
// *heap is already initialized to 0.
pthread_mutex_init(&heap->lock, NULL);
pthread_cond_init(&heap->mutator_cond, NULL);
pthread_cond_init(&heap->collector_cond, NULL);
heap->size = options->common.heap_size;
if (options->common.parallelism != 1)
fprintf(stderr, "warning: parallelism unimplemented in semispace copying collector\n");
if (!gc_tracer_init(&heap->tracer, heap, 1))
GC_CRASH();
heap->pending_ephemerons_size_factor = 0.005;
heap->pending_ephemerons_size_slop = 0.5;
if (!heap_prepare_pending_ephemerons(heap))
GC_CRASH();
heap->finalizer_state = gc_make_finalizer_state();
if (!heap->finalizer_state)
GC_CRASH();
return 1;
}
int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
struct gc_heap **heap, struct gc_mutator **mut,
struct gc_event_listener event_listener,
void *event_listener_data) {
GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
offsetof(struct copy_space_allocator, hp));
GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
offsetof(struct copy_space_allocator, limit));
if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
fprintf(stderr, "fixed heap size is currently required\n");
return 0;
}
*heap = calloc(1, sizeof(struct gc_heap));
if (!*heap) GC_CRASH();
if (!heap_init(*heap, options))
GC_CRASH();
(*heap)->event_listener = event_listener;
(*heap)->event_listener_data = event_listener_data;
HEAP_EVENT(*heap, init, (*heap)->size);
struct copy_space *space = heap_copy_space(*heap);
int atomic_forward = 0;
if (!copy_space_init(space, (*heap)->size, atomic_forward)) {
free(*heap);
*heap = NULL;
return 0;
}
if (!large_object_space_init(heap_large_object_space(*heap), *heap))
GC_CRASH();
*mut = calloc(1, sizeof(struct gc_mutator));
if (!*mut) GC_CRASH();
add_mutator(*heap, *mut);
return 1;
}
struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base,
struct gc_heap *heap) {
struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator));
if (!ret)
GC_CRASH();
add_mutator(heap, ret);
return ret;
}
void gc_finish_for_thread(struct gc_mutator *mut) {
remove_mutator(mutator_heap(mut), mut);
free(mut);
}
static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mut->next == NULL);
copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap));
heap_lock(heap);
heap->inactive_mutator_count++;
if (all_mutators_stopped(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
heap_lock(heap);
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
heap->inactive_mutator_count--;
heap_unlock(heap);
}
void* gc_call_without_gc(struct gc_mutator *mut,
void* (*f)(void*),
void *data) {
struct gc_heap *heap = mutator_heap(mut);
deactivate_mutator(heap, mut);
void *ret = f(data);
reactivate_mutator(heap, mut);
return ret;
}