
MMC and PCC defer actual page-out operations to background thread

Should avoid excessive VM traffic when allocating large objects, or when
the adaptive heap sizer is on and we see frequent expansions and
resizes.
Andy Wingo 2024-09-16 08:49:30 +02:00
parent d785f082b1
commit 7984f60eae
6 changed files with 184 additions and 76 deletions
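Before the file-by-file diff, here is a minimal, self-contained sketch of the queue discipline this commit introduces in both the copy space and the nofl space. It is not the Whippet code: the names (struct page_out_queue, queue_release, queue_acquire, queue_advance, queue_page_out, BLOCK_SIZE) are invented for illustration, and the real code's locking, block flags, and background-thread registration are omitted.

#include <stddef.h>
#include <sys/mman.h>

#define BLOCK_SIZE (64 * 1024)
#define PAGE_OUT_QUEUE_SIZE 4

struct block { struct block *next; void *payload; };

struct page_out_queue {
  // bucket[0] holds freshly released blocks; the last bucket holds blocks
  // whose memory has already been returned to the OS.
  struct block *bucket[PAGE_OUT_QUEUE_SIZE];
};

static void push(struct block **list, struct block *b) {
  b->next = *list;
  *list = b;
}

static struct block* pop(struct block **list) {
  struct block *b = *list;
  if (b) *list = b->next;
  return b;
}

// Collector side: releasing a block is just a list push; no madvise call
// happens on this path, so large allocations and heap resizes stay cheap.
static void queue_release(struct page_out_queue *q, struct block *b) {
  push(&q->bucket[0], b);
}

// Collector side: when memory is needed again, prefer the youngest blocks,
// which are the most likely to still be in core.
static struct block* queue_acquire(struct page_out_queue *q) {
  for (int age = 0; age < PAGE_OUT_QUEUE_SIZE; age++) {
    struct block *b = pop(&q->bucket[age]);
    if (b) return b;
  }
  return NULL;
}

// Background task scheduled early (GC_BACKGROUND_TASK_START in the diff):
// age blocks one bucket toward the tail, stopping short of the bucket that
// triggers the actual page-out.
static void queue_advance(struct page_out_queue *q) {
  for (int age = PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--)
    for (struct block *b = pop(&q->bucket[age]); b; b = pop(&q->bucket[age]))
      push(&q->bucket[age + 1], b);
}

// Background task scheduled last (GC_BACKGROUND_TASK_END in the diff):
// blocks that survived to the next-to-last bucket were not reused for a few
// ticks, so return their memory to the OS and park them in the final bucket.
static void queue_page_out(struct page_out_queue *q) {
  int age = PAGE_OUT_QUEUE_SIZE - 2;
  for (struct block *b = pop(&q->bucket[age]); b; b = pop(&q->bucket[age])) {
    madvise(b->payload, BLOCK_SIZE, MADV_DONTNEED);
    push(&q->bucket[age + 1], b);
  }
}

The aging buckets buy hysteresis: a block has to sit unused through a couple of the background thread's one-second ticks before the madvise cost is paid, so a burst of heap expansion followed by prompt reuse produces no VM traffic.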

View file

@@ -139,7 +139,7 @@ gc_make_adaptive_heap_sizer(double expansiveness,
sizer->last_bytes_allocated = get_allocation_counter(callback_data);
sizer->last_heartbeat = gc_platform_monotonic_nanoseconds();
sizer->background_task_id = thread
? gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_FIRST,
? gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_MIDDLE,
gc_adaptive_heap_sizer_background_task,
sizer)
: -1;

View file

@@ -9,9 +9,9 @@
#include "debug.h"
enum {
GC_BACKGROUND_TASK_FIRST = 0,
GC_BACKGROUND_TASK_NORMAL = 100,
GC_BACKGROUND_TASK_LAST = 200
GC_BACKGROUND_TASK_START = 0,
GC_BACKGROUND_TASK_MIDDLE = 100,
GC_BACKGROUND_TASK_END = 200
};
struct gc_background_task {
@@ -21,12 +21,18 @@ struct gc_background_task {
void *data;
};
enum gc_background_thread_state {
GC_BACKGROUND_THREAD_STARTING,
GC_BACKGROUND_THREAD_RUNNING,
GC_BACKGROUND_THREAD_STOPPING
};
struct gc_background_thread {
size_t count;
size_t capacity;
struct gc_background_task *tasks;
int next_id;
int stopping;
enum gc_background_thread_state state;
pthread_t thread;
pthread_mutex_t lock;
pthread_cond_t cond;
@@ -35,19 +41,20 @@ struct gc_background_thread {
static void*
gc_background_thread(void *data) {
struct gc_background_thread *thread = data;
pthread_mutex_lock(&thread->lock);
while (thread->state == GC_BACKGROUND_THREAD_STARTING)
pthread_cond_wait(&thread->cond, &thread->lock);
struct timespec ts;
if (clock_gettime(CLOCK_REALTIME, &ts)) {
perror("background thread: failed to get time!");
return NULL;
}
pthread_mutex_lock(&thread->lock);
while (!thread->stopping) {
while (thread->state == GC_BACKGROUND_THREAD_RUNNING) {
ts.tv_sec += 1;
pthread_cond_timedwait(&thread->cond, &thread->lock, &ts);
if (thread->stopping)
break;
for (size_t i = 0; i < thread->count; i++)
thread->tasks[i].run(thread->tasks[i].data);
if (thread->state == GC_BACKGROUND_THREAD_RUNNING)
for (size_t i = 0; i < thread->count; i++)
thread->tasks[i].run(thread->tasks[i].data);
}
pthread_mutex_unlock(&thread->lock);
return NULL;
@@ -63,6 +70,7 @@ gc_make_background_thread(void) {
thread->tasks = NULL;
thread->count = 0;
thread->capacity = 0;
thread->state = GC_BACKGROUND_THREAD_STARTING;
pthread_mutex_init(&thread->lock, NULL);
pthread_cond_init(&thread->cond, NULL);
if (pthread_create(&thread->thread, NULL, gc_background_thread, thread)) {
@@ -72,6 +80,15 @@ gc_make_background_thread(void) {
return thread;
}
static void
gc_background_thread_start(struct gc_background_thread *thread) {
pthread_mutex_lock(&thread->lock);
GC_ASSERT_EQ(thread->state, GC_BACKGROUND_THREAD_STARTING);
thread->state = GC_BACKGROUND_THREAD_RUNNING;
pthread_mutex_unlock(&thread->lock);
pthread_cond_signal(&thread->cond);
}
static int
gc_background_thread_add_task(struct gc_background_thread *thread,
int priority, void (*run)(void *data),
@@ -126,8 +143,8 @@ gc_background_thread_remove_task(struct gc_background_thread *thread,
static void
gc_destroy_background_thread(struct gc_background_thread *thread) {
pthread_mutex_lock(&thread->lock);
GC_ASSERT(!thread->stopping);
thread->stopping = 1;
GC_ASSERT(thread->state == GC_BACKGROUND_THREAD_RUNNING);
thread->state = GC_BACKGROUND_THREAD_STOPPING;
pthread_mutex_unlock(&thread->lock);
pthread_cond_signal(&thread->cond);
pthread_join(thread->thread, NULL);

View file

@@ -10,6 +10,7 @@
#include "gc-internal.h"
#include "assert.h"
#include "background-thread.h"
#include "debug.h"
#include "extents.h"
#include "gc-align.h"
@@ -102,13 +103,16 @@ copy_space_object_region(struct gc_ref obj) {
return (gc_ref_value(obj) / COPY_SPACE_REGION_SIZE) & 1;
}
#define COPY_SPACE_PAGE_OUT_QUEUE_SIZE 4
struct copy_space {
struct copy_space_block *empty;
struct copy_space_block *partly_full;
struct copy_space_block *full ALIGNED_TO_AVOID_FALSE_SHARING;
size_t allocated_bytes;
size_t fragmentation;
struct copy_space_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING;
struct copy_space_block *paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE]
ALIGNED_TO_AVOID_FALSE_SHARING;
ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING;
// The rest of these members are only changed rarely and with the heap
// lock.
@@ -186,31 +190,23 @@ copy_space_push_partly_full_block(struct copy_space *space,
copy_space_push_block(&space->partly_full, block);
}
static struct copy_space_block*
copy_space_pop_paged_out_block(struct copy_space *space) {
return copy_space_pop_block(&space->paged_out);
}
static void
copy_space_push_paged_out_block(struct copy_space *space,
struct copy_space_block *block) {
copy_space_push_block(&space->paged_out, block);
}
static void
copy_space_page_out_block(struct copy_space *space,
struct copy_space_block *block) {
block->in_core = 0;
block->all_zeroes[0] = block->all_zeroes[1] = 1;
madvise(copy_space_block_payload(block), COPY_SPACE_BLOCK_SIZE, MADV_DONTNEED);
copy_space_push_paged_out_block(space, block);
copy_space_push_block(block->in_core
? &space->paged_out[0]
: &space->paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE-1],
block);
}
static struct copy_space_block*
copy_space_page_in_block(struct copy_space *space) {
struct copy_space_block* block = copy_space_pop_paged_out_block(space);
if (block) block->in_core = 1;
return block;
for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) {
struct copy_space_block *block =
copy_space_pop_block(&space->paged_out[age]);
if (block) return block;
}
return NULL;
}
static ssize_t
@@ -280,6 +276,7 @@ copy_space_allocator_acquire_empty_block(struct copy_space_allocator *alloc,
if (copy_space_allocator_acquire_block(alloc,
copy_space_pop_empty_block(space),
space->active_region)) {
alloc->block->in_core = 1;
if (alloc->block->all_zeroes[space->active_region])
alloc->block->all_zeroes[space->active_region] = 0;
else
@@ -629,15 +626,45 @@ copy_space_expand(struct copy_space *space, size_t bytes) {
struct copy_space_block *block = &slabs[slab].headers[idx];
block->all_zeroes[0] = block->all_zeroes[1] = 1;
block->in_core = 0;
copy_space_push_paged_out_block(space, block);
copy_space_page_out_block(space, block);
reserved -= COPY_SPACE_BLOCK_SIZE;
}
}
copy_space_reacquire_memory(space, 0);
}
static void
copy_space_advance_page_out_queue(void *data) {
struct copy_space *space = data;
for (int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) {
while (1) {
struct copy_space_block *block =
copy_space_pop_block(&space->paged_out[age]);
if (!block) break;
copy_space_push_block(&space->paged_out[age + 1], block);
}
}
}
static void
copy_space_page_out_blocks(void *data) {
struct copy_space *space = data;
int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 2;
while (1) {
struct copy_space_block *block =
copy_space_pop_block(&space->paged_out[age]);
if (!block) break;
block->in_core = 0;
block->all_zeroes[0] = block->all_zeroes[1] = 1;
madvise(copy_space_block_payload(block), COPY_SPACE_BLOCK_SIZE,
MADV_DONTNEED);
copy_space_push_block(&space->paged_out[age + 1], block);
}
}
static int
copy_space_init(struct copy_space *space, size_t size, int atomic) {
copy_space_init(struct copy_space *space, size_t size, int atomic,
struct gc_background_thread *thread) {
size = align_up(size, COPY_SPACE_BLOCK_SIZE);
size_t reserved = align_up(size, COPY_SPACE_SLAB_SIZE);
size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE;
@@ -648,7 +675,8 @@ copy_space_init(struct copy_space *space, size_t size, int atomic) {
space->empty = NULL;
space->partly_full = NULL;
space->full = NULL;
space->paged_out = NULL;
for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++)
space->paged_out[age] = NULL;
space->allocated_bytes = 0;
space->fragmentation = 0;
space->bytes_to_page_out = 0;
@@ -662,16 +690,21 @@ copy_space_init(struct copy_space *space, size_t size, int atomic) {
for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) {
struct copy_space_block *block = &slabs[slab].headers[idx];
block->all_zeroes[0] = block->all_zeroes[1] = 1;
block->in_core = 0;
if (reserved > size) {
block->in_core = 0;
copy_space_push_paged_out_block(space, block);
copy_space_page_out_block(space, block);
reserved -= COPY_SPACE_BLOCK_SIZE;
} else {
block->in_core = 1;
copy_space_push_empty_block(space, block);
}
}
}
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START,
copy_space_advance_page_out_queue,
space);
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END,
copy_space_page_out_blocks,
space);
return 1;
}

View file

@@ -1010,6 +1010,8 @@ heap_init(struct gc_heap *heap, const struct gc_options *options) {
if (!heap->finalizer_state)
GC_CRASH();
heap->background_thread = gc_make_background_thread();
return 1;
}
@@ -1064,7 +1066,8 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
struct nofl_space *space = heap_nofl_space(*heap);
if (!nofl_space_init(space, (*heap)->size,
options->common.parallelism != 1,
(*heap)->fragmentation_low_threshold)) {
(*heap)->fragmentation_low_threshold,
(*heap)->background_thread)) {
free(*heap);
*heap = NULL;
return 0;
@@ -1073,7 +1076,6 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
if (!large_object_space_init(heap_large_object_space(*heap), *heap))
GC_CRASH();
(*heap)->background_thread = gc_make_background_thread();
(*heap)->sizer = gc_make_heap_sizer(*heap, &options->common,
allocation_counter_from_thread,
set_heap_size_from_thread,
@@ -1084,6 +1086,9 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
if (!*mut) GC_CRASH();
gc_stack_init(&(*mut)->stack, stack_base);
add_mutator(*heap, *mut);
gc_background_thread_start((*heap)->background_thread);
return 1;
}

View file

@@ -137,6 +137,8 @@ struct nofl_block_list {
uintptr_t blocks;
};
#define NOFL_PAGE_OUT_QUEUE_SIZE 4
struct nofl_space {
uint64_t sweep_mask;
uint8_t live_mask;
@@ -146,7 +148,7 @@ struct nofl_space {
size_t heap_size;
uint8_t last_collection_was_minor;
struct nofl_block_list empty;
struct nofl_block_list unavailable;
struct nofl_block_list paged_out[NOFL_PAGE_OUT_QUEUE_SIZE];
struct nofl_block_list to_sweep;
struct nofl_block_list partly_full;
struct nofl_block_list full;
@@ -407,31 +409,26 @@ nofl_block_count(struct nofl_block_list *list) {
return atomic_load_explicit(&list->count, memory_order_acquire);
}
static void
nofl_push_paged_out_block(struct nofl_space *space,
struct nofl_block_ref block) {
GC_ASSERT(nofl_block_has_flag(block,
NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT));
nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE);
nofl_push_block(&space->unavailable, block);
}
static void
nofl_push_unavailable_block(struct nofl_space *space,
struct nofl_block_ref block) {
if (!nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT)) {
nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED);
}
nofl_push_paged_out_block(space, block);
nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE);
nofl_push_block(nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT)
? &space->paged_out[NOFL_PAGE_OUT_QUEUE_SIZE-1]
: &space->paged_out[0],
block);
}
static struct nofl_block_ref
nofl_pop_unavailable_block(struct nofl_space *space) {
struct nofl_block_ref block = nofl_pop_block(&space->unavailable);
if (!nofl_block_is_null(block))
nofl_block_clear_flag(block, NOFL_BLOCK_UNAVAILABLE);
return block;
for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) {
struct nofl_block_ref block = nofl_pop_block(&space->paged_out[age]);
if (!nofl_block_is_null(block)) {
nofl_block_clear_flag(block, NOFL_BLOCK_UNAVAILABLE);
return block;
}
}
return nofl_block_null();
}
static void
@@ -445,14 +442,23 @@ nofl_pop_empty_block(struct nofl_space *space) {
return nofl_pop_block(&space->empty);
}
static size_t
nofl_active_block_count(struct nofl_space *space) {
size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
size_t unavailable = 0;
for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++)
unavailable += nofl_block_count(&space->paged_out[age]);
GC_ASSERT(unavailable <= total);
return total - unavailable;
}
static int
nofl_maybe_push_evacuation_target(struct nofl_space *space,
struct nofl_block_ref block,
double reserve) {
size_t targets = nofl_block_count(&space->evacuation_targets);
size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
size_t unavailable = nofl_block_count(&space->unavailable);
if (targets >= (total - unavailable) * reserve)
size_t active = nofl_active_block_count(space);
if (targets >= active * reserve)
return 0;
nofl_push_block(&space->evacuation_targets, block);
@@ -1084,9 +1090,8 @@ nofl_space_finish_evacuation(struct nofl_space *space) {
// repopulate the reserve.
GC_ASSERT(space->evacuating);
space->evacuating = 0;
size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
size_t unavailable = nofl_block_count(&space->unavailable);
size_t reserve = space->evacuation_minimum_reserve * (total - unavailable);
size_t active = nofl_active_block_count(space);
size_t reserve = space->evacuation_minimum_reserve * active;
GC_ASSERT(nofl_block_count(&space->evacuation_targets) == 0);
while (reserve--) {
struct nofl_block_ref block = nofl_pop_block(&space->empty);
@@ -1214,7 +1219,8 @@ nofl_space_verify_before_restart(struct nofl_space *space) {
nofl_space_verify_swept_blocks(space, &space->full);
nofl_space_verify_swept_blocks(space, &space->old);
nofl_space_verify_empty_blocks(space, &space->empty, 1);
nofl_space_verify_empty_blocks(space, &space->unavailable, 0);
for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++)
nofl_space_verify_empty_blocks(space, &space->paged_out[age], 0);
// GC_ASSERT(space->last_collection_was_minor || !nofl_block_count(&space->old));
}
@@ -1229,9 +1235,8 @@ nofl_space_finish_gc(struct nofl_space *space,
// If we were evacuating and preferentially allocated empty blocks
// to the evacuation reserve, return those blocks to the empty set
// for allocation by the mutator.
size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
size_t unavailable = nofl_block_count(&space->unavailable);
size_t target = space->evacuation_minimum_reserve * (total - unavailable);
size_t active = nofl_active_block_count(space);
size_t target = space->evacuation_minimum_reserve * active;
size_t reserve = nofl_block_count(&space->evacuation_targets);
while (reserve-- > target)
nofl_push_block(&space->empty,
@@ -1626,9 +1631,8 @@ nofl_space_shrink(struct nofl_space *space, size_t bytes) {
// during trace, synchronously from gc_heap_sizer_on_gc, or async but subject
// to the heap lock.
if (pending > 0) {
size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
size_t unavailable = nofl_block_count(&space->unavailable);
size_t target = space->evacuation_minimum_reserve * (total - unavailable);
size_t active = nofl_active_block_count(space);
size_t target = space->evacuation_minimum_reserve * active;
ssize_t avail = nofl_block_count(&space->evacuation_targets);
while (avail > target && pending > 0) {
struct nofl_block_ref block = nofl_pop_block(&space->evacuation_targets);
@@ -1660,15 +1664,52 @@ nofl_space_expand(struct nofl_space *space, size_t bytes) {
uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data;
struct nofl_block_ref block = nofl_block_for_addr(addr);
nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
nofl_push_paged_out_block(space, block);
nofl_push_unavailable_block(space, block);
}
}
nofl_space_reacquire_memory(space, 0);
}
static void
nofl_space_advance_page_out_queue(void *data) {
// When the nofl space goes to return a block to the OS, it goes on the head
// of the page-out queue. Every second, the background thread will age the
// queue, moving all blocks from index 0 to index 1, and so on. When a block
// reaches the end of the queue it is paged out (and stays at the end of the
// queue). In this task, invoked by the background thread, we age queue
// items, except that we don't page out yet, as it could be that some other
// background task will need to pull pages back in.
struct nofl_space *space = data;
for (int age = NOFL_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) {
while (1) {
struct nofl_block_ref block = nofl_pop_block(&space->paged_out[age]);
if (nofl_block_is_null(block))
break;
nofl_push_block(&space->paged_out[age + 1], block);
}
}
}
static void
nofl_space_page_out_blocks(void *data) {
// This task is invoked by the background thread after other tasks. It
// actually pages out blocks that reached the end of the queue.
struct nofl_space *space = data;
int age = NOFL_PAGE_OUT_QUEUE_SIZE - 2;
while (1) {
struct nofl_block_ref block = nofl_pop_block(&space->paged_out[age]);
if (nofl_block_is_null(block))
break;
nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED);
nofl_push_block(&space->paged_out[age + 1], block);
}
}
static int
nofl_space_init(struct nofl_space *space, size_t size, int atomic,
double promotion_threshold) {
double promotion_threshold,
struct gc_background_thread *thread) {
size = align_up(size, NOFL_BLOCK_SIZE);
size_t reserved = align_up(size, NOFL_SLAB_SIZE);
size_t nslabs = reserved / NOFL_SLAB_SIZE;
@@ -1689,7 +1730,7 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic,
struct nofl_block_ref block = nofl_block_for_addr(addr);
nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
if (reserved > size) {
nofl_push_paged_out_block(space, block);
nofl_push_unavailable_block(space, block);
reserved -= NOFL_BLOCK_SIZE;
} else {
if (!nofl_push_evacuation_target_if_needed(space, block))
@@ -1697,6 +1738,12 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic,
}
}
}
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START,
nofl_space_advance_page_out_queue,
space);
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END,
nofl_space_page_out_blocks,
space);
return 1;
}

View file

@@ -606,6 +606,8 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) {
if (!heap->finalizer_state)
GC_CRASH();
heap->background_thread = gc_make_background_thread();
return 1;
}
@@ -651,7 +653,8 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
struct copy_space *space = heap_copy_space(*heap);
int atomic_forward = options->common.parallelism > 1;
if (!copy_space_init(space, (*heap)->size, atomic_forward)) {
if (!copy_space_init(space, (*heap)->size, atomic_forward,
(*heap)->background_thread)) {
free(*heap);
*heap = NULL;
return 0;
@@ -670,6 +673,9 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
*mut = calloc(1, sizeof(struct gc_mutator));
if (!*mut) GC_CRASH();
add_mutator(*heap, *mut);
gc_background_thread_start((*heap)->background_thread);
return 1;
}