diff --git a/api/gc-api.h b/api/gc-api.h index 4831500fe..e60be7579 100644 --- a/api/gc-api.h +++ b/api/gc-api.h @@ -59,11 +59,9 @@ static inline void gc_clear_fresh_allocation(struct gc_ref obj, memset(gc_ref_heap_object(obj), 0, size); } -static inline void gc_update_alloc_table(struct gc_mutator *mut, - struct gc_ref obj, +static inline void gc_update_alloc_table(struct gc_ref obj, size_t size) GC_ALWAYS_INLINE; -static inline void gc_update_alloc_table(struct gc_mutator *mut, - struct gc_ref obj, +static inline void gc_update_alloc_table(struct gc_ref obj, size_t size) { size_t alignment = gc_allocator_alloc_table_alignment(); if (!alignment) return; @@ -117,7 +115,7 @@ static inline void* gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut, *hp_loc = new_hp; gc_clear_fresh_allocation(gc_ref(hp), size); - gc_update_alloc_table(mut, gc_ref(hp), size); + gc_update_alloc_table(gc_ref(hp), size); return (void*)hp; } @@ -138,7 +136,7 @@ static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut, size *freelist_loc = *(void**)head; gc_clear_fresh_allocation(gc_ref_from_heap_object(head), size); - gc_update_alloc_table(mut, gc_ref_from_heap_object(head), size); + gc_update_alloc_table(gc_ref_from_heap_object(head), size); return head; } diff --git a/src/nofl-space.h b/src/nofl-space.h new file mode 100644 index 000000000..fd718c962 --- /dev/null +++ b/src/nofl-space.h @@ -0,0 +1,1448 @@ +#ifndef NOFL_SPACE_H +#define NOFL_SPACE_H + +#include +#include +#include +#include + +#include "gc-api.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "assert.h" +#include "debug.h" +#include "gc-align.h" +#include "gc-attrs.h" +#include "gc-inline.h" +#include "spin.h" +#include "swar.h" + +// This is the nofl space! It is a mark space which doesn't use +// free-lists to allocate, and which can evacuate objects if +// fragmentation is too high, inspired by Immix. Nofl stands for "no +// free-list", but also "novel", in the sense that it hasn't been tried +// before. + +#define NOFL_GRANULE_SIZE 16 +#define NOFL_GRANULE_SIZE_LOG_2 4 +#define NOFL_MEDIUM_OBJECT_THRESHOLD 256 +#define NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD 16 + +STATIC_ASSERT_EQ(NOFL_GRANULE_SIZE, 1 << NOFL_GRANULE_SIZE_LOG_2); +STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD, + NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD * NOFL_GRANULE_SIZE); + +// Each granule has one mark byte stored in a side table. A granule's +// mark state is a whole byte instead of a bit to facilitate parallel +// marking. (Parallel markers are allowed to race.) We also use this +// byte to compute object extent, via a bit flag indicating +// end-of-object. +// +// Because we want to allow for conservative roots, we need to know +// whether an address indicates an object or not. That means that when +// an object is allocated, it has to set a bit, somewhere. We use the +// metadata byte for this purpose, setting the "young" bit. +// +// The "young" bit's name might make you think about generational +// collection, and indeed all objects collected in a minor collection +// will have this bit set. However, the nofl space never needs to check +// for the young bit; if it weren't for the need to identify +// conservative roots, we wouldn't need a young bit at all. Perhaps in +// an all-precise system, we would be able to avoid the overhead of +// initializing mark byte upon each fresh allocation. +// +// When an object becomes dead after a GC, it will still have a bit set +// -- maybe the young bit, or maybe a survivor bit. 
The sweeper has to +// clear these bits before the next collection. But, for concurrent +// marking, we will also be marking "live" objects, updating their mark +// bits. So there are four object states concurrently observable: +// young, dead, survivor, and marked. (If we didn't have concurrent +// marking we would still need the "marked" state, because marking +// mutator roots before stopping is also a form of concurrent marking.) +// Even though these states are mutually exclusive, we use separate bits +// for them because we have the space. After each collection, the dead, +// survivor, and marked states rotate by one bit. +enum nofl_metadata_byte { + NOFL_METADATA_BYTE_NONE = 0, + NOFL_METADATA_BYTE_YOUNG = 1, + NOFL_METADATA_BYTE_MARK_0 = 2, + NOFL_METADATA_BYTE_MARK_1 = 4, + NOFL_METADATA_BYTE_MARK_2 = 8, + NOFL_METADATA_BYTE_END = 16, + NOFL_METADATA_BYTE_EPHEMERON = 32, + NOFL_METADATA_BYTE_PINNED = 64, + NOFL_METADATA_BYTE_UNUSED_1 = 128 +}; + +static uint8_t +nofl_rotate_dead_survivor_marked(uint8_t mask) { + uint8_t all = + NOFL_METADATA_BYTE_MARK_0 | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + return ((mask << 1) | (mask >> 2)) & all; +} + +#define NOFL_SLAB_SIZE (4 * 1024 * 1024) +#define NOFL_BLOCK_SIZE (64 * 1024) +#define NOFL_METADATA_BYTES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) +#define NOFL_BLOCKS_PER_SLAB (NOFL_SLAB_SIZE / NOFL_BLOCK_SIZE) +#define NOFL_META_BLOCKS_PER_SLAB (NOFL_METADATA_BYTES_PER_BLOCK * NOFL_BLOCKS_PER_SLAB / NOFL_BLOCK_SIZE) +#define NOFL_NONMETA_BLOCKS_PER_SLAB (NOFL_BLOCKS_PER_SLAB - NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_METADATA_BYTES_PER_SLAB (NOFL_NONMETA_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK) +#define NOFL_SLACK_METADATA_BYTES_PER_SLAB (NOFL_META_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK) +#define NOFL_REMSET_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) +#define NOFL_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB) +#define NOFL_SLACK_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_REMSET_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) +#define NOFL_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB) +#define NOFL_SLACK_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_HEADER_BYTES_PER_SLAB NOFL_SLACK_SUMMARY_BYTES_PER_SLAB + +struct nofl_slab; + +struct nofl_slab_header { + union { + struct { + struct nofl_slab *next; + struct nofl_slab *prev; + }; + uint8_t padding[NOFL_HEADER_BYTES_PER_SLAB]; + }; +}; +STATIC_ASSERT_EQ(sizeof(struct nofl_slab_header), NOFL_HEADER_BYTES_PER_SLAB); + +// Sometimes we want to put a block on a singly-linked list. For that +// there's a pointer reserved in the block summary. But because the +// pointer is aligned (32kB on 32-bit, 64kB on 64-bit), we can portably +// hide up to 15 flags in the low bits. These flags can be accessed +// non-atomically by the mutator when it owns a block; otherwise they +// need to be accessed atomically. 
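+//
+// As a rough illustration (the address below is made up): with 64 kB
+// blocks, a summary whose next block lives at 0x7f0000010000 has the
+// low 16 bits of that pointer free, so next_and_flags might hold
+// 0x7f0000010000 | NOFL_BLOCK_NEEDS_SWEEP, and
+// nofl_block_summary_next() recovers the pointer by aligning back
+// down to NOFL_BLOCK_SIZE.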
+enum nofl_block_summary_flag { + NOFL_BLOCK_OUT_FOR_THREAD = 0x1, + NOFL_BLOCK_HAS_PIN = 0x2, + NOFL_BLOCK_PAGED_OUT = 0x4, + NOFL_BLOCK_NEEDS_SWEEP = 0x8, + NOFL_BLOCK_UNAVAILABLE = 0x10, + NOFL_BLOCK_EVACUATE = 0x20, + NOFL_BLOCK_VENERABLE = 0x40, + NOFL_BLOCK_VENERABLE_AFTER_SWEEP = 0x80, + NOFL_BLOCK_FLAG_UNUSED_8 = 0x100, + NOFL_BLOCK_FLAG_UNUSED_9 = 0x200, + NOFL_BLOCK_FLAG_UNUSED_10 = 0x400, + NOFL_BLOCK_FLAG_UNUSED_11 = 0x800, + NOFL_BLOCK_FLAG_UNUSED_12 = 0x1000, + NOFL_BLOCK_FLAG_UNUSED_13 = 0x2000, + NOFL_BLOCK_FLAG_UNUSED_14 = 0x4000, +}; + +struct nofl_block_summary { + union { + struct { + // Counters related to previous collection: how many holes there + // were, and how much space they had. + uint16_t hole_count; + uint16_t free_granules; + // Counters related to allocation since previous collection: + // wasted space due to fragmentation. + uint16_t holes_with_fragmentation; + uint16_t fragmentation_granules; + // After a block is swept, if it's empty it goes on the empties + // list. Otherwise if it's not immediately used by a mutator (as + // is usually the case), it goes on the swept list. Both of these + // lists use this field. But as the next element in the field is + // block-aligned, we stash flags in the low bits. + uintptr_t next_and_flags; + }; + uint8_t padding[NOFL_SUMMARY_BYTES_PER_BLOCK]; + }; +}; +STATIC_ASSERT_EQ(sizeof(struct nofl_block_summary), + NOFL_SUMMARY_BYTES_PER_BLOCK); + +struct nofl_block { + char data[NOFL_BLOCK_SIZE]; +}; + +struct nofl_slab { + struct nofl_slab_header header; + struct nofl_block_summary summaries[NOFL_NONMETA_BLOCKS_PER_SLAB]; + uint8_t remembered_set[NOFL_REMSET_BYTES_PER_SLAB]; + uint8_t metadata[NOFL_METADATA_BYTES_PER_SLAB]; + struct nofl_block blocks[NOFL_NONMETA_BLOCKS_PER_SLAB]; +}; +STATIC_ASSERT_EQ(sizeof(struct nofl_slab), NOFL_SLAB_SIZE); + +static struct nofl_slab* +nofl_object_slab(void *obj) { + uintptr_t addr = (uintptr_t) obj; + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + return (struct nofl_slab*) base; +} + +static uint8_t* +nofl_metadata_byte_for_addr(uintptr_t addr) { + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + uintptr_t granule = (addr & (NOFL_SLAB_SIZE - 1)) >> NOFL_GRANULE_SIZE_LOG_2; + return (uint8_t*) (base + granule); +} + +static uint8_t* +nofl_metadata_byte_for_object(struct gc_ref ref) { + return nofl_metadata_byte_for_addr(gc_ref_value(ref)); +} + +#define NOFL_GRANULES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) +#define NOFL_GRANULES_PER_REMSET_BYTE \ + (NOFL_GRANULES_PER_BLOCK / NOFL_REMSET_BYTES_PER_BLOCK) + +static struct nofl_block_summary* +nofl_block_summary_for_addr(uintptr_t addr) { + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + uintptr_t block = (addr & (NOFL_SLAB_SIZE - 1)) / NOFL_BLOCK_SIZE; + return (struct nofl_block_summary*) + (base + block * sizeof(struct nofl_block_summary)); +} + +static uintptr_t +nofl_block_summary_has_flag(struct nofl_block_summary *summary, + enum nofl_block_summary_flag flag) { + return summary->next_and_flags & flag; +} + +static void +nofl_block_summary_set_flag(struct nofl_block_summary *summary, + enum nofl_block_summary_flag flag) { + summary->next_and_flags |= flag; +} + +static void +nofl_block_summary_clear_flag(struct nofl_block_summary *summary, + enum nofl_block_summary_flag flag) { + summary->next_and_flags &= ~(uintptr_t)flag; +} + +static uintptr_t +nofl_block_summary_next(struct nofl_block_summary *summary) { + return align_down(summary->next_and_flags, NOFL_BLOCK_SIZE); +} + +static void 
+nofl_block_summary_set_next(struct nofl_block_summary *summary, + uintptr_t next) { + GC_ASSERT((next & (NOFL_BLOCK_SIZE - 1)) == 0); + summary->next_and_flags = + (summary->next_and_flags & (NOFL_BLOCK_SIZE - 1)) | next; +} + +// Lock-free block list. +struct nofl_block_list { + size_t count; + uintptr_t blocks; +}; + +static void +nofl_push_block(struct nofl_block_list *list, uintptr_t block) { + atomic_fetch_add_explicit(&list->count, 1, memory_order_acq_rel); + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + uintptr_t next = atomic_load_explicit(&list->blocks, memory_order_acquire); + do { + nofl_block_summary_set_next(summary, next); + } while (!atomic_compare_exchange_weak(&list->blocks, &next, block)); +} + +static uintptr_t +nofl_pop_block(struct nofl_block_list *list) { + uintptr_t head = atomic_load_explicit(&list->blocks, memory_order_acquire); + struct nofl_block_summary *summary; + uintptr_t next; + do { + if (!head) + return 0; + summary = nofl_block_summary_for_addr(head); + next = nofl_block_summary_next(summary); + } while (!atomic_compare_exchange_weak(&list->blocks, &head, next)); + nofl_block_summary_set_next(summary, 0); + atomic_fetch_sub_explicit(&list->count, 1, memory_order_acq_rel); + return head; +} + +static inline size_t +nofl_size_to_granules(size_t size) { + return (size + NOFL_GRANULE_SIZE - 1) >> NOFL_GRANULE_SIZE_LOG_2; +} + +struct nofl_evacuation_allocator { + size_t allocated; // atomically + size_t limit; + uintptr_t block_cursor; // atomically +}; + +struct nofl_space { + uint64_t sweep_mask; + uint8_t live_mask; + uint8_t marked_mask; + uint8_t evacuating; + uintptr_t low_addr; + size_t extent; + size_t heap_size; + uint8_t last_collection_was_minor; + uintptr_t next_block; // atomically + struct nofl_block_list empty; + struct nofl_block_list unavailable; + struct nofl_block_list evacuation_targets; + double evacuation_minimum_reserve; + double evacuation_reserve; + double venerable_threshold; + ssize_t pending_unavailable_bytes; // atomically + struct nofl_evacuation_allocator evacuation_allocator; + struct nofl_slab *slabs; + size_t nslabs; + uintptr_t granules_freed_by_last_collection; // atomically + uintptr_t fragmentation_granules_since_last_collection; // atomically +}; + +struct nofl_allocator { + uintptr_t alloc; + uintptr_t sweep; + uintptr_t block; +}; + +static inline void +nofl_clear_memory(uintptr_t addr, size_t size) { + memset((char*)addr, 0, size); +} + +static size_t +nofl_space_live_object_granules(uint8_t *metadata) { + return scan_for_byte(metadata, -1, broadcast_byte(NOFL_METADATA_BYTE_END)) + 1; +} + +static inline int +nofl_space_mark_object(struct nofl_space *space, struct gc_ref ref) { + uint8_t *loc = nofl_metadata_byte_for_object(ref); + uint8_t byte = *loc; + if (byte & space->marked_mask) + return 0; + uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 + | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + *loc = (byte & ~mask) | space->marked_mask; + return 1; +} + +static uintptr_t +nofl_make_evacuation_allocator_cursor(uintptr_t block, size_t allocated) { + GC_ASSERT(allocated < (NOFL_BLOCK_SIZE - 1) * (uint64_t) NOFL_BLOCK_SIZE); + return align_down(block, NOFL_BLOCK_SIZE) | (allocated / NOFL_BLOCK_SIZE); +} + +static void +nofl_prepare_evacuation_allocator(struct nofl_evacuation_allocator *alloc, + struct nofl_block_list *targets) { + uintptr_t first_block = targets->blocks; + atomic_store_explicit(&alloc->allocated, 0, memory_order_release); + alloc->limit = + 
atomic_load_explicit(&targets->count, memory_order_acquire) * NOFL_BLOCK_SIZE; + atomic_store_explicit(&alloc->block_cursor, + nofl_make_evacuation_allocator_cursor(first_block, 0), + memory_order_release); +} + +static void +nofl_clear_remaining_metadata_bytes_in_block(uintptr_t block, + uintptr_t allocated) { + GC_ASSERT((allocated & (NOFL_GRANULE_SIZE - 1)) == 0); + uintptr_t base = block + allocated; + uintptr_t limit = block + NOFL_BLOCK_SIZE; + uintptr_t granules = (limit - base) >> NOFL_GRANULE_SIZE_LOG_2; + GC_ASSERT(granules <= NOFL_GRANULES_PER_BLOCK); + memset(nofl_metadata_byte_for_addr(base), 0, granules); +} + +static void +nofl_finish_evacuation_allocator_block(uintptr_t block, + uintptr_t allocated) { + GC_ASSERT(allocated <= NOFL_BLOCK_SIZE); + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + size_t fragmentation = (NOFL_BLOCK_SIZE - allocated) >> NOFL_GRANULE_SIZE_LOG_2; + summary->hole_count = 1; + summary->free_granules = NOFL_GRANULES_PER_BLOCK; + summary->holes_with_fragmentation = fragmentation ? 1 : 0; + summary->fragmentation_granules = fragmentation; + if (fragmentation) + nofl_clear_remaining_metadata_bytes_in_block(block, allocated); +} + +static void +nofl_finish_evacuation_allocator(struct nofl_evacuation_allocator *alloc, + struct nofl_block_list *targets, + struct nofl_block_list *empties, + size_t reserve) { + // Blocks that we used for evacuation get returned to the mutator as + // sweepable blocks. Blocks that we didn't get to use go to the + // empties. + size_t allocated = atomic_load_explicit(&alloc->allocated, + memory_order_acquire); + atomic_store_explicit(&alloc->allocated, 0, memory_order_release); + if (allocated > alloc->limit) + allocated = alloc->limit; + while (allocated >= NOFL_BLOCK_SIZE) { + uintptr_t block = nofl_pop_block(targets); + GC_ASSERT(block); + allocated -= NOFL_BLOCK_SIZE; + } + if (allocated) { + // Finish off the last partially-filled block. + uintptr_t block = nofl_pop_block(targets); + GC_ASSERT(block); + nofl_finish_evacuation_allocator_block(block, allocated); + } + size_t remaining = atomic_load_explicit(&targets->count, memory_order_acquire); + while (remaining-- > reserve) + nofl_push_block(empties, nofl_pop_block(targets)); +} + +static struct gc_ref +nofl_evacuation_allocate(struct nofl_space *space, size_t granules) { + // All collector threads compete to allocate from what is logically a + // single bump-pointer arena, which is actually composed of a linked + // list of blocks. + struct nofl_evacuation_allocator *alloc = &space->evacuation_allocator; + uintptr_t cursor = atomic_load_explicit(&alloc->block_cursor, + memory_order_acquire); + size_t bytes = granules * NOFL_GRANULE_SIZE; + size_t prev = atomic_load_explicit(&alloc->allocated, memory_order_acquire); + size_t block_mask = (NOFL_BLOCK_SIZE - 1); + size_t next; + do { + if (prev >= alloc->limit) + // No more space. + return gc_ref_null(); + next = prev + bytes; + if ((prev ^ next) & ~block_mask) + // Allocation straddles a block boundary; advance so it starts a + // fresh block. + next = (next & ~block_mask) + bytes; + } while (!atomic_compare_exchange_weak(&alloc->allocated, &prev, next)); + // OK, we've claimed our memory, starting at next - bytes. Now find + // the node in the linked list of evacuation targets that corresponds + // to this allocation pointer. + uintptr_t block = cursor & ~block_mask; + // This is the SEQ'th block to be allocated into. 
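+  // (A sketch with made-up numbers: if the cursor identifies the
+  // third target block and that block lives at 0x200000000000, the
+  // cursor is 0x200000000000 | 2 -- block address in the high bits,
+  // sequence number in the bits below NOFL_BLOCK_SIZE.)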
+ uintptr_t seq = cursor & block_mask; + // Therefore this block handles allocations starting at SEQ*BLOCK_SIZE + // and continuing for NOFL_BLOCK_SIZE bytes. + uintptr_t base = seq * NOFL_BLOCK_SIZE; + + while ((base ^ next) & ~block_mask) { + GC_ASSERT(base < next); + if (base + NOFL_BLOCK_SIZE > prev) { + // The allocation straddles a block boundary, and the cursor has + // caught up so that we identify the block for the previous + // allocation pointer. Finish the previous block, probably + // leaving a small hole at the end. + nofl_finish_evacuation_allocator_block(block, prev - base); + } + // Cursor lags; advance it. + block = nofl_block_summary_next(nofl_block_summary_for_addr(block)); + base += NOFL_BLOCK_SIZE; + if (base >= alloc->limit) { + // Ran out of blocks! + GC_ASSERT(!block); + return gc_ref_null(); + } + GC_ASSERT(block); + // This store can race with other allocators, but that's OK as long + // as it never advances the cursor beyond the allocation pointer, + // which it won't because we updated the allocation pointer already. + atomic_store_explicit(&alloc->block_cursor, + nofl_make_evacuation_allocator_cursor(block, base), + memory_order_release); + } + + uintptr_t addr = block + (next & block_mask) - bytes; + return gc_ref(addr); +} + +static inline int +nofl_space_evacuate_or_mark_object(struct nofl_space *space, + struct gc_edge edge, + struct gc_ref old_ref) { + uint8_t *metadata = nofl_metadata_byte_for_object(old_ref); + uint8_t byte = *metadata; + if (byte & space->marked_mask) + return 0; + if (space->evacuating && + nofl_block_summary_has_flag(nofl_block_summary_for_addr(gc_ref_value(old_ref)), + NOFL_BLOCK_EVACUATE)) { + // This is an evacuating collection, and we are attempting to + // evacuate this block, and we are tracing this particular object + // for what appears to be the first time. + struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); + + if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) + gc_atomic_forward_acquire(&fwd); + + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + case GC_FORWARDING_STATE_ABORTED: + // Impossible. + GC_CRASH(); + case GC_FORWARDING_STATE_ACQUIRED: { + // We claimed the object successfully; evacuating is up to us. + size_t object_granules = nofl_space_live_object_granules(metadata); + struct gc_ref new_ref = nofl_evacuation_allocate(space, object_granules); + if (gc_ref_is_heap_object(new_ref)) { + // Copy object contents before committing, as we don't know what + // part of the object (if any) will be overwritten by the + // commit. + memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), + object_granules * NOFL_GRANULE_SIZE); + gc_atomic_forward_commit(&fwd, new_ref); + // Now update extent metadata, and indicate to the caller that + // the object's fields need to be traced. + uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref); + memcpy(new_metadata + 1, metadata + 1, object_granules - 1); + gc_edge_update(edge, new_ref); + metadata = new_metadata; + // Fall through to set mark bits. + } else { + // Well shucks; allocation failed, marking the end of + // opportunistic evacuation. No future evacuation of this + // object will succeed. Mark in place instead. + gc_atomic_forward_abort(&fwd); + } + break; + } + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known, or evacuation aborts. 
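+      // (If gc_atomic_forward_retry_busy() returns nonzero, the state
+      // has left BUSY: the racing thread either published the new
+      // address or aborted the copy, as checked below.)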
+ for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + break; + yield_for_spin(spin_count); + } + if (fwd.state == GC_FORWARDING_STATE_ABORTED) + // Remove evacuation aborted; remote will mark and enqueue. + return 0; + ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); + // Fall through. + case GC_FORWARDING_STATE_FORWARDED: + // The object has been evacuated already. Update the edge; + // whoever forwarded the object will make sure it's eventually + // traced. + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 0; + } + } + + uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 + | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + *metadata = (byte & ~mask) | space->marked_mask; + return 1; +} + +static inline int +nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) { + return addr - space->low_addr < space->extent; +} + +static inline int +nofl_space_contains_conservative_ref(struct nofl_space *space, + struct gc_conservative_ref ref) { + return nofl_space_contains_address(space, gc_conservative_ref_value(ref)); +} + +static inline int +nofl_space_contains(struct nofl_space *space, struct gc_ref ref) { + return nofl_space_contains_address(space, gc_ref_value(ref)); +} + +static int +nofl_space_forward_or_mark_if_traced(struct nofl_space *space, + struct gc_edge edge, + struct gc_ref ref) { + uint8_t *metadata = nofl_metadata_byte_for_object(ref); + uint8_t byte = *metadata; + if (byte & space->marked_mask) + return 1; + + if (!space->evacuating) + return 0; + if (!nofl_block_summary_has_flag(nofl_block_summary_for_addr(gc_ref_value(ref)), + NOFL_BLOCK_EVACUATE)) + return 0; + + struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref); + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + return 0; + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known, or evacuation aborts. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + break; + yield_for_spin(spin_count); + } + if (fwd.state == GC_FORWARDING_STATE_ABORTED) + // Remote evacuation aborted; remote will mark and enqueue. + return 1; + ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); + // Fall through. + case GC_FORWARDING_STATE_FORWARDED: + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 1; + default: + GC_CRASH(); + } +} + +static inline struct gc_ref +nofl_space_mark_conservative_ref(struct nofl_space *space, + struct gc_conservative_ref ref, + int possibly_interior) { + uintptr_t addr = gc_conservative_ref_value(ref); + + if (possibly_interior) { + addr = align_down(addr, NOFL_GRANULE_SIZE); + } else { + // Addr not an aligned granule? Not an object. + uintptr_t displacement = addr & (NOFL_GRANULE_SIZE - 1); + if (!gc_is_valid_conservative_ref_displacement(displacement)) + return gc_ref_null(); + addr -= displacement; + } + + // Addr in meta block? Not an object. + if ((addr & (NOFL_SLAB_SIZE - 1)) < NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE) + return gc_ref_null(); + + // Addr in block that has been paged out? Not an object. + struct nofl_block_summary *summary = nofl_block_summary_for_addr(addr); + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + return gc_ref_null(); + + uint8_t *loc = nofl_metadata_byte_for_addr(addr); + uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); + + // Already marked object? Nothing to do. 
+ if (byte & space->marked_mask) + return gc_ref_null(); + + // Addr is the not start of an unmarked object? Search backwards if + // we have interior pointers, otherwise not an object. + uint8_t object_start_mask = space->live_mask | NOFL_METADATA_BYTE_YOUNG; + if (!(byte & object_start_mask)) { + if (!possibly_interior) + return gc_ref_null(); + + uintptr_t block_base = align_down(addr, NOFL_BLOCK_SIZE); + uint8_t *loc_base = nofl_metadata_byte_for_addr(block_base); + do { + // Searched past block? Not an object. + if (loc-- == loc_base) + return gc_ref_null(); + + byte = atomic_load_explicit(loc, memory_order_relaxed); + + // Ran into the end of some other allocation? Not an object, then. + if (byte & NOFL_METADATA_BYTE_END) + return gc_ref_null(); + + // Continue until we find object start. + } while (!(byte & object_start_mask)); + + // Found object start, and object is unmarked; adjust addr. + addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE; + } + + uint8_t mask = NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_MARK_0 + | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2; + atomic_store_explicit(loc, (byte & ~mask) | space->marked_mask, + memory_order_relaxed); + + return gc_ref(addr); +} + +static inline size_t +nofl_space_object_size(struct nofl_space *space, struct gc_ref ref) { + uint8_t *loc = nofl_metadata_byte_for_object(ref); + size_t granules = nofl_space_live_object_granules(loc); + return granules * NOFL_GRANULE_SIZE; +} + +static void +nofl_push_unavailable_block(struct nofl_space *space, uintptr_t block) { + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)); + GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_UNAVAILABLE); + madvise((void*)block, NOFL_BLOCK_SIZE, MADV_DONTNEED); + nofl_push_block(&space->unavailable, block); +} + +static uintptr_t +nofl_pop_unavailable_block(struct nofl_space *space) { + uintptr_t block = nofl_pop_block(&space->unavailable); + if (!block) + return 0; + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + GC_ASSERT(nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)); + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_UNAVAILABLE); + return block; +} + +static uintptr_t +nofl_pop_empty_block(struct nofl_space *space) { + return nofl_pop_block(&space->empty); +} + +static int +nofl_maybe_push_evacuation_target(struct nofl_space *space, + uintptr_t block, double reserve) { + GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block), + NOFL_BLOCK_NEEDS_SWEEP)); + size_t targets = atomic_load_explicit(&space->evacuation_targets.count, + memory_order_acquire); + size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = atomic_load_explicit(&space->unavailable.count, + memory_order_acquire); + if (targets >= (total - unavailable) * reserve) + return 0; + + nofl_push_block(&space->evacuation_targets, block); + return 1; +} + +static int +nofl_push_evacuation_target_if_needed(struct nofl_space *space, + uintptr_t block) { + return nofl_maybe_push_evacuation_target(space, block, + space->evacuation_minimum_reserve); +} + +static int +nofl_push_evacuation_target_if_possible(struct nofl_space *space, + uintptr_t block) { + return nofl_maybe_push_evacuation_target(space, block, + space->evacuation_reserve); +} + +static void +nofl_push_empty_block(struct nofl_space *space, uintptr_t block) { + 
GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block), + NOFL_BLOCK_NEEDS_SWEEP)); + nofl_push_block(&space->empty, block); +} + +static ssize_t +nofl_space_request_release_memory(struct nofl_space *space, size_t bytes) { + return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes; +} + +static void +nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) { + ssize_t pending = + atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes; + while (pending + NOFL_BLOCK_SIZE <= 0) { + uintptr_t block = nofl_pop_unavailable_block(space); + GC_ASSERT(block); + if (nofl_push_evacuation_target_if_needed(space, block)) + continue; + nofl_push_empty_block(space, block); + pending = atomic_fetch_add(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE) + + NOFL_BLOCK_SIZE; + } +} + +static size_t +nofl_allocator_next_hole(struct nofl_allocator *alloc, + struct nofl_space *space); + +static int +nofl_space_sweep_until_memory_released(struct nofl_space *space, + struct nofl_allocator *alloc) { + ssize_t pending = atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire); + // First try to unmap previously-identified empty blocks. If pending + // > 0 and other mutators happen to identify empty blocks, they will + // be unmapped directly and moved to the unavailable list. + while (pending > 0) { + uintptr_t block = nofl_pop_empty_block(space); + if (!block) + break; + // Note that we may have competing uses; if we're evacuating, + // perhaps we should push this block to the evacuation target list. + // That would enable us to reach a fragmentation low water-mark in + // fewer cycles. But maybe evacuation started in order to obtain + // free blocks for large objects; in that case we should just reap + // the fruits of our labor. Probably this second use-case is more + // important. + nofl_push_unavailable_block(space, block); + pending = atomic_fetch_sub(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE); + pending -= NOFL_BLOCK_SIZE; + } + // Otherwise, sweep, transitioning any empty blocks to unavailable and + // throwing away any non-empty block. A bit wasteful but hastening + // the next collection is a reasonable thing to do here. + while (pending > 0) { + if (!nofl_allocator_next_hole(alloc, space)) + return 0; + pending = atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire); + } + return pending <= 0; +} + +static inline int +nofl_is_ephemeron(struct gc_ref ref) { + uint8_t meta = *nofl_metadata_byte_for_addr(gc_ref_value(ref)); + return meta & NOFL_METADATA_BYTE_EPHEMERON; +} + +static void +nofl_space_set_ephemeron_flag(struct gc_ref ref) { + if (gc_has_conservative_intraheap_edges()) { + uint8_t *metadata = nofl_metadata_byte_for_addr(gc_ref_value(ref)); + *metadata |= NOFL_METADATA_BYTE_EPHEMERON; + } +} + +static void nofl_finish_sweeping(struct nofl_allocator *alloc, + struct nofl_space *space); +static void nofl_finish_sweeping_in_block(struct nofl_allocator *alloc, + struct nofl_space *space); + +// Note that it's quite possible (and even likely) that any given remset +// byte doesn't hold any roots, if all stores were to nursery objects. 
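+//
+// With the constants above (64 kB blocks, 16-byte granules), a block
+// has 4096 granules and 256 remembered-set bytes, so one remset byte
+// (card) covers 16 granules, or 256 bytes of heap.  The assertion
+// below checks that this is a multiple of 8, so a card's mark bytes
+// can be scanned eight at a time.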
+STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0); +static void +nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab, + size_t card, + void (*enqueue)(struct gc_ref, struct gc_heap*), + struct gc_heap *heap) { + uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0]; + size_t granule_base = card * NOFL_GRANULES_PER_REMSET_BYTE; + for (size_t granule_in_remset = 0; + granule_in_remset < NOFL_GRANULES_PER_REMSET_BYTE; + granule_in_remset += 8, granule_base += 8) { + uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base); + mark_bytes &= space->sweep_mask; + while (mark_bytes) { + size_t granule_offset = count_zero_bytes(mark_bytes); + mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8)); + size_t granule = granule_base + granule_offset; + uintptr_t addr = first_addr_in_slab + granule * NOFL_GRANULE_SIZE; + GC_ASSERT(nofl_metadata_byte_for_addr(addr) == &slab->metadata[granule]); + enqueue(gc_ref(addr), heap); + } + } +} + +static void +nofl_space_trace_remembered_set(struct nofl_space *space, + void (*enqueue)(struct gc_ref, + struct gc_heap*), + struct gc_heap *heap) { + GC_ASSERT(!space->evacuating); + for (size_t s = 0; s < space->nslabs; s++) { + struct nofl_slab *slab = &space->slabs[s]; + uint8_t *remset = slab->remembered_set; + for (size_t card_base = 0; + card_base < NOFL_REMSET_BYTES_PER_SLAB; + card_base += 8) { + uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base); + if (!remset_bytes) continue; + memset(remset + card_base, 0, 8); + while (remset_bytes) { + size_t card_offset = count_zero_bytes(remset_bytes); + remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8)); + nofl_space_trace_card(space, slab, card_base + card_offset, + enqueue, heap); + } + } + } +} + +static void +nofl_space_clear_remembered_set(struct nofl_space *space) { + if (!GC_GENERATIONAL) return; + for (size_t slab = 0; slab < space->nslabs; slab++) { + memset(space->slabs[slab].remembered_set, 0, NOFL_REMSET_BYTES_PER_SLAB); + } +} + +static void +nofl_space_reset_sweeper(struct nofl_space *space) { + space->next_block = (uintptr_t) &space->slabs[0].blocks; +} + +static void +nofl_space_update_mark_patterns(struct nofl_space *space, + int advance_mark_mask) { + uint8_t survivor_mask = space->marked_mask; + uint8_t next_marked_mask = nofl_rotate_dead_survivor_marked(survivor_mask); + if (advance_mark_mask) + space->marked_mask = next_marked_mask; + space->live_mask = survivor_mask | next_marked_mask; + space->sweep_mask = broadcast_byte(space->live_mask); +} + +static void +nofl_space_reset_statistics(struct nofl_space *space) { + space->granules_freed_by_last_collection = 0; + space->fragmentation_granules_since_last_collection = 0; +} + +static size_t +nofl_space_yield(struct nofl_space *space) { + return space->granules_freed_by_last_collection * NOFL_GRANULE_SIZE; +} + +static size_t +nofl_space_evacuation_reserve(struct nofl_space *space) { + return atomic_load_explicit(&space->evacuation_targets.count, + memory_order_acquire) * NOFL_BLOCK_SIZE; +} + +static size_t +nofl_space_fragmentation(struct nofl_space *space) { + size_t granules = space->fragmentation_granules_since_last_collection; + return granules * NOFL_GRANULE_SIZE; +} + +static void +nofl_space_release_evacuation_target_blocks(struct nofl_space *space) { + // Move excess evacuation target blocks back to empties. 
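+  // (For example, with the 2% minimum reserve set in nofl_space_init
+  // and 1000 blocks currently available, up to 20 target blocks are
+  // kept and the rest go back on the empties list.)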
+ size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = atomic_load_explicit(&space->unavailable.count, + memory_order_acquire); + size_t reserve = space->evacuation_minimum_reserve * (total - unavailable); + nofl_finish_evacuation_allocator(&space->evacuation_allocator, + &space->evacuation_targets, + &space->empty, + reserve); +} + +static void +nofl_space_prepare_for_evacuation(struct nofl_space *space, + enum gc_collection_kind gc_kind) { + if (gc_kind != GC_COLLECTION_COMPACTING) { + space->evacuating = 0; + space->evacuation_reserve = space->evacuation_minimum_reserve; + return; + } + + // Put the mutator into evacuation mode, collecting up to 50% of free space as + // evacuation blocks. + space->evacuation_reserve = 0.5; + + size_t target_blocks = space->evacuation_targets.count; + DEBUG("evacuation target block count: %zu\n", target_blocks); + + if (target_blocks == 0) { + DEBUG("no evacuation target blocks, disabling evacuation for this round\n"); + space->evacuating = 0; + return; + } + + size_t target_granules = target_blocks * NOFL_GRANULES_PER_BLOCK; + // Compute histogram where domain is the number of granules in a block + // that survived the last collection, aggregated into 33 buckets, and + // range is number of blocks in that bucket. (Bucket 0 is for blocks + // that were found to be completely empty; such blocks may be on the + // evacuation target list.) + const size_t bucket_count = 33; + size_t histogram[33] = {0,}; + size_t bucket_size = NOFL_GRANULES_PER_BLOCK / 32; + size_t empties = 0; + for (size_t slab = 0; slab < space->nslabs; slab++) { + for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) { + struct nofl_block_summary *summary = &space->slabs[slab].summaries[block]; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + continue; + if (!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) { + empties++; + continue; + } + size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - summary->free_granules; + size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; + histogram[bucket]++; + } + } + + // Blocks which lack the NEEDS_SWEEP flag are empty, either because + // they have been removed from the pool and have the UNAVAILABLE flag + // set, or because they are on the empties or evacuation target + // lists. When evacuation starts, the empties list should be empty. + GC_ASSERT(empties == target_blocks); + + // Now select a number of blocks that is likely to fill the space in + // the target blocks. Prefer candidate blocks with fewer survivors + // from the last GC, to increase expected free block yield. + for (size_t bucket = 0; bucket < bucket_count; bucket++) { + size_t bucket_granules = bucket * bucket_size * histogram[bucket]; + if (bucket_granules <= target_granules) { + target_granules -= bucket_granules; + } else { + histogram[bucket] = target_granules / (bucket_size * bucket); + target_granules = 0; + } + } + + // Having selected the number of blocks, now we set the evacuation + // candidate flag on all blocks. 
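+  // (A sketch with made-up numbers: with 10 target blocks the budget
+  // is 10 * 4096 granules; if bucket 1 -- at most 128 survivor
+  // granules per block -- holds 300 blocks, their 38400 granules fit
+  // and all 300 stay candidates; if bucket 2 then holds 50 blocks,
+  // only 2560 / 256 = 10 of them are selected, and denser buckets
+  // get none.)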
+ for (size_t slab = 0; slab < space->nslabs; slab++) { + for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) { + struct nofl_block_summary *summary = &space->slabs[slab].summaries[block]; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + continue; + if (!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) + continue; + size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - summary->free_granules; + size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; + if (histogram[bucket]) { + nofl_block_summary_set_flag(summary, NOFL_BLOCK_EVACUATE); + histogram[bucket]--; + } else { + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_EVACUATE); + } + } + } + + // We are ready to evacuate! + nofl_prepare_evacuation_allocator(&space->evacuation_allocator, + &space->evacuation_targets); + space->evacuating = 1; +} + +static void +nofl_space_verify_before_restart(struct nofl_space *space) { + // Iterate objects in each block, verifying that the END bytes correspond to + // the measured object size. + for (size_t slab = 0; slab < space->nslabs; slab++) { + for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) { + struct nofl_block_summary *summary = &space->slabs[slab].summaries[block]; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + continue; + + uintptr_t addr = (uintptr_t)space->slabs[slab].blocks[block].data; + uintptr_t limit = addr + NOFL_BLOCK_SIZE; + uint8_t *meta = nofl_metadata_byte_for_addr(addr); + while (addr < limit) { + if (meta[0] & space->live_mask) { + struct gc_ref obj = gc_ref(addr); + size_t obj_bytes; + gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes); + size_t granules = nofl_size_to_granules(obj_bytes); + GC_ASSERT(granules); + for (size_t granule = 0; granule < granules - 1; granule++) + GC_ASSERT(!(meta[granule] & NOFL_METADATA_BYTE_END)); + GC_ASSERT(meta[granules - 1] & NOFL_METADATA_BYTE_END); + meta += granules; + addr += granules * NOFL_GRANULE_SIZE; + } else { + meta++; + addr += NOFL_GRANULE_SIZE; + } + } + GC_ASSERT(addr == limit); + } + } +} + +static void +nofl_space_finish_gc(struct nofl_space *space, + enum gc_collection_kind gc_kind) { + space->evacuating = 0; + space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR); + nofl_space_reset_sweeper(space); + nofl_space_update_mark_patterns(space, 0); + nofl_space_reset_statistics(space); + nofl_space_release_evacuation_target_blocks(space); + if (GC_DEBUG) + nofl_space_verify_before_restart(space); +} + +static int +nofl_sweep_byte(uint8_t *loc, uintptr_t sweep_mask) { + uint8_t metadata = atomic_load_explicit(loc, memory_order_relaxed); + // If the metadata byte is nonzero, that means either a young, dead, + // survived, or marked object. If it's live (survived or marked), we + // found the next mark. Otherwise it's dead and we clear the byte. + // If we see an END, that means an end of a dead object; clear it. 
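+  // (For instance, if the previous cycle marked with MARK_0 and the
+  // current one marks with MARK_1, sweep_mask is the byte
+  // MARK_0 | MARK_1 == 6 broadcast to 0x0606060606060606; a metadata
+  // byte holding only the YOUNG bit does not match and is cleared.)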
+ if (metadata) { + if (metadata & sweep_mask) + return 1; + atomic_store_explicit(loc, 0, memory_order_relaxed); + } + return 0; +} + +static int +nofl_sweep_word(uintptr_t *loc, uintptr_t sweep_mask) { + uintptr_t metadata = atomic_load_explicit(loc, memory_order_relaxed); + if (metadata) { + if (metadata & sweep_mask) + return 1; + atomic_store_explicit(loc, 0, memory_order_relaxed); + } + return 0; +} + +static uintptr_t +nofl_space_next_block_to_sweep(struct nofl_space *space) { + uintptr_t block = atomic_load_explicit(&space->next_block, + memory_order_acquire); + uintptr_t next_block; + do { + if (block == 0) + return 0; + + next_block = block + NOFL_BLOCK_SIZE; + if (next_block % NOFL_SLAB_SIZE == 0) { + uintptr_t hi_addr = space->low_addr + space->extent; + if (next_block == hi_addr) + next_block = 0; + else + next_block += NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE; + } + } while (!atomic_compare_exchange_weak(&space->next_block, &block, + next_block)); + return block; +} + +static void +nofl_allocator_release_block(struct nofl_allocator *alloc) { + alloc->alloc = alloc->sweep = alloc->block = 0; +} + +static void +nofl_allocator_finish_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(alloc->block); + struct nofl_block_summary *block = nofl_block_summary_for_addr(alloc->block); + atomic_fetch_add(&space->granules_freed_by_last_collection, + block->free_granules); + atomic_fetch_add(&space->fragmentation_granules_since_last_collection, + block->fragmentation_granules); + + // If this block has mostly survivors, we should avoid sweeping it and + // trying to allocate into it for a minor GC. Sweep it next time to + // clear any garbage allocated in this cycle and mark it as + // "venerable" (i.e., old). + GC_ASSERT(!nofl_block_summary_has_flag(block, NOFL_BLOCK_VENERABLE)); + if (!nofl_block_summary_has_flag(block, NOFL_BLOCK_VENERABLE_AFTER_SWEEP) && + block->free_granules < NOFL_GRANULES_PER_BLOCK * space->venerable_threshold) + nofl_block_summary_set_flag(block, NOFL_BLOCK_VENERABLE_AFTER_SWEEP); + + nofl_allocator_release_block(alloc); +} + +// Sweep some heap to reclaim free space, resetting alloc->alloc and +// alloc->sweep. Return the size of the hole in granules. +static size_t +nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + uintptr_t sweep = alloc->sweep; + if (sweep == 0) + return 0; + uintptr_t limit = alloc->block + NOFL_BLOCK_SIZE; + uintptr_t sweep_mask = space->sweep_mask; + + while (sweep != limit) { + GC_ASSERT((sweep & (NOFL_GRANULE_SIZE - 1)) == 0); + uint8_t* metadata = nofl_metadata_byte_for_addr(sweep); + size_t limit_granules = (limit - sweep) >> NOFL_GRANULE_SIZE_LOG_2; + + // Except for when we first get a block, alloc->sweep is positioned + // right after a hole, which can point to either the end of the + // block or to a live object. Assume that a live object is more + // common. + { + size_t live_granules = 0; + while (limit_granules && (metadata[0] & sweep_mask)) { + // Object survived collection; skip over it and continue sweeping. 
+ size_t object_granules = nofl_space_live_object_granules(metadata); + live_granules += object_granules; + limit_granules -= object_granules; + metadata += object_granules; + } + if (!limit_granules) + break; + sweep += live_granules * NOFL_GRANULE_SIZE; + } + + size_t free_granules = scan_for_byte(metadata, limit_granules, sweep_mask); + GC_ASSERT(free_granules); + GC_ASSERT(free_granules <= limit_granules); + + struct nofl_block_summary *summary = nofl_block_summary_for_addr(sweep); + summary->hole_count++; + GC_ASSERT(free_granules <= NOFL_GRANULES_PER_BLOCK - summary->free_granules); + summary->free_granules += free_granules; + + size_t free_bytes = free_granules * NOFL_GRANULE_SIZE; + alloc->alloc = sweep; + alloc->sweep = sweep + free_bytes; + return free_granules; + } + + nofl_allocator_finish_block(alloc, space); + return 0; +} + +static void +nofl_allocator_finish_hole(struct nofl_allocator *alloc) { + size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE; + if (granules) { + struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block); + summary->holes_with_fragmentation++; + summary->fragmentation_granules += granules; + uint8_t *metadata = nofl_metadata_byte_for_addr(alloc->alloc); + memset(metadata, 0, granules); + alloc->alloc = alloc->sweep; + } + // FIXME: add to fragmentation +} + +static int +nofl_maybe_release_swept_empty_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(alloc->block); + uintptr_t block = alloc->block; + if (atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire) <= 0) + return 0; + + nofl_push_unavailable_block(space, block); + atomic_fetch_sub(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE); + nofl_allocator_release_block(alloc); + return 1; +} + +static size_t +nofl_allocator_next_hole(struct nofl_allocator *alloc, + struct nofl_space *space) { + nofl_allocator_finish_hole(alloc); + // As we sweep if we find that a block is empty, we return it to the + // empties list. Empties are precious. But if we return 10 blocks in + // a row, and still find an 11th empty, go ahead and use it. + size_t empties_countdown = 10; + while (1) { + // Sweep current block for a hole. + size_t granules = nofl_allocator_next_hole_in_block(alloc, space); + if (granules) { + // If the hole spans only part of a block, let the allocator try + // to use it. + if (granules < NOFL_GRANULES_PER_BLOCK) + return granules; + struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block); + memset(nofl_metadata_byte_for_addr(alloc->block), 0, NOFL_GRANULES_PER_BLOCK); + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + // Sweeping found a completely empty block. If we are below the + // minimum evacuation reserve, take the block. + if (nofl_push_evacuation_target_if_needed(space, alloc->block)) { + nofl_allocator_release_block(alloc); + continue; + } + // If we have pending pages to release to the OS, we should unmap + // this block. + if (nofl_maybe_release_swept_empty_block(alloc, space)) + continue; + // Otherwise if we've already returned lots of empty blocks to the + // freelist, let the allocator keep this block. + if (!empties_countdown) { + // After this block is allocated into, it will need to be swept. + nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + return granules; + } + // Otherwise we push to the empty blocks list. 
+ nofl_push_empty_block(space, alloc->block); + nofl_allocator_release_block(alloc); + empties_countdown--; + } + GC_ASSERT(alloc->block == 0); + while (1) { + uintptr_t block = nofl_space_next_block_to_sweep(space); + if (block) { + // Sweeping found a block. We might take it for allocation, or + // we might send it back. + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + // If it's marked unavailable, it's already on a list of + // unavailable blocks, so skip and get the next block. + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE)) + continue; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE)) { + // Skip venerable blocks after a minor GC -- we don't need to + // sweep as they weren't allocated into last cycle, and the + // mark bytes didn't rotate, so we have no cleanup to do; and + // we shouldn't try to allocate into them as it's not worth + // it. Any wasted space is measured as fragmentation. + if (space->last_collection_was_minor) + continue; + else + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE); + } + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) { + // Prepare to sweep the block for holes. + alloc->alloc = alloc->sweep = alloc->block = block; + if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP)) { + // In the last cycle we noted that this block consists of + // mostly old data. Sweep any garbage, commit the mark as + // venerable, and avoid allocating into it. + nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP); + if (space->last_collection_was_minor) { + nofl_finish_sweeping_in_block(alloc, space); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_VENERABLE); + continue; + } + } + // This block was marked in the last GC and needs sweeping. + // As we sweep we'll want to record how many bytes were live + // at the last collection. As we allocate we'll record how + // many granules were wasted because of fragmentation. + summary->hole_count = 0; + summary->free_granules = 0; + summary->holes_with_fragmentation = 0; + summary->fragmentation_granules = 0; + break; + } else { + // Otherwise this block is completely empty and is on the + // empties list. We take from the empties list only after all + // the NEEDS_SWEEP blocks are processed. + continue; + } + } else { + // We are done sweeping for blocks. Now take from the empties + // list. + block = nofl_pop_empty_block(space); + // No empty block? Return 0 to cause collection. + if (!block) + return 0; + + // Maybe we should use this empty as a target for evacuation. + if (nofl_push_evacuation_target_if_possible(space, block)) + continue; + + // Otherwise give the block to the allocator. + struct nofl_block_summary *summary = nofl_block_summary_for_addr(block); + nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP); + summary->hole_count = 1; + summary->free_granules = NOFL_GRANULES_PER_BLOCK; + summary->holes_with_fragmentation = 0; + summary->fragmentation_granules = 0; + alloc->block = block; + alloc->alloc = block; + alloc->sweep = block + NOFL_BLOCK_SIZE; + return NOFL_GRANULES_PER_BLOCK; + } + } + } +} + +static void +nofl_finish_sweeping_in_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + do { + nofl_allocator_finish_hole(alloc); + } while (nofl_allocator_next_hole_in_block(alloc, space)); +} + +// Another thread is triggering GC. Before we stop, finish clearing the +// dead mark bytes for the mutator's block, and release the block. 
+static void +nofl_finish_sweeping(struct nofl_allocator *alloc, + struct nofl_space *space) { + while (nofl_allocator_next_hole(alloc, space)) {} +} + +static struct gc_ref +nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space, + size_t size, void (*gc)(void*), void *gc_data) { + GC_ASSERT(size > 0); + GC_ASSERT(size <= gc_allocator_large_threshold()); + size = align_up(size, NOFL_GRANULE_SIZE); + + if (alloc->alloc + size > alloc->sweep) { + size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2; + while (1) { + size_t hole = nofl_allocator_next_hole(alloc, space); + if (hole >= granules) { + nofl_clear_memory(alloc->alloc, hole * NOFL_GRANULE_SIZE); + break; + } + if (!hole) + gc(gc_data); + } + } + + struct gc_ref ret = gc_ref(alloc->alloc); + alloc->alloc += size; + gc_update_alloc_table(ret, size); + return ret; +} + +static struct nofl_slab* +nofl_allocate_slabs(size_t nslabs) { + size_t size = nslabs * NOFL_SLAB_SIZE; + size_t extent = size + NOFL_SLAB_SIZE; + + char *mem = mmap(NULL, extent, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + perror("mmap failed"); + return NULL; + } + + uintptr_t base = (uintptr_t) mem; + uintptr_t end = base + extent; + uintptr_t aligned_base = align_up(base, NOFL_SLAB_SIZE); + uintptr_t aligned_end = aligned_base + size; + + if (aligned_base - base) + munmap((void*)base, aligned_base - base); + if (end - aligned_end) + munmap((void*)aligned_end, end - aligned_end); + + return (struct nofl_slab*) aligned_base; +} + +static int +nofl_space_init(struct nofl_space *space, size_t size, int atomic, + double venerable_threshold) { + size = align_up(size, NOFL_BLOCK_SIZE); + size_t reserved = align_up(size, NOFL_SLAB_SIZE); + size_t nslabs = reserved / NOFL_SLAB_SIZE; + struct nofl_slab *slabs = nofl_allocate_slabs(nslabs); + if (!slabs) + return 0; + + space->marked_mask = NOFL_METADATA_BYTE_MARK_0; + nofl_space_update_mark_patterns(space, 0); + space->slabs = slabs; + space->nslabs = nslabs; + space->low_addr = (uintptr_t) slabs; + space->extent = reserved; + space->next_block = 0; + space->evacuation_minimum_reserve = 0.02; + space->evacuation_reserve = space->evacuation_minimum_reserve; + space->venerable_threshold = venerable_threshold; + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) { + uintptr_t addr = (uintptr_t)slabs[slab].blocks[block].data; + if (reserved > size) { + nofl_push_unavailable_block(space, addr); + reserved -= NOFL_BLOCK_SIZE; + } else { + if (!nofl_push_evacuation_target_if_needed(space, addr)) + nofl_push_empty_block(space, addr); + } + } + } + return 1; +} + +#endif // NOFL_SPACE_H diff --git a/src/swar.h b/src/swar.h new file mode 100644 index 000000000..293d99ec2 --- /dev/null +++ b/src/swar.h @@ -0,0 +1,51 @@ +#ifndef SWAR_H +#define SWAR_H + +#include + +static inline size_t +count_zero_bytes(uint64_t bytes) { + return bytes ? 
(__builtin_ctzll(bytes) / 8) : sizeof(bytes); +} + +static uint64_t +broadcast_byte(uint8_t byte) { + uint64_t result = byte; + return result * 0x0101010101010101ULL; +} + +static inline uint64_t +load_eight_aligned_bytes(uint8_t *ptr) { + GC_ASSERT(((uintptr_t)ptr & 7) == 0); + uint8_t * __attribute__((aligned(8))) aligned_ptr = ptr; + uint64_t word; + memcpy(&word, aligned_ptr, 8); +#ifdef WORDS_BIGENDIAN + word = __builtin_bswap64(word); +#endif + return word; +} + +static size_t +scan_for_byte(uint8_t *ptr, size_t limit, uint64_t mask) { + size_t n = 0; + size_t unaligned = ((uintptr_t) ptr) & 7; + if (unaligned) { + uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8); + bytes &= mask; + if (bytes) + return count_zero_bytes(bytes); + n += 8 - unaligned; + } + + for(; n < limit; n += 8) { + uint64_t bytes = load_eight_aligned_bytes(ptr + n); + bytes &= mask; + if (bytes) + return n + count_zero_bytes(bytes); + } + + return limit; +} + +#endif // SWAR_H diff --git a/src/whippet.c b/src/whippet.c index 2f77b251e..76f8f1ed5 100644 --- a/src/whippet.c +++ b/src/whippet.c @@ -19,6 +19,7 @@ #include "gc-stack.h" #include "gc-trace.h" #include "large-object-space.h" +#include "nofl-space.h" #if GC_PARALLEL #include "parallel-tracer.h" #else @@ -27,269 +28,10 @@ #include "spin.h" #include "whippet-attrs.h" -#define GRANULE_SIZE 16 -#define GRANULE_SIZE_LOG_2 4 -#define MEDIUM_OBJECT_THRESHOLD 256 -#define MEDIUM_OBJECT_GRANULE_THRESHOLD 16 #define LARGE_OBJECT_THRESHOLD 8192 -#define LARGE_OBJECT_GRANULE_THRESHOLD 512 - -STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2); -STATIC_ASSERT_EQ(MEDIUM_OBJECT_THRESHOLD, - MEDIUM_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); -STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD, - LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE); - -// Each granule has one mark byte stored in a side table. A granule's -// mark state is a whole byte instead of a bit to facilitate parallel -// marking. (Parallel markers are allowed to race.) We also use this -// byte to compute object extent, via a bit flag indicating -// end-of-object. -// -// Because we want to allow for conservative roots, we need to know -// whether an address indicates an object or not. That means that when -// an object is allocated, it has to set a bit, somewhere. We use the -// metadata byte for this purpose, setting the "young" bit. -// -// The "young" bit's name might make you think about generational -// collection, and indeed all objects collected in a minor collection -// will have this bit set. However, whippet never needs to check for -// the young bit; if it weren't for the need to identify conservative -// roots, we wouldn't need a young bit at all. Perhaps in an -// all-precise system, we would be able to avoid the overhead of -// initializing mark byte upon each fresh allocation. -// -// When an object becomes dead after a GC, it will still have a bit set -// -- maybe the young bit, or maybe a survivor bit. The sweeper has to -// clear these bits before the next collection. But, for concurrent -// marking, we will also be marking "live" objects, updating their mark -// bits. So there are four object states concurrently observable: -// young, dead, survivor, and marked. (If we didn't have concurrent -// marking we would still need the "marked" state, because marking -// mutator roots before stopping is also a form of concurrent marking.) -// Even though these states are mutually exclusive, we use separate bits -// for them because we have the space. 
After each collection, the dead, -// survivor, and marked states rotate by one bit. -enum metadata_byte { - METADATA_BYTE_NONE = 0, - METADATA_BYTE_YOUNG = 1, - METADATA_BYTE_MARK_0 = 2, - METADATA_BYTE_MARK_1 = 4, - METADATA_BYTE_MARK_2 = 8, - METADATA_BYTE_END = 16, - METADATA_BYTE_EPHEMERON = 32, - METADATA_BYTE_PINNED = 64, - METADATA_BYTE_UNUSED_1 = 128 -}; - -static uint8_t rotate_dead_survivor_marked(uint8_t mask) { - uint8_t all = - METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2; - return ((mask << 1) | (mask >> 2)) & all; -} - -#define SLAB_SIZE (4 * 1024 * 1024) -#define BLOCK_SIZE (64 * 1024) -#define METADATA_BYTES_PER_BLOCK (BLOCK_SIZE / GRANULE_SIZE) -#define BLOCKS_PER_SLAB (SLAB_SIZE / BLOCK_SIZE) -#define META_BLOCKS_PER_SLAB (METADATA_BYTES_PER_BLOCK * BLOCKS_PER_SLAB / BLOCK_SIZE) -#define NONMETA_BLOCKS_PER_SLAB (BLOCKS_PER_SLAB - META_BLOCKS_PER_SLAB) -#define METADATA_BYTES_PER_SLAB (NONMETA_BLOCKS_PER_SLAB * METADATA_BYTES_PER_BLOCK) -#define SLACK_METADATA_BYTES_PER_SLAB (META_BLOCKS_PER_SLAB * METADATA_BYTES_PER_BLOCK) -#define REMSET_BYTES_PER_BLOCK (SLACK_METADATA_BYTES_PER_SLAB / BLOCKS_PER_SLAB) -#define REMSET_BYTES_PER_SLAB (REMSET_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB) -#define SLACK_REMSET_BYTES_PER_SLAB (REMSET_BYTES_PER_BLOCK * META_BLOCKS_PER_SLAB) -#define SUMMARY_BYTES_PER_BLOCK (SLACK_REMSET_BYTES_PER_SLAB / BLOCKS_PER_SLAB) -#define SUMMARY_BYTES_PER_SLAB (SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB) -#define SLACK_SUMMARY_BYTES_PER_SLAB (SUMMARY_BYTES_PER_BLOCK * META_BLOCKS_PER_SLAB) -#define HEADER_BYTES_PER_SLAB SLACK_SUMMARY_BYTES_PER_SLAB - -struct slab; - -struct slab_header { - union { - struct { - struct slab *next; - struct slab *prev; - }; - uint8_t padding[HEADER_BYTES_PER_SLAB]; - }; -}; -STATIC_ASSERT_EQ(sizeof(struct slab_header), HEADER_BYTES_PER_SLAB); - -// Sometimes we want to put a block on a singly-linked list. For that -// there's a pointer reserved in the block summary. But because the -// pointer is aligned (32kB on 32-bit, 64kB on 64-bit), we can portably -// hide up to 15 flags in the low bits. These flags can be accessed -// non-atomically by the mutator when it owns a block; otherwise they -// need to be accessed atomically. -enum block_summary_flag { - BLOCK_OUT_FOR_THREAD = 0x1, - BLOCK_HAS_PIN = 0x2, - BLOCK_PAGED_OUT = 0x4, - BLOCK_NEEDS_SWEEP = 0x8, - BLOCK_UNAVAILABLE = 0x10, - BLOCK_EVACUATE = 0x20, - BLOCK_VENERABLE = 0x40, - BLOCK_VENERABLE_AFTER_SWEEP = 0x80, - BLOCK_FLAG_UNUSED_8 = 0x100, - BLOCK_FLAG_UNUSED_9 = 0x200, - BLOCK_FLAG_UNUSED_10 = 0x400, - BLOCK_FLAG_UNUSED_11 = 0x800, - BLOCK_FLAG_UNUSED_12 = 0x1000, - BLOCK_FLAG_UNUSED_13 = 0x2000, - BLOCK_FLAG_UNUSED_14 = 0x4000, -}; - -struct block_summary { - union { - struct { - //struct block *next; - // Counters related to previous collection: how many holes there - // were, and how much space they had. - uint16_t hole_count; - uint16_t free_granules; - // Counters related to allocation since previous collection: - // wasted space due to fragmentation. - uint16_t holes_with_fragmentation; - uint16_t fragmentation_granules; - // After a block is swept, if it's empty it goes on the empties - // list. Otherwise if it's not immediately used by a mutator (as - // is usually the case), it goes on the swept list. Both of these - // lists use this field. But as the next element in the field is - // block-aligned, we stash flags in the low bits. 
- uintptr_t next_and_flags; - }; - uint8_t padding[SUMMARY_BYTES_PER_BLOCK]; - }; -}; -STATIC_ASSERT_EQ(sizeof(struct block_summary), SUMMARY_BYTES_PER_BLOCK); - -struct block { - char data[BLOCK_SIZE]; -}; - -struct slab { - struct slab_header header; - struct block_summary summaries[NONMETA_BLOCKS_PER_SLAB]; - uint8_t remembered_set[REMSET_BYTES_PER_SLAB]; - uint8_t metadata[METADATA_BYTES_PER_SLAB]; - struct block blocks[NONMETA_BLOCKS_PER_SLAB]; -}; -STATIC_ASSERT_EQ(sizeof(struct slab), SLAB_SIZE); - -static struct slab *object_slab(void *obj) { - uintptr_t addr = (uintptr_t) obj; - uintptr_t base = align_down(addr, SLAB_SIZE); - return (struct slab*) base; -} - -static uint8_t *metadata_byte_for_addr(uintptr_t addr) { - uintptr_t base = align_down(addr, SLAB_SIZE); - uintptr_t granule = (addr & (SLAB_SIZE - 1)) >> GRANULE_SIZE_LOG_2; - return (uint8_t*) (base + granule); -} - -static uint8_t *metadata_byte_for_object(struct gc_ref ref) { - return metadata_byte_for_addr(gc_ref_value(ref)); -} - -#define GRANULES_PER_BLOCK (BLOCK_SIZE / GRANULE_SIZE) -#define GRANULES_PER_REMSET_BYTE (GRANULES_PER_BLOCK / REMSET_BYTES_PER_BLOCK) - -static struct block_summary* block_summary_for_addr(uintptr_t addr) { - uintptr_t base = align_down(addr, SLAB_SIZE); - uintptr_t block = (addr & (SLAB_SIZE - 1)) / BLOCK_SIZE; - return (struct block_summary*) (base + block * sizeof(struct block_summary)); -} - -static uintptr_t block_summary_has_flag(struct block_summary *summary, - enum block_summary_flag flag) { - return summary->next_and_flags & flag; -} -static void block_summary_set_flag(struct block_summary *summary, - enum block_summary_flag flag) { - summary->next_and_flags |= flag; -} -static void block_summary_clear_flag(struct block_summary *summary, - enum block_summary_flag flag) { - summary->next_and_flags &= ~(uintptr_t)flag; -} -static uintptr_t block_summary_next(struct block_summary *summary) { - return align_down(summary->next_and_flags, BLOCK_SIZE); -} -static void block_summary_set_next(struct block_summary *summary, - uintptr_t next) { - GC_ASSERT((next & (BLOCK_SIZE - 1)) == 0); - summary->next_and_flags = - (summary->next_and_flags & (BLOCK_SIZE - 1)) | next; -} - -// Lock-free block list. 
-struct block_list { - size_t count; - uintptr_t blocks; -}; - -static void push_block(struct block_list *list, uintptr_t block) { - atomic_fetch_add_explicit(&list->count, 1, memory_order_acq_rel); - struct block_summary *summary = block_summary_for_addr(block); - uintptr_t next = atomic_load_explicit(&list->blocks, memory_order_acquire); - do { - block_summary_set_next(summary, next); - } while (!atomic_compare_exchange_weak(&list->blocks, &next, block)); -} - -static uintptr_t pop_block(struct block_list *list) { - uintptr_t head = atomic_load_explicit(&list->blocks, memory_order_acquire); - struct block_summary *summary; - uintptr_t next; - do { - if (!head) - return 0; - summary = block_summary_for_addr(head); - next = block_summary_next(summary); - } while (!atomic_compare_exchange_weak(&list->blocks, &head, next)); - block_summary_set_next(summary, 0); - atomic_fetch_sub_explicit(&list->count, 1, memory_order_acq_rel); - return head; -} - -static inline size_t size_to_granules(size_t size) { - return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2; -} - -struct evacuation_allocator { - size_t allocated; // atomically - size_t limit; - uintptr_t block_cursor; // atomically -}; - -struct mark_space { - uint64_t sweep_mask; - uint8_t live_mask; - uint8_t marked_mask; - uint8_t evacuating; - uintptr_t low_addr; - size_t extent; - size_t heap_size; - uintptr_t next_block; // atomically - struct block_list empty; - struct block_list unavailable; - struct block_list evacuation_targets; - double evacuation_minimum_reserve; - double evacuation_reserve; - double venerable_threshold; - ssize_t pending_unavailable_bytes; // atomically - struct evacuation_allocator evacuation_allocator; - struct slab *slabs; - size_t nslabs; - uintptr_t granules_freed_by_last_collection; // atomically - uintptr_t fragmentation_granules_since_last_collection; // atomically -}; struct gc_heap { - struct mark_space mark_space; + struct nofl_space nofl_space; struct large_object_space large_object_space; struct gc_extern_space *extern_space; size_t large_object_pages; @@ -336,10 +78,7 @@ struct gc_mutator_mark_buf { }; struct gc_mutator { - // Bump-pointer allocation into holes. 
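// "Allocation into holes" means the mutator bump-allocates within the
// current hole -- a run of free granules found while sweeping a block --
// bounded by the `alloc` and `sweep` pointers below.  A minimal sketch of
// the fast path, assuming just those two fields (the helper name here is
// hypothetical); the real logic, including advancing to the next hole,
// is nofl_allocate in nofl-space.h:
//
//   static inline void*
//   try_bump_alloc(uintptr_t *alloc, uintptr_t sweep, size_t size) {
//     if (*alloc + size > sweep)
//       return NULL;               // hole exhausted; caller finds next hole
//     void *ret = (void*)*alloc;   // object starts at the current cursor
//     *alloc += size;              // bump past it
//     return ret;
//   }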
- uintptr_t alloc; - uintptr_t sweep; - uintptr_t block; + struct nofl_allocator allocator; struct gc_heap *heap; struct gc_stack stack; struct gc_mutator_roots *roots; @@ -352,319 +91,32 @@ struct gc_mutator { struct gc_mutator *next; }; -static inline struct mark_space* heap_mark_space(struct gc_heap *heap) { - return &heap->mark_space; +static inline struct nofl_space* +heap_nofl_space(struct gc_heap *heap) { + return &heap->nofl_space; } -static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) { +static inline struct large_object_space* +heap_large_object_space(struct gc_heap *heap) { return &heap->large_object_space; } -static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) { +static inline struct gc_extern_space* +heap_extern_space(struct gc_heap *heap) { return heap->extern_space; } -static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) { +static inline struct gc_heap* +mutator_heap(struct gc_mutator *mutator) { return mutator->heap; } -static inline void clear_memory(uintptr_t addr, size_t size) { - memset((char*)addr, 0, size); -} - static void collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) GC_NEVER_INLINE; -static inline uint64_t load_eight_aligned_bytes(uint8_t *mark) { - GC_ASSERT(((uintptr_t)mark & 7) == 0); - uint8_t * __attribute__((aligned(8))) aligned_mark = mark; - uint64_t word; - memcpy(&word, aligned_mark, 8); -#ifdef WORDS_BIGENDIAN - word = __builtin_bswap64(word); -#endif - return word; -} - -static inline size_t count_zero_bytes(uint64_t bytes) { - return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes); -} - -static uint64_t broadcast_byte(uint8_t byte) { - uint64_t result = byte; - return result * 0x0101010101010101ULL; -} - -static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) { - size_t n = 0; - // If we have a hole, it is likely to be more that 8 granules long. - // Assuming that it's better to make aligned loads, first we align the - // sweep pointer, then we load aligned mark words. 
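// A worked example, with an arbitrarily chosen mark rotation: say the
// live mask this cycle is METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_1 =
// 0x06, so sweep_mask = broadcast_byte(0x06) = 0x0606060606060606.  If
// the next eight metadata bytes are {0, 0, 0, 0x02, 0, 0, 0, 0}, the
// aligned load (byte i at bits 8*i) gives 0x0000000002000000; masking
// leaves only bit 25 set, __builtin_ctzll returns 25, and 25/8 = 3.  So
// the hole is three granules long, and the fourth granule starts a live
// object (its byte has MARK_0 set).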
- size_t unaligned = ((uintptr_t) mark) & 7; - if (unaligned) { - uint64_t bytes = load_eight_aligned_bytes(mark - unaligned) >> (unaligned * 8); - bytes &= sweep_mask; - if (bytes) - return count_zero_bytes(bytes); - n += 8 - unaligned; - } - - for(; n < limit; n += 8) { - uint64_t bytes = load_eight_aligned_bytes(mark + n); - bytes &= sweep_mask; - if (bytes) - return n + count_zero_bytes(bytes); - } - - return limit; -} - -static size_t mark_space_live_object_granules(uint8_t *metadata) { - return next_mark(metadata, -1, broadcast_byte(METADATA_BYTE_END)) + 1; -} - -static inline int mark_space_mark_object(struct mark_space *space, - struct gc_ref ref) { - uint8_t *loc = metadata_byte_for_object(ref); - uint8_t byte = *loc; - if (byte & space->marked_mask) - return 0; - uint8_t mask = METADATA_BYTE_YOUNG | METADATA_BYTE_MARK_0 - | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2; - *loc = (byte & ~mask) | space->marked_mask; - return 1; -} - -static uintptr_t make_evacuation_allocator_cursor(uintptr_t block, - size_t allocated) { - GC_ASSERT(allocated < (BLOCK_SIZE - 1) * (uint64_t) BLOCK_SIZE); - return align_down(block, BLOCK_SIZE) | (allocated / BLOCK_SIZE); -} - -static void prepare_evacuation_allocator(struct evacuation_allocator *alloc, - struct block_list *targets) { - uintptr_t first_block = targets->blocks; - atomic_store_explicit(&alloc->allocated, 0, memory_order_release); - alloc->limit = - atomic_load_explicit(&targets->count, memory_order_acquire) * BLOCK_SIZE; - atomic_store_explicit(&alloc->block_cursor, - make_evacuation_allocator_cursor(first_block, 0), - memory_order_release); -} - -static void clear_remaining_metadata_bytes_in_block(uintptr_t block, - uintptr_t allocated) { - GC_ASSERT((allocated & (GRANULE_SIZE - 1)) == 0); - uintptr_t base = block + allocated; - uintptr_t limit = block + BLOCK_SIZE; - uintptr_t granules = (limit - base) >> GRANULE_SIZE_LOG_2; - GC_ASSERT(granules <= GRANULES_PER_BLOCK); - memset(metadata_byte_for_addr(base), 0, granules); -} - -static void finish_evacuation_allocator_block(uintptr_t block, - uintptr_t allocated) { - GC_ASSERT(allocated <= BLOCK_SIZE); - struct block_summary *summary = block_summary_for_addr(block); - block_summary_set_flag(summary, BLOCK_NEEDS_SWEEP); - size_t fragmentation = (BLOCK_SIZE - allocated) >> GRANULE_SIZE_LOG_2; - summary->hole_count = 1; - summary->free_granules = GRANULES_PER_BLOCK; - summary->holes_with_fragmentation = fragmentation ? 1 : 0; - summary->fragmentation_granules = fragmentation; - if (fragmentation) - clear_remaining_metadata_bytes_in_block(block, allocated); -} - -static void finish_evacuation_allocator(struct evacuation_allocator *alloc, - struct block_list *targets, - struct block_list *empties, - size_t reserve) { - // Blocks that we used for evacuation get returned to the mutator as - // sweepable blocks. Blocks that we didn't get to use go to the - // empties. - size_t allocated = atomic_load_explicit(&alloc->allocated, - memory_order_acquire); - atomic_store_explicit(&alloc->allocated, 0, memory_order_release); - if (allocated > alloc->limit) - allocated = alloc->limit; - while (allocated >= BLOCK_SIZE) { - uintptr_t block = pop_block(targets); - GC_ASSERT(block); - allocated -= BLOCK_SIZE; - } - if (allocated) { - // Finish off the last partially-filled block. 
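// For example (illustrative numbers): with a limit of four blocks and a
// final allocation pointer of 3 * BLOCK_SIZE + 4096 bytes, the loop
// above retires three fully used target blocks, and this branch
// finishes the fourth with allocated = 4096: its summary records one
// hole with (BLOCK_SIZE - 4096) / GRANULE_SIZE = 3840 fragmentation
// granules, and the metadata bytes for the unused tail are cleared.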
- uintptr_t block = pop_block(targets); - GC_ASSERT(block); - finish_evacuation_allocator_block(block, allocated); - } - size_t remaining = atomic_load_explicit(&targets->count, memory_order_acquire); - while (remaining-- > reserve) - push_block(empties, pop_block(targets)); -} - -static struct gc_ref evacuation_allocate(struct mark_space *space, - size_t granules) { - // All collector threads compete to allocate from what is logically a - // single bump-pointer arena, which is actually composed of a linked - // list of blocks. - struct evacuation_allocator *alloc = &space->evacuation_allocator; - uintptr_t cursor = atomic_load_explicit(&alloc->block_cursor, - memory_order_acquire); - size_t bytes = granules * GRANULE_SIZE; - size_t prev = atomic_load_explicit(&alloc->allocated, memory_order_acquire); - size_t block_mask = (BLOCK_SIZE - 1); - size_t next; - do { - if (prev >= alloc->limit) - // No more space. - return gc_ref_null(); - next = prev + bytes; - if ((prev ^ next) & ~block_mask) - // Allocation straddles a block boundary; advance so it starts a - // fresh block. - next = (next & ~block_mask) + bytes; - } while (!atomic_compare_exchange_weak(&alloc->allocated, &prev, next)); - // OK, we've claimed our memory, starting at next - bytes. Now find - // the node in the linked list of evacuation targets that corresponds - // to this allocation pointer. - uintptr_t block = cursor & ~block_mask; - // This is the SEQ'th block to be allocated into. - uintptr_t seq = cursor & block_mask; - // Therefore this block handles allocations starting at SEQ*BLOCK_SIZE - // and continuing for BLOCK_SIZE bytes. - uintptr_t base = seq * BLOCK_SIZE; - - while ((base ^ next) & ~block_mask) { - GC_ASSERT(base < next); - if (base + BLOCK_SIZE > prev) { - // The allocation straddles a block boundary, and the cursor has - // caught up so that we identify the block for the previous - // allocation pointer. Finish the previous block, probably - // leaving a small hole at the end. - finish_evacuation_allocator_block(block, prev - base); - } - // Cursor lags; advance it. - block = block_summary_next(block_summary_for_addr(block)); - base += BLOCK_SIZE; - if (base >= alloc->limit) { - // Ran out of blocks! - GC_ASSERT(!block); - return gc_ref_null(); - } - GC_ASSERT(block); - // This store can race with other allocators, but that's OK as long - // as it never advances the cursor beyond the allocation pointer, - // which it won't because we updated the allocation pointer already. - atomic_store_explicit(&alloc->block_cursor, - make_evacuation_allocator_cursor(block, base), - memory_order_release); - } - - uintptr_t addr = block + (next & block_mask) - bytes; - return gc_ref(addr); -} - -static inline int mark_space_evacuate_or_mark_object(struct mark_space *space, - struct gc_edge edge, - struct gc_ref old_ref) { - uint8_t *metadata = metadata_byte_for_object(old_ref); - uint8_t byte = *metadata; - if (byte & space->marked_mask) - return 0; - if (space->evacuating && - block_summary_has_flag(block_summary_for_addr(gc_ref_value(old_ref)), - BLOCK_EVACUATE)) { - // This is an evacuating collection, and we are attempting to - // evacuate this block, and we are tracing this particular object - // for what appears to be the first time. - struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); - - if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) - gc_atomic_forward_acquire(&fwd); - - switch (fwd.state) { - case GC_FORWARDING_STATE_NOT_FORWARDED: - case GC_FORWARDING_STATE_ABORTED: - // Impossible. 
- GC_CRASH(); - case GC_FORWARDING_STATE_ACQUIRED: { - // We claimed the object successfully; evacuating is up to us. - size_t object_granules = mark_space_live_object_granules(metadata); - struct gc_ref new_ref = evacuation_allocate(space, object_granules); - if (gc_ref_is_heap_object(new_ref)) { - // Copy object contents before committing, as we don't know what - // part of the object (if any) will be overwritten by the - // commit. - memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), - object_granules * GRANULE_SIZE); - gc_atomic_forward_commit(&fwd, new_ref); - // Now update extent metadata, and indicate to the caller that - // the object's fields need to be traced. - uint8_t *new_metadata = metadata_byte_for_object(new_ref); - memcpy(new_metadata + 1, metadata + 1, object_granules - 1); - gc_edge_update(edge, new_ref); - metadata = new_metadata; - // Fall through to set mark bits. - } else { - // Well shucks; allocation failed, marking the end of - // opportunistic evacuation. No future evacuation of this - // object will succeed. Mark in place instead. - gc_atomic_forward_abort(&fwd); - } - break; - } - case GC_FORWARDING_STATE_BUSY: - // Someone else claimed this object first. Spin until new address - // known, or evacuation aborts. - for (size_t spin_count = 0;; spin_count++) { - if (gc_atomic_forward_retry_busy(&fwd)) - break; - yield_for_spin(spin_count); - } - if (fwd.state == GC_FORWARDING_STATE_ABORTED) - // Remove evacuation aborted; remote will mark and enqueue. - return 0; - ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); - // Fall through. - case GC_FORWARDING_STATE_FORWARDED: - // The object has been evacuated already. Update the edge; - // whoever forwarded the object will make sure it's eventually - // traced. - gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); - return 0; - } - } - - uint8_t mask = METADATA_BYTE_YOUNG | METADATA_BYTE_MARK_0 - | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2; - *metadata = (byte & ~mask) | space->marked_mask; - return 1; -} - -static inline int mark_space_contains_address(struct mark_space *space, - uintptr_t addr) { - return addr - space->low_addr < space->extent; -} - -static inline int mark_space_contains_conservative_ref(struct mark_space *space, - struct gc_conservative_ref ref) { - return mark_space_contains_address(space, gc_conservative_ref_value(ref)); -} - -static inline int mark_space_contains(struct mark_space *space, - struct gc_ref ref) { - return mark_space_contains_address(space, gc_ref_value(ref)); -} - -static inline int do_trace(struct gc_heap *heap, struct gc_edge edge, - struct gc_ref ref) { +static inline int +do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref) { if (!gc_ref_is_heap_object(ref)) return 0; - if (GC_LIKELY(mark_space_contains(heap_mark_space(heap), ref))) { - if (heap_mark_space(heap)->evacuating) - return mark_space_evacuate_or_mark_object(heap_mark_space(heap), edge, - ref); - return mark_space_mark_object(heap_mark_space(heap), ref); - } + if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) + return nofl_space_evacuate_or_mark_object(heap_nofl_space(heap), edge, ref); else if (large_object_space_contains(heap_large_object_space(heap), ref)) return large_object_space_mark_object(heap_large_object_space(heap), ref); @@ -675,7 +127,8 @@ static inline int do_trace(struct gc_heap *heap, struct gc_edge edge, static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge) GC_ALWAYS_INLINE; -static inline int trace_edge(struct gc_heap 
*heap, struct gc_edge edge) { +static inline int +trace_edge(struct gc_heap *heap, struct gc_edge edge) { struct gc_ref ref = gc_edge_ref(edge); int is_new = do_trace(heap, edge, ref); @@ -687,135 +140,41 @@ static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge) { return is_new; } -int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { +int +gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { struct gc_ref ref = gc_edge_ref(edge); if (!gc_ref_is_heap_object(ref)) return 0; - if (GC_LIKELY(mark_space_contains(heap_mark_space(heap), ref))) { - struct mark_space *space = heap_mark_space(heap); - uint8_t *metadata = metadata_byte_for_object(ref); - uint8_t byte = *metadata; - if (byte & space->marked_mask) - return 1; - if (!space->evacuating) - return 0; - if (!block_summary_has_flag(block_summary_for_addr(gc_ref_value(ref)), - BLOCK_EVACUATE)) - return 0; + struct nofl_space *nofl_space = heap_nofl_space(heap); + if (GC_LIKELY(nofl_space_contains(nofl_space, ref))) + return nofl_space_forward_or_mark_if_traced(nofl_space, edge, ref); + + struct large_object_space *lospace = heap_large_object_space(heap); + if (large_object_space_contains(lospace, ref)) + return large_object_space_is_copied(lospace, ref); - struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref); - switch (fwd.state) { - case GC_FORWARDING_STATE_NOT_FORWARDED: - return 0; - case GC_FORWARDING_STATE_BUSY: - // Someone else claimed this object first. Spin until new address - // known, or evacuation aborts. - for (size_t spin_count = 0;; spin_count++) { - if (gc_atomic_forward_retry_busy(&fwd)) - break; - yield_for_spin(spin_count); - } - if (fwd.state == GC_FORWARDING_STATE_ABORTED) - // Remote evacuation aborted; remote will mark and enqueue. - return 1; - ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); - // Fall through. - case GC_FORWARDING_STATE_FORWARDED: - gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); - return 1; - default: - GC_CRASH(); - } - } else if (large_object_space_contains(heap_large_object_space(heap), ref)) { - return large_object_space_is_copied(heap_large_object_space(heap), ref); - } GC_CRASH(); } -static inline struct gc_ref mark_space_mark_conservative_ref(struct mark_space *space, - struct gc_conservative_ref ref, - int possibly_interior) { - uintptr_t addr = gc_conservative_ref_value(ref); - - if (possibly_interior) { - addr = align_down(addr, GRANULE_SIZE); - } else { - // Addr not an aligned granule? Not an object. - uintptr_t displacement = addr & (GRANULE_SIZE - 1); - if (!gc_is_valid_conservative_ref_displacement(displacement)) - return gc_ref_null(); - addr -= displacement; - } - - // Addr in meta block? Not an object. - if ((addr & (SLAB_SIZE - 1)) < META_BLOCKS_PER_SLAB * BLOCK_SIZE) - return gc_ref_null(); - - // Addr in block that has been paged out? Not an object. - struct block_summary *summary = block_summary_for_addr(addr); - if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE)) - return gc_ref_null(); - - uint8_t *loc = metadata_byte_for_addr(addr); - uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); - - // Already marked object? Nothing to do. - if (byte & space->marked_mask) - return gc_ref_null(); - - // Addr is the not start of an unmarked object? Search backwards if - // we have interior pointers, otherwise not an object. 
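// As a concrete illustration: a conservative pointer into the third
// granule of a four-granule object sees a metadata byte of 0 there
// (neither an object-start bit nor the END bit).  With
// possibly_interior, the loop below walks the side table backwards
// through those zero bytes until it finds the granule whose byte has a
// young or mark bit set, and resolves addr to that object's start.  If
// it instead hits another object's END byte, or runs off the start of
// the block, addr pointed into free granules and is ignored.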
- uint8_t object_start_mask = space->live_mask | METADATA_BYTE_YOUNG; - if (!(byte & object_start_mask)) { - if (!possibly_interior) - return gc_ref_null(); - - uintptr_t block_base = align_down(addr, BLOCK_SIZE); - uint8_t *loc_base = metadata_byte_for_addr(block_base); - do { - // Searched past block? Not an object. - if (loc-- == loc_base) - return gc_ref_null(); - - byte = atomic_load_explicit(loc, memory_order_relaxed); - - // Ran into the end of some other allocation? Not an object, then. - if (byte & METADATA_BYTE_END) - return gc_ref_null(); - - // Continue until we find object start. - } while (!(byte & object_start_mask)); - - // Found object start, and object is unmarked; adjust addr. - addr = block_base + (loc - loc_base) * GRANULE_SIZE; - } - - uint8_t mask = METADATA_BYTE_YOUNG | METADATA_BYTE_MARK_0 - | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2; - atomic_store_explicit(loc, (byte & ~mask) | space->marked_mask, - memory_order_relaxed); - - return gc_ref(addr); -} - -static inline struct gc_ref do_trace_conservative_ref(struct gc_heap *heap, - struct gc_conservative_ref ref, - int possibly_interior) { +static inline struct gc_ref +do_trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, + int possibly_interior) { if (!gc_conservative_ref_might_be_a_heap_object(ref, possibly_interior)) return gc_ref_null(); - if (GC_LIKELY(mark_space_contains_conservative_ref(heap_mark_space(heap), ref))) - return mark_space_mark_conservative_ref(heap_mark_space(heap), ref, - possibly_interior); - else - return large_object_space_mark_conservative_ref(heap_large_object_space(heap), - ref, possibly_interior); + struct nofl_space *nofl_space = heap_nofl_space(heap); + if (GC_LIKELY(nofl_space_contains_conservative_ref(nofl_space, ref))) + return nofl_space_mark_conservative_ref(nofl_space, ref, possibly_interior); + + struct large_object_space *lospace = heap_large_object_space(heap); + return large_object_space_mark_conservative_ref(lospace, ref, + possibly_interior); } -static inline struct gc_ref trace_conservative_ref(struct gc_heap *heap, - struct gc_conservative_ref ref, - int possibly_interior) { +static inline struct gc_ref +trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, + int possibly_interior) { struct gc_ref ret = do_trace_conservative_ref(heap, ref, possibly_interior); if (gc_ref_is_heap_object(ret) && @@ -826,35 +185,29 @@ static inline struct gc_ref trace_conservative_ref(struct gc_heap *heap, return ret; } -static inline size_t mark_space_object_size(struct mark_space *space, - struct gc_ref ref) { - uint8_t *loc = metadata_byte_for_object(ref); - size_t granules = mark_space_live_object_granules(loc); - return granules * GRANULE_SIZE; -} - -static int heap_has_multiple_mutators(struct gc_heap *heap) { - return atomic_load_explicit(&heap->multithreaded, memory_order_relaxed); -} - -static int mutators_are_stopping(struct gc_heap *heap) { +static int +mutators_are_stopping(struct gc_heap *heap) { return atomic_load_explicit(&heap->collecting, memory_order_relaxed); } -static inline void heap_lock(struct gc_heap *heap) { +static inline void +heap_lock(struct gc_heap *heap) { pthread_mutex_lock(&heap->lock); } -static inline void heap_unlock(struct gc_heap *heap) { +static inline void +heap_unlock(struct gc_heap *heap) { pthread_mutex_unlock(&heap->lock); } // with heap lock -static inline int all_mutators_stopped(struct gc_heap *heap) { +static inline int +all_mutators_stopped(struct gc_heap *heap) { return heap->mutator_count == 
heap->paused_mutator_count + heap->inactive_mutator_count; } -static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { +static void +add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { mut->heap = heap; mut->event_listener_data = heap->event_listener.mutator_added(heap->event_listener_data); @@ -869,7 +222,8 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { heap_unlock(heap); } -static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { +static void +remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { MUTATOR_EVENT(mut, mutator_removed); mut->heap = NULL; heap_lock(heap); @@ -881,12 +235,14 @@ static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { heap_unlock(heap); } -static void request_mutators_to_stop(struct gc_heap *heap) { +static void +request_mutators_to_stop(struct gc_heap *heap) { GC_ASSERT(!mutators_are_stopping(heap)); atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed); } -static void allow_mutators_to_continue(struct gc_heap *heap) { +static void +allow_mutators_to_continue(struct gc_heap *heap) { GC_ASSERT(mutators_are_stopping(heap)); GC_ASSERT(all_mutators_stopped(heap)); heap->paused_mutator_count = 0; @@ -895,129 +251,18 @@ static void allow_mutators_to_continue(struct gc_heap *heap) { pthread_cond_broadcast(&heap->mutator_cond); } -static void push_unavailable_block(struct mark_space *space, uintptr_t block) { - struct block_summary *summary = block_summary_for_addr(block); - GC_ASSERT(!block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP)); - GC_ASSERT(!block_summary_has_flag(summary, BLOCK_UNAVAILABLE)); - block_summary_set_flag(summary, BLOCK_UNAVAILABLE); - madvise((void*)block, BLOCK_SIZE, MADV_DONTNEED); - push_block(&space->unavailable, block); -} - -static uintptr_t pop_unavailable_block(struct mark_space *space) { - uintptr_t block = pop_block(&space->unavailable); - if (!block) - return 0; - struct block_summary *summary = block_summary_for_addr(block); - GC_ASSERT(block_summary_has_flag(summary, BLOCK_UNAVAILABLE)); - block_summary_clear_flag(summary, BLOCK_UNAVAILABLE); - return block; -} - -static uintptr_t pop_empty_block(struct mark_space *space) { - return pop_block(&space->empty); -} - -static int maybe_push_evacuation_target(struct mark_space *space, - uintptr_t block, double reserve) { - GC_ASSERT(!block_summary_has_flag(block_summary_for_addr(block), - BLOCK_NEEDS_SWEEP)); - size_t targets = atomic_load_explicit(&space->evacuation_targets.count, - memory_order_acquire); - size_t total = space->nslabs * NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = atomic_load_explicit(&space->unavailable.count, - memory_order_acquire); - if (targets >= (total - unavailable) * reserve) - return 0; - - push_block(&space->evacuation_targets, block); - return 1; -} - -static int push_evacuation_target_if_needed(struct mark_space *space, - uintptr_t block) { - return maybe_push_evacuation_target(space, block, - space->evacuation_minimum_reserve); -} - -static int push_evacuation_target_if_possible(struct mark_space *space, - uintptr_t block) { - return maybe_push_evacuation_target(space, block, - space->evacuation_reserve); -} - -static void push_empty_block(struct mark_space *space, uintptr_t block) { - GC_ASSERT(!block_summary_has_flag(block_summary_for_addr(block), - BLOCK_NEEDS_SWEEP)); - push_block(&space->empty, block); -} - -static ssize_t mark_space_request_release_memory(struct mark_space *space, - size_t bytes) { - return 
atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes; -} - -static void mark_space_reacquire_memory(struct mark_space *space, - size_t bytes) { - ssize_t pending = - atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes; - while (pending + BLOCK_SIZE <= 0) { - uintptr_t block = pop_unavailable_block(space); - GC_ASSERT(block); - if (push_evacuation_target_if_needed(space, block)) - continue; - push_empty_block(space, block); - pending = atomic_fetch_add(&space->pending_unavailable_bytes, BLOCK_SIZE) - + BLOCK_SIZE; - } -} - -static size_t next_hole(struct gc_mutator *mut); - -static int sweep_until_memory_released(struct gc_mutator *mut) { - struct mark_space *space = heap_mark_space(mutator_heap(mut)); - ssize_t pending = atomic_load_explicit(&space->pending_unavailable_bytes, - memory_order_acquire); - // First try to unmap previously-identified empty blocks. If pending - // > 0 and other mutators happen to identify empty blocks, they will - // be unmapped directly and moved to the unavailable list. - while (pending > 0) { - uintptr_t block = pop_empty_block(space); - if (!block) - break; - // Note that we may have competing uses; if we're evacuating, - // perhaps we should push this block to the evacuation target list. - // That would enable us to reach a fragmentation low water-mark in - // fewer cycles. But maybe evacuation started in order to obtain - // free blocks for large objects; in that case we should just reap - // the fruits of our labor. Probably this second use-case is more - // important. - push_unavailable_block(space, block); - pending = atomic_fetch_sub(&space->pending_unavailable_bytes, BLOCK_SIZE); - pending -= BLOCK_SIZE; - } - // Otherwise, sweep, transitioning any empty blocks to unavailable and - // throwing away any non-empty block. A bit wasteful but hastening - // the next collection is a reasonable thing to do here. 
- while (pending > 0) { - if (!next_hole(mut)) - return 0; - pending = atomic_load_explicit(&space->pending_unavailable_bytes, - memory_order_acquire); - } - return pending <= 0; -} - -static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { +static void +heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { size_t previous = heap->large_object_pages; heap->large_object_pages = npages; GC_ASSERT(npages <= previous); size_t bytes = (previous - npages) << heap_large_object_space(heap)->page_size_log2; - mark_space_reacquire_memory(heap_mark_space(heap), bytes); + nofl_space_reacquire_memory(heap_nofl_space(heap), bytes); } -static void mutator_mark_buf_grow(struct gc_mutator_mark_buf *buf) { +static void +mutator_mark_buf_grow(struct gc_mutator_mark_buf *buf) { size_t old_capacity = buf->capacity; size_t old_bytes = old_capacity * sizeof(struct gc_ref); @@ -1038,27 +283,30 @@ static void mutator_mark_buf_grow(struct gc_mutator_mark_buf *buf) { buf->capacity = new_capacity; } -static void mutator_mark_buf_push(struct gc_mutator_mark_buf *buf, - struct gc_ref ref) { +static void +mutator_mark_buf_push(struct gc_mutator_mark_buf *buf, struct gc_ref ref) { if (GC_UNLIKELY(buf->size == buf->capacity)) mutator_mark_buf_grow(buf); buf->objects[buf->size++] = ref; } -static void mutator_mark_buf_release(struct gc_mutator_mark_buf *buf) { +static void +mutator_mark_buf_release(struct gc_mutator_mark_buf *buf) { size_t bytes = buf->size * sizeof(struct gc_ref); if (bytes >= getpagesize()) madvise(buf->objects, align_up(bytes, getpagesize()), MADV_DONTNEED); buf->size = 0; } -static void mutator_mark_buf_destroy(struct gc_mutator_mark_buf *buf) { +static void +mutator_mark_buf_destroy(struct gc_mutator_mark_buf *buf) { size_t bytes = buf->capacity * sizeof(struct gc_ref); if (bytes) munmap(buf->objects, bytes); } -static void enqueue_mutator_for_tracing(struct gc_mutator *mut) { +static void +enqueue_mutator_for_tracing(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); GC_ASSERT(mut->next == NULL); struct gc_mutator *next = @@ -1069,23 +317,26 @@ static void enqueue_mutator_for_tracing(struct gc_mutator *mut) { &next, mut)); } -static int heap_should_mark_while_stopping(struct gc_heap *heap) { +static int +heap_should_mark_while_stopping(struct gc_heap *heap) { return atomic_load_explicit(&heap->mark_while_stopping, memory_order_acquire); } -static int mutator_should_mark_while_stopping(struct gc_mutator *mut) { +static int +mutator_should_mark_while_stopping(struct gc_mutator *mut) { return heap_should_mark_while_stopping(mutator_heap(mut)); } -void gc_mutator_set_roots(struct gc_mutator *mut, - struct gc_mutator_roots *roots) { +void +gc_mutator_set_roots(struct gc_mutator *mut, struct gc_mutator_roots *roots) { mut->roots = roots; } -void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { +void +gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { heap->roots = roots; } -void gc_heap_set_extern_space(struct gc_heap *heap, - struct gc_extern_space *space) { +void +gc_heap_set_extern_space(struct gc_heap *heap, struct gc_extern_space *space) { heap->extern_space = space; } @@ -1109,59 +360,67 @@ tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { gc_trace_worker_enqueue(worker, gc_edge_ref(edge)); } -static void trace_and_enqueue_locally(struct gc_edge edge, - struct gc_heap *heap, - void *data) { +static void +trace_and_enqueue_locally(struct gc_edge edge, struct gc_heap *heap, + 
void *data) { struct gc_mutator *mut = data; if (trace_edge(heap, edge)) mutator_mark_buf_push(&mut->mark_buf, gc_edge_ref(edge)); } -static inline void do_trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data, - int possibly_interior) { +static inline void +do_trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, + struct gc_heap *heap, + void *data, + int possibly_interior) { struct gc_mutator *mut = data; struct gc_ref object = trace_conservative_ref(heap, ref, possibly_interior); if (gc_ref_is_heap_object(object)) mutator_mark_buf_push(&mut->mark_buf, object); } -static void trace_possibly_interior_conservative_ref_and_enqueue_locally - (struct gc_conservative_ref ref, struct gc_heap *heap, void *data) { +static void +trace_possibly_interior_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, + struct gc_heap *heap, + void *data) { return do_trace_conservative_ref_and_enqueue_locally(ref, heap, data, 1); } -static void trace_conservative_ref_and_enqueue_locally - (struct gc_conservative_ref ref, struct gc_heap *heap, void *data) { +static void +trace_conservative_ref_and_enqueue_locally(struct gc_conservative_ref ref, + struct gc_heap *heap, + void *data) { return do_trace_conservative_ref_and_enqueue_locally(ref, heap, data, 0); } -static void trace_and_enqueue_globally(struct gc_edge edge, - struct gc_heap *heap, - void *unused) { +static void +trace_and_enqueue_globally(struct gc_edge edge, struct gc_heap *heap, + void *unused) { if (trace_edge(heap, edge)) gc_tracer_enqueue_root(&heap->tracer, gc_edge_ref(edge)); } -static inline void do_trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data, - int possibly_interior) { +static inline void +do_trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, + struct gc_heap *heap, + void *data, + int possibly_interior) { struct gc_ref object = trace_conservative_ref(heap, ref, possibly_interior); if (gc_ref_is_heap_object(object)) gc_tracer_enqueue_root(&heap->tracer, object); } -static void trace_possibly_interior_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { +static void +trace_possibly_interior_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, + struct gc_heap *heap, + void *data) { return do_trace_conservative_ref_and_enqueue_globally(ref, heap, data, 1); } -static void trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { +static void +trace_conservative_ref_and_enqueue_globally(struct gc_conservative_ref ref, + struct gc_heap *heap, + void *data) { return do_trace_conservative_ref_and_enqueue_globally(ref, heap, data, 0); } @@ -1186,9 +445,9 @@ trace_conservative_edges(uintptr_t low, trace(load_conservative_ref(addr), heap, data); } -static inline void tracer_trace_conservative_ref(struct gc_conservative_ref ref, - struct gc_heap *heap, - void *data) { +static inline void +tracer_trace_conservative_ref(struct gc_conservative_ref ref, + struct gc_heap *heap, void *data) { struct gc_trace_worker *worker = data; int possibly_interior = 0; struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior); @@ -1196,22 +455,21 @@ static inline void tracer_trace_conservative_ref(struct gc_conservative_ref ref, gc_trace_worker_enqueue(worker, resolved); } -static inline void trace_one_conservatively(struct gc_ref ref, 
- struct gc_heap *heap, - struct gc_trace_worker *worker) { +static inline void +trace_one_conservatively(struct gc_ref ref, struct gc_heap *heap, + struct gc_trace_worker *worker) { size_t bytes; - if (GC_LIKELY(mark_space_contains(heap_mark_space(heap), ref))) { + if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) { // Generally speaking we trace conservatively and don't allow much // in the way of incremental precise marking on a // conservative-by-default heap. But, we make an exception for // ephemerons. - uint8_t meta = *metadata_byte_for_addr(gc_ref_value(ref)); - if (GC_UNLIKELY(meta & METADATA_BYTE_EPHEMERON)) { + if (GC_UNLIKELY(nofl_is_ephemeron(ref))) { gc_trace_ephemeron(gc_ref_heap_object(ref), tracer_visit, heap, worker); return; } - bytes = mark_space_object_size(heap_mark_space(heap), ref); + bytes = nofl_space_object_size(heap_nofl_space(heap), ref); } else { bytes = large_object_space_object_size(heap_large_object_space(heap), ref); } @@ -1221,17 +479,18 @@ static inline void trace_one_conservatively(struct gc_ref ref, worker); } -static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, - struct gc_trace_worker *worker) { +static inline void +trace_one(struct gc_ref ref, struct gc_heap *heap, + struct gc_trace_worker *worker) { if (gc_has_conservative_intraheap_edges()) trace_one_conservatively(ref, heap, worker); else gc_trace_object(ref, tracer_visit, heap, worker, NULL); } -static inline void trace_root(struct gc_root root, - struct gc_heap *heap, - struct gc_trace_worker *worker) { +static inline void +trace_root(struct gc_root root, struct gc_heap *heap, + struct gc_trace_worker *worker) { switch (root.kind) { case GC_ROOT_KIND_HEAP: gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker); @@ -1251,8 +510,8 @@ static inline void trace_root(struct gc_root root, } } -static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap, - void *unused) { +static void +visit_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) { gc_tracer_add_root(&heap->tracer, gc_root_edge(edge)); } @@ -1304,7 +563,8 @@ trace_mutator_conservative_roots(struct gc_mutator *mut, // Mark the roots of a mutator that is stopping for GC. We can't // enqueue them directly, so we send them to the controller in a buffer. 
-static void trace_stopping_mutator_roots(struct gc_mutator *mut) { +static void +trace_stopping_mutator_roots(struct gc_mutator *mut) { GC_ASSERT(mutator_should_mark_while_stopping(mut)); struct gc_heap *heap = mutator_heap(mut); trace_mutator_conservative_roots(mut, @@ -1313,20 +573,23 @@ static void trace_stopping_mutator_roots(struct gc_mutator *mut) { gc_trace_mutator_roots(mut->roots, trace_and_enqueue_locally, heap, mut); } -static void trace_mutator_conservative_roots_with_lock(struct gc_mutator *mut) { +static void +trace_mutator_conservative_roots_with_lock(struct gc_mutator *mut) { trace_mutator_conservative_roots(mut, mark_and_globally_enqueue_mutator_conservative_roots, mutator_heap(mut), NULL); } -static void trace_mutator_roots_with_lock(struct gc_mutator *mut) { +static void +trace_mutator_roots_with_lock(struct gc_mutator *mut) { trace_mutator_conservative_roots_with_lock(mut); gc_trace_mutator_roots(mut->roots, trace_and_enqueue_globally, mutator_heap(mut), NULL); } -static void trace_mutator_roots_with_lock_before_stop(struct gc_mutator *mut) { +static void +trace_mutator_roots_with_lock_before_stop(struct gc_mutator *mut) { gc_stack_capture_hot(&mut->stack); if (mutator_should_mark_while_stopping(mut)) trace_mutator_roots_with_lock(mut); @@ -1334,19 +597,18 @@ static void trace_mutator_roots_with_lock_before_stop(struct gc_mutator *mut) { enqueue_mutator_for_tracing(mut); } -static void release_stopping_mutator_roots(struct gc_mutator *mut) { +static void +release_stopping_mutator_roots(struct gc_mutator *mut) { mutator_mark_buf_release(&mut->mark_buf); } -static void wait_for_mutators_to_stop(struct gc_heap *heap) { +static void +wait_for_mutators_to_stop(struct gc_heap *heap) { heap->paused_mutator_count++; while (!all_mutators_stopped(heap)) pthread_cond_wait(&heap->collector_cond, &heap->lock); } -static void finish_sweeping(struct gc_mutator *mut); -static void finish_sweeping_in_block(struct gc_mutator *mut); - static void trace_mutator_conservative_roots_after_stop(struct gc_heap *heap) { int active_mutators_already_marked = heap_should_mark_while_stopping(heap); if (!active_mutators_already_marked) @@ -1361,7 +623,8 @@ static void trace_mutator_conservative_roots_after_stop(struct gc_heap *heap) { trace_mutator_conservative_roots_with_lock(mut); } -static void trace_mutator_roots_after_stop(struct gc_heap *heap) { +static void +trace_mutator_roots_after_stop(struct gc_heap *heap) { struct gc_mutator *mut = atomic_load(&heap->mutator_trace_list); int active_mutators_already_marked = heap_should_mark_while_stopping(heap); while (mut) { @@ -1380,88 +643,42 @@ static void trace_mutator_roots_after_stop(struct gc_heap *heap) { atomic_store(&heap->mutator_trace_list, NULL); for (struct gc_mutator *mut = heap->inactive_mutators; mut; mut = mut->next) { - finish_sweeping_in_block(mut); + nofl_finish_sweeping_in_block(&mut->allocator, heap_nofl_space(heap)); trace_mutator_roots_with_lock(mut); } } -static void trace_global_conservative_roots(struct gc_heap *heap) { +static void +trace_global_conservative_roots(struct gc_heap *heap) { if (gc_has_global_conservative_roots()) gc_platform_visit_global_conservative_roots (mark_and_globally_enqueue_heap_conservative_roots, heap, NULL); } -static void enqueue_generational_root(struct gc_ref ref, struct gc_heap *heap) { +static void +enqueue_generational_root(struct gc_ref ref, struct gc_heap *heap) { gc_tracer_enqueue_root(&heap->tracer, ref); } -// Note that it's quite possible (and even likely) that any given remset -// byte 
doesn't hold any roots, if all stores were to nursery objects. -STATIC_ASSERT_EQ(GRANULES_PER_REMSET_BYTE % 8, 0); -static void mark_space_trace_card(struct mark_space *space, - struct gc_heap *heap, struct slab *slab, - size_t card) { - uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0]; - size_t granule_base = card * GRANULES_PER_REMSET_BYTE; - for (size_t granule_in_remset = 0; - granule_in_remset < GRANULES_PER_REMSET_BYTE; - granule_in_remset += 8, granule_base += 8) { - uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base); - mark_bytes &= space->sweep_mask; - while (mark_bytes) { - size_t granule_offset = count_zero_bytes(mark_bytes); - mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8)); - size_t granule = granule_base + granule_offset; - uintptr_t addr = first_addr_in_slab + granule * GRANULE_SIZE; - GC_ASSERT(metadata_byte_for_addr(addr) == &slab->metadata[granule]); - enqueue_generational_root(gc_ref(addr), heap); - } - } -} - -static void mark_space_trace_remembered_set(struct mark_space *space, - struct gc_heap *heap) { - GC_ASSERT(!space->evacuating); - for (size_t s = 0; s < space->nslabs; s++) { - struct slab *slab = &space->slabs[s]; - uint8_t *remset = slab->remembered_set; - for (size_t card_base = 0; - card_base < REMSET_BYTES_PER_SLAB; - card_base += 8) { - uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base); - if (!remset_bytes) continue; - memset(remset + card_base, 0, 8); - while (remset_bytes) { - size_t card_offset = count_zero_bytes(remset_bytes); - remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8)); - mark_space_trace_card(space, heap, slab, card_base + card_offset); - } - } - } -} - -static void mark_space_clear_remembered_set(struct mark_space *space) { - if (!GC_GENERATIONAL) return; - for (size_t slab = 0; slab < space->nslabs; slab++) { - memset(space->slabs[slab].remembered_set, 0, REMSET_BYTES_PER_SLAB); - } -} - -void gc_write_barrier_extern(struct gc_ref obj, size_t obj_size, - struct gc_edge edge, struct gc_ref new_val) { +void +gc_write_barrier_extern(struct gc_ref obj, size_t obj_size, + struct gc_edge edge, struct gc_ref new_val) { GC_ASSERT(obj_size > gc_allocator_large_threshold()); gc_object_set_remembered(obj); } -static void trace_generational_roots(struct gc_heap *heap) { +static void +trace_generational_roots(struct gc_heap *heap) { // TODO: Add lospace nursery. if (atomic_load(&heap->gc_kind) == GC_COLLECTION_MINOR) { - mark_space_trace_remembered_set(heap_mark_space(heap), heap); + nofl_space_trace_remembered_set(heap_nofl_space(heap), + enqueue_generational_root, + heap); large_object_space_trace_remembered_set(heap_large_object_space(heap), enqueue_generational_root, heap); } else { - mark_space_clear_remembered_set(heap_mark_space(heap)); + nofl_space_clear_remembered_set(heap_nofl_space(heap)); large_object_space_clear_remembered_set(heap_large_object_space(heap)); } } @@ -1502,7 +719,7 @@ pause_mutator_for_collection_with_lock(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); GC_ASSERT(mutators_are_stopping(heap)); MUTATOR_EVENT(mut, mutator_stopping); - finish_sweeping_in_block(mut); + nofl_finish_sweeping_in_block(&mut->allocator, heap_nofl_space(heap)); gc_stack_capture_hot(&mut->stack); if (mutator_should_mark_while_stopping(mut)) // No need to collect results in mark buf; we can enqueue roots directly. 
@@ -1513,11 +730,12 @@ pause_mutator_for_collection_with_lock(struct gc_mutator *mut) { } static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) GC_NEVER_INLINE; -static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) { +static void +pause_mutator_for_collection_without_lock(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); GC_ASSERT(mutators_are_stopping(heap)); MUTATOR_EVENT(mut, mutator_stopping); - finish_sweeping(mut); + nofl_finish_sweeping(&mut->allocator, heap_nofl_space(heap)); gc_stack_capture_hot(&mut->stack); if (mutator_should_mark_while_stopping(mut)) trace_stopping_mutator_roots(mut); @@ -1528,66 +746,45 @@ static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) { release_stopping_mutator_roots(mut); } -static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) { +static inline void +maybe_pause_mutator_for_collection(struct gc_mutator *mut) { while (mutators_are_stopping(mutator_heap(mut))) pause_mutator_for_collection_without_lock(mut); } -static void reset_sweeper(struct mark_space *space) { - space->next_block = (uintptr_t) &space->slabs[0].blocks; -} - -static void update_mark_patterns(struct mark_space *space, - int advance_mark_mask) { - uint8_t survivor_mask = space->marked_mask; - uint8_t next_marked_mask = rotate_dead_survivor_marked(survivor_mask); - if (advance_mark_mask) - space->marked_mask = next_marked_mask; - space->live_mask = survivor_mask | next_marked_mask; - space->sweep_mask = broadcast_byte(space->live_mask); -} - -static void reset_statistics(struct mark_space *space) { - space->granules_freed_by_last_collection = 0; - space->fragmentation_granules_since_last_collection = 0; -} - static int maybe_grow_heap(struct gc_heap *heap) { return 0; } -static double heap_last_gc_yield(struct gc_heap *heap) { - struct mark_space *mark_space = heap_mark_space(heap); - size_t mark_space_yield = mark_space->granules_freed_by_last_collection; - mark_space_yield <<= GRANULE_SIZE_LOG_2; - size_t evacuation_block_yield = - atomic_load_explicit(&mark_space->evacuation_targets.count, - memory_order_acquire) * BLOCK_SIZE; - size_t minimum_evacuation_block_yield = - heap->size * mark_space->evacuation_minimum_reserve; - if (evacuation_block_yield < minimum_evacuation_block_yield) - evacuation_block_yield = 0; - else - evacuation_block_yield -= minimum_evacuation_block_yield; +static double +heap_last_gc_yield(struct gc_heap *heap) { + struct nofl_space *nofl_space = heap_nofl_space(heap); + size_t nofl_yield = nofl_space_yield(nofl_space); + size_t evacuation_reserve = nofl_space_evacuation_reserve(nofl_space); + // FIXME: Size nofl evacuation reserve based on size of nofl space, + // not heap size. 
+ size_t minimum_evacuation_reserve = + heap->size * nofl_space->evacuation_minimum_reserve; + if (evacuation_reserve > minimum_evacuation_reserve) + nofl_yield += evacuation_reserve - minimum_evacuation_reserve; struct large_object_space *lospace = heap_large_object_space(heap); size_t lospace_yield = lospace->pages_freed_by_last_collection; lospace_yield <<= lospace->page_size_log2; - double yield = mark_space_yield + lospace_yield + evacuation_block_yield; + double yield = nofl_yield + lospace_yield; return yield / heap->size; } -static double heap_fragmentation(struct gc_heap *heap) { - struct mark_space *mark_space = heap_mark_space(heap); - size_t fragmentation_granules = - mark_space->fragmentation_granules_since_last_collection; - size_t heap_granules = heap->size >> GRANULE_SIZE_LOG_2; - - return ((double)fragmentation_granules) / heap_granules; +static double +heap_fragmentation(struct gc_heap *heap) { + struct nofl_space *nofl_space = heap_nofl_space(heap); + size_t fragmentation = nofl_space_fragmentation(nofl_space); + return ((double)fragmentation) / heap->size; } -static void detect_out_of_memory(struct gc_heap *heap) { - struct mark_space *mark_space = heap_mark_space(heap); +static void +detect_out_of_memory(struct gc_heap *heap) { + struct nofl_space *nofl_space = heap_nofl_space(heap); struct large_object_space *lospace = heap_large_object_space(heap); if (heap->count == 0) @@ -1596,28 +793,28 @@ static void detect_out_of_memory(struct gc_heap *heap) { double last_yield = heap_last_gc_yield(heap); double fragmentation = heap_fragmentation(heap); - double yield_epsilon = BLOCK_SIZE * 1.0 / heap->size; - double fragmentation_epsilon = LARGE_OBJECT_THRESHOLD * 1.0 / BLOCK_SIZE; + double yield_epsilon = NOFL_BLOCK_SIZE * 1.0 / heap->size; + double fragmentation_epsilon = LARGE_OBJECT_THRESHOLD * 1.0 / NOFL_BLOCK_SIZE; if (last_yield - fragmentation > yield_epsilon) return; if (fragmentation > fragmentation_epsilon - && atomic_load(&mark_space->evacuation_targets.count)) + && atomic_load(&nofl_space->evacuation_targets.count)) return; // No yield in last gc and we do not expect defragmentation to // be able to yield more space: out of memory. 
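// Concretely, with illustrative numbers: for a 64 MB heap,
// yield_epsilon is 64 kB / 64 MB, about 0.1%, and fragmentation_epsilon
// is 8 kB / 64 kB = 12.5%.  The crash below therefore fires only when
// the last collection freed less than roughly one block of space beyond
// what fragmentation accounts for, and fragmentation is either too low
// to bother compacting or there are no evacuation target blocks left.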
fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n", - heap->size, mark_space->nslabs); + heap->size, nofl_space->nslabs); GC_CRASH(); } -static double clamp_major_gc_yield_threshold(struct gc_heap *heap, - double threshold) { +static double +clamp_major_gc_yield_threshold(struct gc_heap *heap, double threshold) { if (threshold < heap->minimum_major_gc_yield_threshold) threshold = heap->minimum_major_gc_yield_threshold; - double one_block = BLOCK_SIZE * 1.0 / heap->size; + double one_block = NOFL_BLOCK_SIZE * 1.0 / heap->size; if (threshold < one_block) threshold = one_block; return threshold; @@ -1626,13 +823,13 @@ static double clamp_major_gc_yield_threshold(struct gc_heap *heap, static enum gc_collection_kind determine_collection_kind(struct gc_heap *heap, enum gc_collection_kind requested) { - struct mark_space *mark_space = heap_mark_space(heap); + struct nofl_space *nofl_space = heap_nofl_space(heap); enum gc_collection_kind previous_gc_kind = atomic_load(&heap->gc_kind); enum gc_collection_kind gc_kind; int mark_while_stopping = 1; double yield = heap_last_gc_yield(heap); double fragmentation = heap_fragmentation(heap); - ssize_t pending = atomic_load_explicit(&mark_space->pending_unavailable_bytes, + ssize_t pending = atomic_load_explicit(&nofl_space->pending_unavailable_bytes, memory_order_acquire); if (heap->count == 0) { @@ -1725,186 +922,44 @@ determine_collection_kind(struct gc_heap *heap, return gc_kind; } -static void release_evacuation_target_blocks(struct mark_space *space) { - // Move excess evacuation target blocks back to empties. - size_t total = space->nslabs * NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = atomic_load_explicit(&space->unavailable.count, - memory_order_acquire); - size_t reserve = space->evacuation_minimum_reserve * (total - unavailable); - finish_evacuation_allocator(&space->evacuation_allocator, - &space->evacuation_targets, &space->empty, - reserve); -} - -static void prepare_for_evacuation(struct gc_heap *heap) { - struct mark_space *space = heap_mark_space(heap); - - if (heap->gc_kind != GC_COLLECTION_COMPACTING) { - space->evacuating = 0; - space->evacuation_reserve = space->evacuation_minimum_reserve; - return; - } - - // Put the mutator into evacuation mode, collecting up to 50% of free space as - // evacuation blocks. - space->evacuation_reserve = 0.5; - - size_t target_blocks = space->evacuation_targets.count; - DEBUG("evacuation target block count: %zu\n", target_blocks); - - if (target_blocks == 0) { - DEBUG("no evacuation target blocks, disabling evacuation for this round\n"); - space->evacuating = 0; - return; - } - - size_t target_granules = target_blocks * GRANULES_PER_BLOCK; - // Compute histogram where domain is the number of granules in a block - // that survived the last collection, aggregated into 33 buckets, and - // range is number of blocks in that bucket. (Bucket 0 is for blocks - // that were found to be completely empty; such blocks may be on the - // evacuation target list.) 
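// Worked numbers for this layout: GRANULES_PER_BLOCK is 64 kB / 16 =
// 4096, so bucket_size below is 4096 / 32 = 128 granules.  A block with
// 1 to 128 surviving granules lands in bucket 1, one with 129 to 256 in
// bucket 2, and so on up to bucket 32 for blocks that are nearly full
// of survivors; bucket 0 stays reserved for the completely empty blocks
// described above.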
- const size_t bucket_count = 33; - size_t histogram[33] = {0,}; - size_t bucket_size = GRANULES_PER_BLOCK / 32; - size_t empties = 0; - for (size_t slab = 0; slab < space->nslabs; slab++) { - for (size_t block = 0; block < NONMETA_BLOCKS_PER_SLAB; block++) { - struct block_summary *summary = &space->slabs[slab].summaries[block]; - if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE)) - continue; - if (!block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP)) { - empties++; - continue; - } - size_t survivor_granules = GRANULES_PER_BLOCK - summary->free_granules; - size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; - histogram[bucket]++; - } - } - - // Blocks which lack the NEEDS_SWEEP flag are empty, either because - // they have been removed from the pool and have the UNAVAILABLE flag - // set, or because they are on the empties or evacuation target - // lists. When evacuation starts, the empties list should be empty. - GC_ASSERT(empties == target_blocks); - - // Now select a number of blocks that is likely to fill the space in - // the target blocks. Prefer candidate blocks with fewer survivors - // from the last GC, to increase expected free block yield. - for (size_t bucket = 0; bucket < bucket_count; bucket++) { - size_t bucket_granules = bucket * bucket_size * histogram[bucket]; - if (bucket_granules <= target_granules) { - target_granules -= bucket_granules; - } else { - histogram[bucket] = target_granules / (bucket_size * bucket); - target_granules = 0; - } - } - - // Having selected the number of blocks, now we set the evacuation - // candidate flag on all blocks. - for (size_t slab = 0; slab < space->nslabs; slab++) { - for (size_t block = 0; block < NONMETA_BLOCKS_PER_SLAB; block++) { - struct block_summary *summary = &space->slabs[slab].summaries[block]; - if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE)) - continue; - if (!block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP)) - continue; - size_t survivor_granules = GRANULES_PER_BLOCK - summary->free_granules; - size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; - if (histogram[bucket]) { - block_summary_set_flag(summary, BLOCK_EVACUATE); - histogram[bucket]--; - } else { - block_summary_clear_flag(summary, BLOCK_EVACUATE); - } - } - } - - // We are ready to evacuate! 
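To summarize the selection pass above, which this patch moves into nofl-space.h: whole histogram buckets are consumed in order of increasing survivor count until the expected survivors would fill the evacuation target blocks. A minimal sketch follows, with a hypothetical function name and the bucket layout from the removed code; after it runs, histogram[b] holds how many blocks of bucket b should receive the BLOCK_EVACUATE flag in the second pass.

// Sketch only: consume the histogram, cheapest-to-evacuate buckets first.
static void
select_evacuation_candidates(size_t *histogram, size_t bucket_count,
                             size_t bucket_size, size_t target_granules) {
  for (size_t bucket = 0; bucket < bucket_count; bucket++) {
    size_t bucket_granules = bucket * bucket_size * histogram[bucket];
    if (bucket_granules <= target_granules) {
      // Every block in this bucket fits; keep them all as candidates.
      target_granules -= bucket_granules;
    } else {
      // Only part of this bucket fits; keep a prefix and stop counting.
      histogram[bucket] = target_granules / (bucket_size * bucket);
      target_granules = 0;
    }
  }
}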
- prepare_evacuation_allocator(&space->evacuation_allocator, - &space->evacuation_targets); - space->evacuating = 1; -} - -static void trace_conservative_roots_after_stop(struct gc_heap *heap) { - GC_ASSERT(!heap_mark_space(heap)->evacuating); +static void +trace_conservative_roots_after_stop(struct gc_heap *heap) { + GC_ASSERT(!heap_nofl_space(heap)->evacuating); if (gc_has_mutator_conservative_roots()) trace_mutator_conservative_roots_after_stop(heap); if (gc_has_global_conservative_roots()) trace_global_conservative_roots(heap); } -static void trace_pinned_roots_after_stop(struct gc_heap *heap) { - GC_ASSERT(!heap_mark_space(heap)->evacuating); +static void +trace_pinned_roots_after_stop(struct gc_heap *heap) { + GC_ASSERT(!heap_nofl_space(heap)->evacuating); trace_conservative_roots_after_stop(heap); } -static void trace_roots_after_stop(struct gc_heap *heap) { +static void +trace_roots_after_stop(struct gc_heap *heap) { trace_mutator_roots_after_stop(heap); gc_trace_heap_roots(heap->roots, trace_and_enqueue_globally, heap, NULL); gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL); trace_generational_roots(heap); } -static void verify_mark_space_before_restart(struct mark_space *space) { - // Iterate objects in each block, verifying that the END bytes correspond to - // the measured object size. - for (size_t slab = 0; slab < space->nslabs; slab++) { - for (size_t block = 0; block < NONMETA_BLOCKS_PER_SLAB; block++) { - struct block_summary *summary = &space->slabs[slab].summaries[block]; - if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE)) - continue; - - uintptr_t addr = (uintptr_t)space->slabs[slab].blocks[block].data; - uintptr_t limit = addr + BLOCK_SIZE; - uint8_t *meta = metadata_byte_for_addr(addr); - while (addr < limit) { - if (meta[0] & space->live_mask) { - struct gc_ref obj = gc_ref(addr); - size_t obj_bytes = 0; - gc_trace_object(gc_ref(addr), NULL, NULL, NULL, &obj_bytes); - size_t granules = size_to_granules(obj_bytes); - GC_ASSERT(granules); - for (size_t granule = 0; granule < granules - 1; granule++) - GC_ASSERT(!(meta[granule] & METADATA_BYTE_END)); - GC_ASSERT(meta[granules - 1] & METADATA_BYTE_END); - meta += granules; - addr += granules * GRANULE_SIZE; - } else { - meta++; - addr += GRANULE_SIZE; - } - } - GC_ASSERT(addr == limit); - } - } -} - -static void mark_space_finish_gc(struct mark_space *space, - enum gc_collection_kind gc_kind) { - space->evacuating = 0; - reset_sweeper(space); - update_mark_patterns(space, 0); - reset_statistics(space); - release_evacuation_target_blocks(space); - if (GC_DEBUG) - verify_mark_space_before_restart(space); -} - -static void resolve_ephemerons_lazily(struct gc_heap *heap) { +static void +resolve_ephemerons_lazily(struct gc_heap *heap) { atomic_store_explicit(&heap->check_pending_ephemerons, 0, memory_order_release); } -static void resolve_ephemerons_eagerly(struct gc_heap *heap) { +static void +resolve_ephemerons_eagerly(struct gc_heap *heap) { atomic_store_explicit(&heap->check_pending_ephemerons, 1, memory_order_release); gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); } -static int enqueue_resolved_ephemerons(struct gc_heap *heap) { +static int +enqueue_resolved_ephemerons(struct gc_heap *heap) { struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); if (!resolved) return 0; @@ -1912,7 +967,8 @@ static int enqueue_resolved_ephemerons(struct gc_heap *heap) { return 1; } -static void trace_resolved_ephemerons(struct gc_heap *heap) { +static void 
+trace_resolved_ephemerons(struct gc_heap *heap) { for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); resolved; resolved = gc_pop_resolved_ephemerons(heap)) { @@ -1921,7 +977,8 @@ static void trace_resolved_ephemerons(struct gc_heap *heap) { } } -static void resolve_finalizers(struct gc_heap *heap) { +static void +resolve_finalizers(struct gc_heap *heap) { for (size_t priority = 0; priority < gc_finalizer_priority_count(); priority++) { @@ -1934,14 +991,15 @@ static void resolve_finalizers(struct gc_heap *heap) { gc_notify_finalizers(heap->finalizer_state, heap); } -static void sweep_ephemerons(struct gc_heap *heap) { +static void +sweep_ephemerons(struct gc_heap *heap) { return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1); } -static void collect(struct gc_mutator *mut, - enum gc_collection_kind requested_kind) { +static void +collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { struct gc_heap *heap = mutator_heap(mut); - struct mark_space *space = heap_mark_space(heap); + struct nofl_space *nofl_space = heap_nofl_space(heap); struct large_object_space *lospace = heap_large_object_space(heap); struct gc_extern_space *exspace = heap_extern_space(heap); if (maybe_grow_heap(heap)) { @@ -1954,7 +1012,7 @@ static void collect(struct gc_mutator *mut, determine_collection_kind(heap, requested_kind); int is_minor = gc_kind == GC_COLLECTION_MINOR; HEAP_EVENT(heap, prepare_gc, gc_kind); - update_mark_patterns(space, !is_minor); + nofl_space_update_mark_patterns(nofl_space, !is_minor); large_object_space_start_gc(lospace, is_minor); gc_extern_space_start_gc(exspace, is_minor); resolve_ephemerons_lazily(heap); @@ -1962,7 +1020,7 @@ static void collect(struct gc_mutator *mut, HEAP_EVENT(heap, requesting_stop); request_mutators_to_stop(heap); trace_mutator_roots_with_lock_before_stop(mut); - finish_sweeping(mut); + nofl_finish_sweeping(&mut->allocator, nofl_space); HEAP_EVENT(heap, waiting_for_stop); wait_for_mutators_to_stop(heap); HEAP_EVENT(heap, mutators_stopped); @@ -1972,7 +1030,7 @@ static void collect(struct gc_mutator *mut, DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation); detect_out_of_memory(heap); trace_pinned_roots_after_stop(heap); - prepare_for_evacuation(heap); + nofl_space_prepare_for_evacuation(nofl_space, gc_kind); trace_roots_after_stop(heap); HEAP_EVENT(heap, roots_traced); gc_tracer_trace(&heap->tracer); @@ -1984,7 +1042,7 @@ static void collect(struct gc_mutator *mut, HEAP_EVENT(heap, finalizers_traced); sweep_ephemerons(heap); gc_tracer_release(&heap->tracer); - mark_space_finish_gc(space, gc_kind); + nofl_space_finish_gc(nofl_space, gc_kind); large_object_space_finish_gc(lospace, is_minor); gc_extern_space_finish_gc(exspace, is_minor); heap->count++; @@ -1994,281 +1052,9 @@ static void collect(struct gc_mutator *mut, allow_mutators_to_continue(heap); } -static int sweep_byte(uint8_t *loc, uintptr_t sweep_mask) { - uint8_t metadata = atomic_load_explicit(loc, memory_order_relaxed); - // If the metadata byte is nonzero, that means either a young, dead, - // survived, or marked object. If it's live (survived or marked), we - // found the next mark. Otherwise it's dead and we clear the byte. - // If we see an END, that means an end of a dead object; clear it. 
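A note on the sweep_mask consulted here: it is word-sized so that sweep_byte can test one metadata byte while sweep_word (below) tests a whole word of metadata in one load, which only works if the live-mark bits are replicated into every byte lane. One possible way to build such a mask is sketched below; the helper name is hypothetical and the tree may build it with its own SWAR utilities.

// Sketch: replicate a one-byte mark mask into every byte of a word.
// On 32-bit targets the constant truncates to 0x01010101, which still
// broadcasts correctly.
static uintptr_t
broadcast_mark_mask(uint8_t mask) {
  return (uintptr_t)mask * (uintptr_t)0x0101010101010101ULL;
}

With such a mask, a single AND over an 8-byte load of the metadata table answers "is anything in these eight granules live?".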
- if (metadata) { - if (metadata & sweep_mask) - return 1; - atomic_store_explicit(loc, 0, memory_order_relaxed); - } - return 0; -} - -static int sweep_word(uintptr_t *loc, uintptr_t sweep_mask) { - uintptr_t metadata = atomic_load_explicit(loc, memory_order_relaxed); - if (metadata) { - if (metadata & sweep_mask) - return 1; - atomic_store_explicit(loc, 0, memory_order_relaxed); - } - return 0; -} - -static uintptr_t mark_space_next_block_to_sweep(struct mark_space *space) { - uintptr_t block = atomic_load_explicit(&space->next_block, - memory_order_acquire); - uintptr_t next_block; - do { - if (block == 0) - return 0; - - next_block = block + BLOCK_SIZE; - if (next_block % SLAB_SIZE == 0) { - uintptr_t hi_addr = space->low_addr + space->extent; - if (next_block == hi_addr) - next_block = 0; - else - next_block += META_BLOCKS_PER_SLAB * BLOCK_SIZE; - } - } while (!atomic_compare_exchange_weak(&space->next_block, &block, - next_block)); - return block; -} - -static void finish_block(struct gc_mutator *mut) { - GC_ASSERT(mut->block); - struct block_summary *block = block_summary_for_addr(mut->block); - struct mark_space *space = heap_mark_space(mutator_heap(mut)); - atomic_fetch_add(&space->granules_freed_by_last_collection, - block->free_granules); - atomic_fetch_add(&space->fragmentation_granules_since_last_collection, - block->fragmentation_granules); - - // If this block has mostly survivors, we should avoid sweeping it and - // trying to allocate into it for a minor GC. Sweep it next time to - // clear any garbage allocated in this cycle and mark it as - // "venerable" (i.e., old). - GC_ASSERT(!block_summary_has_flag(block, BLOCK_VENERABLE)); - if (!block_summary_has_flag(block, BLOCK_VENERABLE_AFTER_SWEEP) && - block->free_granules < GRANULES_PER_BLOCK * space->venerable_threshold) - block_summary_set_flag(block, BLOCK_VENERABLE_AFTER_SWEEP); - - mut->block = mut->alloc = mut->sweep = 0; -} - -// Sweep some heap to reclaim free space, resetting mut->alloc and -// mut->sweep. Return the size of the hole in granules. -static size_t next_hole_in_block(struct gc_mutator *mut) { - uintptr_t sweep = mut->sweep; - if (sweep == 0) - return 0; - uintptr_t limit = mut->block + BLOCK_SIZE; - uintptr_t sweep_mask = heap_mark_space(mutator_heap(mut))->sweep_mask; - - while (sweep != limit) { - GC_ASSERT((sweep & (GRANULE_SIZE - 1)) == 0); - uint8_t* metadata = metadata_byte_for_addr(sweep); - size_t limit_granules = (limit - sweep) >> GRANULE_SIZE_LOG_2; - - // Except for when we first get a block, mut->sweep is positioned - // right after a hole, which can point to either the end of the - // block or to a live object. Assume that a live object is more - // common. - { - size_t live_granules = 0; - while (limit_granules && (metadata[0] & sweep_mask)) { - // Object survived collection; skip over it and continue sweeping. 
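The call just below relies on the END bit in the metadata bytes to delimit objects: END is set only on an object's last granule. Conceptually it does the following; this is a simplified byte-at-a-time sketch with a hypothetical name, and the real helper may scan more than one byte at a time.

// Sketch: granules covered by a live object whose first metadata byte
// is *metadata.  The END bit marks the object's last granule.
static size_t
object_granules_from_metadata(const uint8_t *metadata) {
  size_t granules = 1;
  while (!(metadata[granules - 1] & METADATA_BYTE_END))
    granules++;
  return granules;
}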
- size_t object_granules = mark_space_live_object_granules(metadata); - live_granules += object_granules; - limit_granules -= object_granules; - metadata += object_granules; - } - if (!limit_granules) - break; - sweep += live_granules * GRANULE_SIZE; - } - - size_t free_granules = next_mark(metadata, limit_granules, sweep_mask); - GC_ASSERT(free_granules); - GC_ASSERT(free_granules <= limit_granules); - - struct block_summary *summary = block_summary_for_addr(sweep); - summary->hole_count++; - GC_ASSERT(free_granules <= GRANULES_PER_BLOCK - summary->free_granules); - summary->free_granules += free_granules; - - size_t free_bytes = free_granules * GRANULE_SIZE; - mut->alloc = sweep; - mut->sweep = sweep + free_bytes; - return free_granules; - } - - finish_block(mut); - return 0; -} - -static void finish_hole(struct gc_mutator *mut) { - size_t granules = (mut->sweep - mut->alloc) / GRANULE_SIZE; - if (granules) { - struct block_summary *summary = block_summary_for_addr(mut->block); - summary->holes_with_fragmentation++; - summary->fragmentation_granules += granules; - uint8_t *metadata = metadata_byte_for_addr(mut->alloc); - memset(metadata, 0, granules); - mut->alloc = mut->sweep; - } - // FIXME: add to fragmentation -} - -static int maybe_release_swept_empty_block(struct gc_mutator *mut) { - GC_ASSERT(mut->block); - struct mark_space *space = heap_mark_space(mutator_heap(mut)); - uintptr_t block = mut->block; - if (atomic_load_explicit(&space->pending_unavailable_bytes, - memory_order_acquire) <= 0) - return 0; - - push_unavailable_block(space, block); - atomic_fetch_sub(&space->pending_unavailable_bytes, BLOCK_SIZE); - mut->alloc = mut->sweep = mut->block = 0; - return 1; -} - -static size_t next_hole(struct gc_mutator *mut) { - finish_hole(mut); - // As we sweep if we find that a block is empty, we return it to the - // empties list. Empties are precious. But if we return 10 blocks in - // a row, and still find an 11th empty, go ahead and use it. - size_t empties_countdown = 10; - struct mark_space *space = heap_mark_space(mutator_heap(mut)); - while (1) { - // Sweep current block for a hole. - size_t granules = next_hole_in_block(mut); - if (granules) { - // If the hole spans only part of a block, give it to the mutator. - if (granules < GRANULES_PER_BLOCK) - return granules; - struct block_summary *summary = block_summary_for_addr(mut->block); - // Sweep mark bytes for completely empty block. - memset(metadata_byte_for_addr(mut->block), 0, GRANULES_PER_BLOCK); - block_summary_clear_flag(summary, BLOCK_NEEDS_SWEEP); - // Sweeping found a completely empty block. If we are below the - // minimum evacuation reserve, take the block. - if (push_evacuation_target_if_needed(space, mut->block)) { - mut->alloc = mut->sweep = mut->block = 0; - continue; - } - // If we have pending pages to release to the OS, we should unmap - // this block. - if (maybe_release_swept_empty_block(mut)) - continue; - // Otherwise if we've already returned lots of empty blocks to the - // freelist, give this block to the mutator. - if (!empties_countdown) { - // After this block is allocated into, it will need to be swept. - block_summary_set_flag(summary, BLOCK_NEEDS_SWEEP); - return granules; - } - // Otherwise we push to the empty blocks list. - push_empty_block(space, mut->block); - mut->alloc = mut->sweep = mut->block = 0; - empties_countdown--; - } - GC_ASSERT(mut->block == 0); - while (1) { - uintptr_t block = mark_space_next_block_to_sweep(space); - if (block) { - // Sweeping found a block. 
We might take it for allocation, or - // we might send it back. - struct block_summary *summary = block_summary_for_addr(block); - // If it's marked unavailable, it's already on a list of - // unavailable blocks, so skip and get the next block. - if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE)) - continue; - if (block_summary_has_flag(summary, BLOCK_VENERABLE)) { - // Skip venerable blocks after a minor GC -- we don't need to - // sweep as they weren't allocated into last cycle, and the - // mark bytes didn't rotate, so we have no cleanup to do; and - // we shouldn't try to allocate into them as it's not worth - // it. Any wasted space is measured as fragmentation. - if (mutator_heap(mut)->last_collection_was_minor) - continue; - else - block_summary_clear_flag(summary, BLOCK_VENERABLE); - } - if (block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP)) { - // Prepare to sweep the block for holes. - mut->alloc = mut->sweep = mut->block = block; - if (block_summary_has_flag(summary, BLOCK_VENERABLE_AFTER_SWEEP)) { - // In the last cycle we noted that this block consists of - // mostly old data. Sweep any garbage, commit the mark as - // venerable, and avoid allocating into it. - block_summary_clear_flag(summary, BLOCK_VENERABLE_AFTER_SWEEP); - if (mutator_heap(mut)->last_collection_was_minor) { - finish_sweeping_in_block(mut); - block_summary_set_flag(summary, BLOCK_VENERABLE); - continue; - } - } - // This block was marked in the last GC and needs sweeping. - // As we sweep we'll want to record how many bytes were live - // at the last collection. As we allocate we'll record how - // many granules were wasted because of fragmentation. - summary->hole_count = 0; - summary->free_granules = 0; - summary->holes_with_fragmentation = 0; - summary->fragmentation_granules = 0; - break; - } else { - // Otherwise this block is completely empty and is on the - // empties list. We take from the empties list only after all - // the NEEDS_SWEEP blocks are processed. - continue; - } - } else { - // We are done sweeping for blocks. Now take from the empties - // list. - block = pop_empty_block(space); - // No empty block? Return 0 to cause collection. - if (!block) - return 0; - - // Maybe we should use this empty as a target for evacuation. - if (push_evacuation_target_if_possible(space, block)) - continue; - - // Otherwise return the block to the mutator. - struct block_summary *summary = block_summary_for_addr(block); - block_summary_set_flag(summary, BLOCK_NEEDS_SWEEP); - summary->hole_count = 1; - summary->free_granules = GRANULES_PER_BLOCK; - summary->holes_with_fragmentation = 0; - summary->fragmentation_granules = 0; - mut->block = block; - mut->alloc = block; - mut->sweep = block + BLOCK_SIZE; - return GRANULES_PER_BLOCK; - } - } - } -} - -static void finish_sweeping_in_block(struct gc_mutator *mut) { - do { finish_hole(mut); } while (next_hole_in_block(mut)); -} - -// Another thread is triggering GC. Before we stop, finish clearing the -// dead mark bytes for the mutator's block, and release the block. 
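Before the small finish_sweeping helper below, an aside on the block-acquisition loop above: its per-block decision can be reduced to a predicate over the summary flags. This sketch uses a hypothetical name and omits the side effects (clearing BLOCK_VENERABLE on a major GC, and re-venerating blocks flagged VENERABLE_AFTER_SWEEP after a minor GC).

// Sketch only: should the sweep loop take this block and sweep it for
// holes, or skip it and move on?
static int
block_wants_sweeping(struct block_summary *summary,
                     int last_collection_was_minor) {
  if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE))
    return 0;  // already on the unavailable list
  if (block_summary_has_flag(summary, BLOCK_VENERABLE)
      && last_collection_was_minor)
    return 0;  // old data, marks didn't rotate; not worth allocating into
  // NEEDS_SWEEP means the block was allocated into or marked last
  // cycle; otherwise it is empty and sits on the empties list, to be
  // taken only after all sweepable blocks are processed.
  return block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP);
}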
-static void finish_sweeping(struct gc_mutator *mut) { - while (next_hole(mut)) {} -} - -static void trigger_collection(struct gc_mutator *mut, - enum gc_collection_kind requested_kind) { +static void +trigger_collection(struct gc_mutator *mut, + enum gc_collection_kind requested_kind) { struct gc_heap *heap = mutator_heap(mut); int prev_kind = -1; heap_lock(heap); @@ -2279,26 +1065,30 @@ static void trigger_collection(struct gc_mutator *mut, heap_unlock(heap); } -void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) { +void +gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) { trigger_collection(mut, kind); } -static void* allocate_large(struct gc_mutator *mut, size_t size) { +static void* +allocate_large(struct gc_mutator *mut, size_t size) { struct gc_heap *heap = mutator_heap(mut); - struct large_object_space *space = heap_large_object_space(heap); + struct nofl_space *nofl_space = heap_nofl_space(heap); + struct large_object_space *lospace = heap_large_object_space(heap); - size_t npages = large_object_space_npages(space, size); + size_t npages = large_object_space_npages(lospace, size); - mark_space_request_release_memory(heap_mark_space(heap), - npages << space->page_size_log2); + nofl_space_request_release_memory(nofl_space, + npages << lospace->page_size_log2); - while (!sweep_until_memory_released(mut)) + while (!nofl_space_sweep_until_memory_released(nofl_space, + &mut->allocator)) trigger_collection(mut, GC_COLLECTION_COMPACTING); atomic_fetch_add(&heap->large_object_pages, npages); - void *ret = large_object_space_alloc(space, npages); + void *ret = large_object_space_alloc(lospace, npages); if (!ret) - ret = large_object_space_obtain_and_alloc(space, npages); + ret = large_object_space_obtain_and_alloc(lospace, npages); if (!ret) { perror("weird: we have the space but mmap didn't work"); @@ -2308,113 +1098,81 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) { return ret; } -void* gc_allocate_slow(struct gc_mutator *mut, size_t size) { +static void +collect_for_small_allocation(void *mut) { + trigger_collection(mut, GC_COLLECTION_ANY); +} + +void* +gc_allocate_slow(struct gc_mutator *mut, size_t size) { GC_ASSERT(size > 0); // allocating 0 bytes would be silly if (size > gc_allocator_large_threshold()) return allocate_large(mut, size); - size = align_up(size, GRANULE_SIZE); - uintptr_t alloc = mut->alloc; - uintptr_t sweep = mut->sweep; - uintptr_t new_alloc = alloc + size; - struct gc_ref ret; - if (new_alloc <= sweep) { - mut->alloc = new_alloc; - ret = gc_ref(alloc); - } else { - size_t granules = size >> GRANULE_SIZE_LOG_2; - while (1) { - size_t hole = next_hole(mut); - if (hole >= granules) { - clear_memory(mut->alloc, hole * GRANULE_SIZE); - break; - } - if (!hole) - trigger_collection(mut, GC_COLLECTION_ANY); - } - ret = gc_ref(mut->alloc); - mut->alloc += size; - } - gc_update_alloc_table(mut, ret, size); - return gc_ref_heap_object(ret); + return gc_ref_heap_object(nofl_allocate(&mut->allocator, + heap_nofl_space(mutator_heap(mut)), + size, collect_for_small_allocation, + mut)); } -void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) { +void* +gc_allocate_pointerless(struct gc_mutator *mut, size_t size) { return gc_allocate(mut, size); } -struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) { +struct gc_ephemeron* +gc_allocate_ephemeron(struct gc_mutator *mut) { struct gc_ref ret = gc_ref_from_heap_object(gc_allocate(mut, gc_ephemeron_size())); - if (gc_has_conservative_intraheap_edges()) { 
- uint8_t *metadata = metadata_byte_for_addr(gc_ref_value(ret)); - *metadata |= METADATA_BYTE_EPHEMERON; - } + nofl_space_set_ephemeron_flag(ret); return gc_ref_heap_object(ret); } -void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, - struct gc_ref key, struct gc_ref value) { +void +gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value); // No write barrier: we require that the ephemeron be newer than the // key or the value. } -struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) { +struct gc_pending_ephemerons * +gc_heap_pending_ephemerons(struct gc_heap *heap) { return heap->pending_ephemerons; } -unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { +unsigned +gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { return heap->count; } -struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) { +struct gc_finalizer* +gc_allocate_finalizer(struct gc_mutator *mut) { return gc_allocate(mut, gc_finalizer_size()); } -void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, - unsigned priority, struct gc_ref object, - struct gc_ref closure) { +void +gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, + unsigned priority, struct gc_ref object, + struct gc_ref closure) { gc_finalizer_init_internal(finalizer, object, closure); gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state, finalizer, priority); // No write barrier. } -struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) { +struct gc_finalizer* +gc_pop_finalizable(struct gc_mutator *mut) { return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state); } -void gc_set_finalizer_callback(struct gc_heap *heap, +void +gc_set_finalizer_callback(struct gc_heap *heap, gc_finalizer_callback callback) { gc_finalizer_state_set_callback(heap->finalizer_state, callback); } -static struct slab* allocate_slabs(size_t nslabs) { - size_t size = nslabs * SLAB_SIZE; - size_t extent = size + SLAB_SIZE; - - char *mem = mmap(NULL, extent, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - perror("mmap failed"); - return NULL; - } - - uintptr_t base = (uintptr_t) mem; - uintptr_t end = base + extent; - uintptr_t aligned_base = align_up(base, SLAB_SIZE); - uintptr_t aligned_end = aligned_base + size; - - if (aligned_base - base) - munmap((void*)base, aligned_base - base); - if (end - aligned_end) - munmap((void*)aligned_end, end - aligned_end); - - return (struct slab*) aligned_base; -} - static int heap_prepare_pending_ephemerons(struct gc_heap *heap) { struct gc_pending_ephemerons *cur = heap->pending_ephemerons; size_t target = heap->size * heap->pending_ephemerons_size_factor; @@ -2482,55 +1240,23 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) { return 1; } -static int mark_space_init(struct mark_space *space, struct gc_heap *heap) { - size_t size = align_up(heap->size, SLAB_SIZE); - size_t nslabs = size / SLAB_SIZE; - struct slab *slabs = allocate_slabs(nslabs); - if (!slabs) - return 0; - - space->marked_mask = METADATA_BYTE_MARK_0; - update_mark_patterns(space, 0); - space->slabs = slabs; - space->nslabs = nslabs; - space->low_addr = (uintptr_t) slabs; - space->extent = size; - space->next_block = 0; - space->evacuation_minimum_reserve = 0.02; - space->evacuation_reserve = space->evacuation_minimum_reserve; - 
space->venerable_threshold = heap->fragmentation_low_threshold; - for (size_t slab = 0; slab < nslabs; slab++) { - for (size_t block = 0; block < NONMETA_BLOCKS_PER_SLAB; block++) { - uintptr_t addr = (uintptr_t)slabs[slab].blocks[block].data; - if (size > heap->size) { - push_unavailable_block(space, addr); - size -= BLOCK_SIZE; - } else { - if (!push_evacuation_target_if_needed(space, addr)) - push_empty_block(space, addr); - } - } - } - return 1; -} - int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, struct gc_heap **heap, struct gc_mutator **mut, struct gc_event_listener event_listener, void *event_listener_data) { - GC_ASSERT_EQ(gc_allocator_small_granule_size(), GRANULE_SIZE); + GC_ASSERT_EQ(gc_allocator_small_granule_size(), NOFL_GRANULE_SIZE); GC_ASSERT_EQ(gc_allocator_large_threshold(), LARGE_OBJECT_THRESHOLD); GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), - offsetof(struct gc_mutator, alloc)); + offsetof(struct nofl_allocator, alloc)); GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), - offsetof(struct gc_mutator, sweep)); - GC_ASSERT_EQ(gc_allocator_alloc_table_alignment(), SLAB_SIZE); - GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(), METADATA_BYTE_YOUNG); - GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), METADATA_BYTE_END); + offsetof(struct nofl_allocator, sweep)); + GC_ASSERT_EQ(gc_allocator_alloc_table_alignment(), NOFL_SLAB_SIZE); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(), NOFL_METADATA_BYTE_YOUNG); + GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), NOFL_METADATA_BYTE_END); if (GC_GENERATIONAL) { - GC_ASSERT_EQ(gc_write_barrier_card_table_alignment(), SLAB_SIZE); + GC_ASSERT_EQ(gc_write_barrier_card_table_alignment(), NOFL_SLAB_SIZE); GC_ASSERT_EQ(gc_write_barrier_card_size(), - BLOCK_SIZE / REMSET_BYTES_PER_BLOCK); + NOFL_BLOCK_SIZE / NOFL_REMSET_BYTES_PER_BLOCK); } if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { @@ -2548,8 +1274,10 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, (*heap)->event_listener_data = event_listener_data; HEAP_EVENT(*heap, init, (*heap)->size); - struct mark_space *space = heap_mark_space(*heap); - if (!mark_space_init(space, *heap)) { + struct nofl_space *space = heap_nofl_space(*heap); + if (!nofl_space_init(space, (*heap)->size, + options->common.parallelism != 1, + (*heap)->fragmentation_low_threshold)) { free(*heap); *heap = NULL; return 0;
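One detail worth keeping in mind from the removed allocate_slabs earlier in this hunk: slab-aligned memory comes from over-reserving by one slab with mmap and then unmapping the misaligned head and the excess tail. A self-contained sketch of that trick, under a hypothetical name:

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>

// Sketch: reserve size + alignment bytes, then trim the unaligned head
// and the leftover tail so the returned region is alignment-aligned.
static void *
map_aligned(size_t size, size_t alignment) {
  size_t extent = size + alignment;
  char *mem = mmap(NULL, extent, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED)
    return NULL;
  uintptr_t base = (uintptr_t)mem;
  uintptr_t aligned_base = (base + alignment - 1) & ~(uintptr_t)(alignment - 1);
  uintptr_t aligned_end = aligned_base + size;
  uintptr_t end = base + extent;
  if (aligned_base > base)
    munmap((void *)base, aligned_base - base);        // trim head
  if (end > aligned_end)
    munmap((void *)aligned_end, end - aligned_end);   // trim tail
  return (void *)aligned_base;
}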