refactor pcc. no functional change

2025-06-08 13:10:19 +02:00 · 2024-07-29 11:21:17 +02:00 · 2024-07-29 11:21:17 +02:00 · 6c5cdd73c9
commit 6c5cdd73c9
parent 22f5b44330
1 changed files with 170 additions and 167 deletions
--- a/src/pcc.c
+++ b/src/pcc.c
@ -52,7 +52,7 @@ struct pcc_block {
    struct {
      struct pcc_block *next;
      uint8_t in_core;
-      size_t allocated; // For partially-allocated blocks.
+      size_t allocated; // For partly-empty blocks.
    };
    uint8_t padding[HEADER_BYTES_PER_BLOCK];
  };
@ -101,10 +101,10 @@ struct pcc_extent {
 };

 struct pcc_space {
-  struct pcc_block *available;
-  struct pcc_block *partially_allocated;
-  struct pcc_block *allocated ALIGNED_TO_AVOID_FALSE_SHARING;
-  size_t allocated_block_count;
+  struct pcc_block *empty;
+  struct pcc_block *partly_full;
+  struct pcc_block *full ALIGNED_TO_AVOID_FALSE_SHARING;
+  size_t full_block_count;
  struct pcc_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING;
  size_t fragmentation ALIGNED_TO_AVOID_FALSE_SHARING;
  ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING;
@ -150,10 +150,14 @@ struct gc_heap {
 #define MUTATOR_EVENT(mut, event, ...)                                  \
  (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)

-struct gc_mutator {
+struct gc_allocator {
  uintptr_t hp;
  uintptr_t limit;
  struct pcc_block *block;
+};
+
+struct gc_mutator {
+  struct gc_allocator allocator;
  struct gc_heap *heap;
  struct gc_mutator_roots *roots;
  void *event_listener_data;
@ -162,9 +166,7 @@ struct gc_mutator {
 };

 struct gc_trace_worker_data {
-  uintptr_t hp;
-  uintptr_t limit;
-  struct pcc_block *block;
+  struct gc_allocator allocator;
 };

 static inline struct pcc_space* heap_pcc_space(struct gc_heap *heap) {
@ -180,15 +182,6 @@ static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
  return mutator->heap;
 }

-static inline void pcc_space_compute_region(struct pcc_space *space,
-                                            struct pcc_block *block,
-                                            uintptr_t *hp, uintptr_t *limit) {
-  struct pcc_block_payload *payload = block_payload(block);
-  struct pcc_region *region = &payload->regions[space->active_region];
-  *hp = (uintptr_t)&region[0];
-  *limit = (uintptr_t)&region[1];
-}
-
 static void push_block(struct pcc_block **list,
                       struct pcc_block *block) {
  struct pcc_block *next = atomic_load_explicit(list, memory_order_acquire);
@ -208,39 +201,33 @@ static struct pcc_block* pop_block(struct pcc_block **list) {
  return head;
 }

-static struct pcc_block* pop_available_block(struct pcc_space *space) {
-  return pop_block(&space->available);
+static struct pcc_block* pop_empty_block(struct pcc_space *space) {
+  return pop_block(&space->empty);
 }
-static void push_available_block(struct pcc_space *space,
-                                 struct pcc_block *block) {
-  push_block(&space->available, block);
+static void push_empty_block(struct pcc_space *space,
+                             struct pcc_block *block) {
+  push_block(&space->empty, block);
 }

-static struct pcc_block* pop_allocated_block(struct pcc_space *space) {
-  return pop_block(&space->allocated);
+static struct pcc_block* pop_full_block(struct pcc_space *space) {
+  return pop_block(&space->full);
 }
-static void push_allocated_block(struct pcc_space *space,
-                                 struct pcc_block *block) {
-  push_block(&space->allocated, block);
-  atomic_fetch_add_explicit(&space->allocated_block_count, 1,
+static void push_full_block(struct pcc_space *space,
+                            struct pcc_block *block) {
+  push_block(&space->full, block);
+  atomic_fetch_add_explicit(&space->full_block_count, 1,
                            memory_order_relaxed);
 }

-static struct pcc_block* pop_partially_allocated_block(struct pcc_space *space) {
-  return pop_block(&space->partially_allocated);
+static struct pcc_block* pop_partly_full_block(struct pcc_space *space) {
+  return pop_block(&space->partly_full);
 }
-static void push_partially_allocated_block(struct pcc_space *space,
-                                           struct pcc_block *block,
-                                           uintptr_t hp) {
-  size_t allocated = hp & (REGION_SIZE - 1);
-  if (allocated) {
-    block->allocated = allocated;
-    push_block(&space->partially_allocated, block);
-  } else {
-    // Could be hp was bumped all the way to the limit, in which case
-    // allocated wraps to 0; in any case the block is full.
-    push_allocated_block(space, block);
-  }
+static void push_partly_full_block(struct pcc_space *space,
+                                   struct pcc_block *block,
+                                   size_t allocated_bytes) {
+  GC_ASSERT(allocated_bytes);
+  block->allocated = allocated_bytes;
+  push_block(&space->partly_full, block);
 }

 static struct pcc_block* pop_paged_out_block(struct pcc_space *space) {
@ -271,7 +258,7 @@ static void record_fragmentation(struct pcc_space *space,
 }

 static ssize_t pcc_space_request_release_memory(struct pcc_space *space,
-                                                  size_t bytes) {
+                                                size_t bytes) {
  return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes;
 }

@ -279,7 +266,7 @@ static int
 pcc_space_page_out_blocks_until_memory_released(struct pcc_space *space) {
  ssize_t pending = atomic_load(&space->bytes_to_page_out);
  while (pending > 0) {
-    struct pcc_block *block = pop_available_block(space);
+    struct pcc_block *block = pop_empty_block(space);
    if (!block) return 0;
    page_out_block(space, block);
    pending =
@ -295,12 +282,102 @@ static void pcc_space_reacquire_memory(struct pcc_space *space,
  while (pending + BLOCK_SIZE <= 0) {
    struct pcc_block *block = page_in_block(space);
    GC_ASSERT(block);
-    push_available_block(space, block);
+    push_empty_block(space, block);
    pending =
      atomic_fetch_add(&space->bytes_to_page_out, BLOCK_SIZE) + BLOCK_SIZE;
  }
 }

+static inline void allocator_set_block(struct gc_allocator *alloc,
+                                       struct pcc_block *block,
+                                       int active_region) {
+  struct pcc_block_payload *payload = block_payload(block);
+  struct pcc_region *region = &payload->regions[active_region];
+  alloc->block = block;
+  alloc->hp = (uintptr_t)&region[0];
+  alloc->limit = (uintptr_t)&region[1];
+}
+
+static inline int allocator_acquire_block(struct gc_allocator *alloc,
+                                          struct pcc_block *block,
+                                          int active_region) {
+  if (block) {
+    allocator_set_block(alloc, block, active_region);
+    return 1;
+  }
+  return 0;
+}
+
+static int
+allocator_acquire_empty_block(struct gc_allocator *alloc,
+                              struct pcc_space *space) {
+  return allocator_acquire_block(alloc, pop_empty_block(space),
+                                 space->active_region);
+}
+
+static int
+allocator_acquire_partly_full_block(struct gc_allocator *alloc,
+                                    struct pcc_space *space) {
+  if (allocator_acquire_block(alloc, pop_partly_full_block(space),
+                              space->active_region)) {
+    alloc->hp += alloc->block->allocated;
+    return 1;
+  }
+  return 0;
+}
+
+static void allocator_release_full_block(struct gc_allocator *alloc,
+                                         struct pcc_space *space) {
+  record_fragmentation(space, alloc->limit - alloc->hp);
+  push_full_block(space, alloc->block);
+  alloc->hp = alloc->limit = 0;
+  alloc->block = NULL;
+}
+
+static void allocator_release_partly_full_block(struct gc_allocator *alloc,
+                                                struct pcc_space *space) {
+  size_t allocated = alloc->hp & (REGION_SIZE - 1);
+  if (allocated) {
+    push_partly_full_block(space, alloc->block, allocated);
+  } else {
+    // Could be hp was bumped all the way to the limit, in which case
+    // allocated wraps to 0; in any case the block is full.
+    push_full_block(space, alloc->block);
+  }
+  alloc->hp = alloc->limit = 0;
+  alloc->block = NULL;
+}
+
+static inline struct gc_ref allocate(struct gc_allocator *alloc,
+                                     struct pcc_space *space,
+                                     size_t size,
+                                     void (*get_more_empty_blocks)(void *data),
+                                     void *data) {
+  GC_ASSERT(size > 0);
+  GC_ASSERT(size <= gc_allocator_large_threshold());
+  size = align_up(size, GC_ALIGNMENT);
+
+  if (alloc->hp + size <= alloc->limit)
+    goto done;
+
+  if (alloc->block)
+    allocator_release_full_block(alloc, space);
+  while (allocator_acquire_partly_full_block(alloc, space)) {
+    if (alloc->hp + size <= alloc->limit)
+      goto done;
+    allocator_release_full_block(alloc, space);
+  }
+  while (!allocator_acquire_empty_block(alloc, space))
+    get_more_empty_blocks(data);
+  // The newly acquired block is empty and is therefore large enough for
+  // a small allocation.
+
+done:
+  struct gc_ref ret = gc_ref(alloc->hp);
+  alloc->hp += size;
+  return ret;
+}
+
 static void
 gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
                                         struct gc_heap *heap,
@ -309,23 +386,10 @@ gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
                               struct gc_tracer *tracer,
                               struct gc_heap *heap,
                               struct gc_trace_worker *worker) {
-  struct gc_trace_worker_data data = {0,0,NULL};
+  struct gc_trace_worker_data data = {{0,0,NULL},};
  f(tracer, heap, worker, &data);
-  if (data.block)
-    push_partially_allocated_block(heap_pcc_space(heap), data.block,
-                                   data.hp);
-  // FIXME: Add (data.limit - data.hp) to fragmentation.
-}
-
-static void clear_mutator_allocation_buffers(struct gc_heap *heap) {
-  for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next) {
-    if (mut->block) {
-      record_fragmentation(heap_pcc_space(heap), mut->limit - mut->hp);
-      push_allocated_block(heap_pcc_space(heap), mut->block);
-      mut->block = NULL;
-    }
-    mut->hp = mut->limit = 0;
-  }
+  if (data.allocator.block)
+    allocator_release_partly_full_block(&data.allocator, heap_pcc_space(heap));
 }

 static struct pcc_block*
@ -342,70 +406,31 @@ append_block_lists(struct pcc_block *head, struct pcc_block *tail) {

 static void pcc_space_flip(struct pcc_space *space) {
  // Mutators stopped, can access nonatomically.
-  struct pcc_block *available = space->available;
-  struct pcc_block *partially_allocated = space->partially_allocated;
-  struct pcc_block *allocated = space->allocated;
-  allocated = append_block_lists(partially_allocated, allocated);
-  space->available = append_block_lists(available, allocated);
-  space->partially_allocated = NULL;
-  space->allocated = NULL;
-  space->allocated_block_count = 0;
+  space->empty =
+    append_block_lists(space->empty,
+                       append_block_lists(space->partly_full, space->full));
+  space->partly_full = NULL;
+  space->full = NULL;
+  space->full_block_count = 0;
  space->fragmentation = 0;
  space->active_region ^= 1;
 }

 static void pcc_space_finish_gc(struct pcc_space *space) {
  // Mutators stopped, can access nonatomically.
-  space->live_bytes_at_last_gc = space->allocated_block_count * REGION_SIZE;
+  space->live_bytes_at_last_gc = space->full_block_count * REGION_SIZE;
  space->fragmentation_at_last_gc = space->fragmentation;
 }

-static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
-
-static struct gc_ref evacuation_allocate(struct pcc_space *space,
-                                         struct gc_trace_worker_data *data,
-                                         size_t size) {
-  GC_ASSERT(size > 0);
-  GC_ASSERT(size <= gc_allocator_large_threshold());
-  size = align_up(size, GC_ALIGNMENT);
-
-  uintptr_t hp = data->hp;
-  uintptr_t limit = data->limit;
-  uintptr_t new_hp = hp + size;
-  if (new_hp <= limit)
-    goto done;
-
-  if (data->block) {
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, data->block);
-  }
-  while ((data->block = pop_partially_allocated_block(space))) {
-    pcc_space_compute_region(space, data->block, &hp, &limit);
-    hp += data->block->allocated;
-    new_hp = hp + size;
-    if (new_hp <= limit) {
-      data->limit = limit;
-      goto done;
-    }
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, data->block);
-  }
-  data->block = pop_available_block(space);
-  if (!data->block) {
-    // Can happen if space is really tight and reordering of objects
-    // during evacuation resulted in more end-of-block fragmentation and
-    // thus block use than before collection started.  A dire situation.
-    fprintf(stderr, "Out of memory\n");
-    GC_CRASH();
-  }
-  pcc_space_compute_region(space, data->block, &hp, &data->limit);
-  new_hp = hp + size;
-  // The region is empty and is therefore large enough for a small
-  // allocation.
-
-done:
-  data->hp = new_hp;
-  return gc_ref(hp);
+static void get_more_empty_blocks_during_evacuation(void *data) {
+  // If space is really tight and reordering of objects during
+  // evacuation resulted in more end-of-block fragmentation and thus
+  // block use than before collection started, we can actually run out
+  // of memory while collecting.  We should probably attempt to expand
+  // the heap here, at least by a single block; it's better than the
+  // alternatives.
+  fprintf(stderr, "Out of memory\n");
+  GC_CRASH();
 }

 static inline int pcc_space_forward(struct pcc_space *space,
@ -427,7 +452,9 @@ static inline int pcc_space_forward(struct pcc_space *space,
  case GC_FORWARDING_STATE_ACQUIRED: {
    // We claimed the object successfully; evacuating is up to us.
    size_t bytes = gc_atomic_forward_object_size(&fwd);
-    struct gc_ref new_ref = evacuation_allocate(space, data, bytes);
+    struct gc_ref new_ref = allocate(&data->allocator, space, bytes,
+                                     get_more_empty_blocks_during_evacuation,
+                                     NULL);
    // Copy object contents before committing, as we don't know what
    // part of the object (if any) will be overwritten by the
    // commit.
@ -545,7 +572,6 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
  mut->event_listener_data =
    heap->event_listener.mutator_added(heap->event_listener_data);
  heap_lock(heap);
-  mut->block = NULL;
  // We have no roots.  If there is a GC currently in progress, we have
  // nothing to add.  Just wait until it's done.
  while (mutators_are_stopping(heap))
@ -564,11 +590,8 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
 static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
  MUTATOR_EVENT(mut, mutator_removed);
  mut->heap = NULL;
-  if (mut->block) {
-    push_partially_allocated_block(heap_pcc_space(heap), mut->block,
-                                   mut->hp);
-    mut->block = NULL;
-  }
+  if (mut->allocator.block)
+    allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap));
  heap_lock(heap);
  heap->mutator_count--;
  if (mut->next)
@ -703,6 +726,8 @@ static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
  struct gc_heap *heap = mutator_heap(mut);
  GC_ASSERT(mutators_are_stopping(heap));
  MUTATOR_EVENT(mut, mutator_stopping);
+  if (mut->allocator.block)
+    allocator_release_full_block(&mut->allocator, heap_pcc_space(heap));
  heap_lock(heap);
  pause_mutator_for_collection(heap, mut);
  heap_unlock(heap);
@ -766,6 +791,7 @@ static void sweep_ephemerons(struct gc_heap *heap) {
  return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
 }

+static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
 static void collect(struct gc_mutator *mut) {
  struct gc_heap *heap = mutator_heap(mut);
  struct pcc_space *cspace = heap_pcc_space(heap);
@ -782,7 +808,6 @@ static void collect(struct gc_mutator *mut) {
  HEAP_EVENT(heap, waiting_for_stop);
  wait_for_mutators_to_stop(heap);
  HEAP_EVENT(heap, mutators_stopped);
-  clear_mutator_allocation_buffers(heap);
  pcc_space_flip(cspace);
  gc_tracer_prepare(&heap->tracer);
  add_roots(heap);
@ -816,6 +841,8 @@ static void collect(struct gc_mutator *mut) {

 static void trigger_collection(struct gc_mutator *mut) {
  struct gc_heap *heap = mutator_heap(mut);
+  if (mut->allocator.block)
+    allocator_release_full_block(&mut->allocator, heap_pcc_space(heap));
  heap_lock(heap);
  long epoch = heap->count;
  while (mutators_are_stopping(heap))
@ -853,47 +880,20 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) {
  return ret;
 }

+static void get_more_empty_blocks_for_mutator(void *mut) {
+  trigger_collection(mut);
+}
+
 void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
  GC_ASSERT(size > 0); // allocating 0 bytes would be silly

  if (size > gc_allocator_large_threshold())
    return allocate_large(mut, size);

-  size = align_up(size, GC_ALIGNMENT);
-  uintptr_t hp = mut->hp;
-  uintptr_t limit = mut->limit;
-  uintptr_t new_hp = hp + size;
-  if (new_hp <= limit)
-    goto done;
-
-  struct pcc_space *space = heap_pcc_space(mutator_heap(mut));
-  if (mut->block) {
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, mut->block);
-  }
-  while ((mut->block = pop_partially_allocated_block(space))) {
-    pcc_space_compute_region(space, mut->block, &hp, &limit);
-    hp += mut->block->allocated;
-    new_hp = hp + size;
-    if (new_hp <= limit) {
-      mut->limit = limit;
-      goto done;
-    }
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, mut->block);
-  }
-  while (!(mut->block = pop_available_block(space))) {
-    trigger_collection(mut);
-  }
-  pcc_space_compute_region(space, mut->block, &hp, &mut->limit);
-  new_hp = hp + size;
-  // The region is empty and is therefore large enough for a small
-  // allocation.
-
-done:
-  mut->hp = new_hp;
-  gc_clear_fresh_allocation(gc_ref(hp), size);
-  return gc_ref_heap_object(gc_ref(hp));
+  return gc_ref_heap_object(allocate(&mut->allocator,
+                                     heap_pcc_space(mutator_heap(mut)),
+                                     size, get_more_empty_blocks_for_mutator,
+                                     mut));
 }

 void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
@ -1031,10 +1031,10 @@ static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) {
  if (!slabs)
    return 0;

-  space->available = NULL;
-  space->partially_allocated = NULL;
-  space->allocated = NULL;
-  space->allocated_block_count = 0;
+  space->empty = NULL;
+  space->partly_full = NULL;
+  space->full = NULL;
+  space->full_block_count = 0;
  space->paged_out = NULL;
  space->fragmentation = 0;
  space->bytes_to_page_out = 0;
@ -1056,7 +1056,7 @@ static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) {
        size -= BLOCK_SIZE;
      } else {
        block->in_core = 1;
-        push_available_block(space, block);
+        push_empty_block(space, block);
      }
    }
  }
@ -1069,10 +1069,11 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
            void *event_listener_data) {
  GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
  GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
+  GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
  GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
-               offsetof(struct gc_mutator, hp));
+               offsetof(struct gc_allocator, hp));
  GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
-               offsetof(struct gc_mutator, limit));
+               offsetof(struct gc_allocator, limit));

  if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
    fprintf(stderr, "fixed heap size is currently required\n");
@ -1121,6 +1122,8 @@ void gc_finish_for_thread(struct gc_mutator *mut) {

 static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
  GC_ASSERT(mut->next == NULL);
+  if (mut->allocator.block)
+    allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap));
  heap_lock(heap);
  heap->inactive_mutator_count++;
  if (all_mutators_stopped(heap))