1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-08 21:20:19 +02:00

refactor pcc. no functional change

This commit is contained in:
Andy Wingo 2024-07-29 11:21:17 +02:00
parent 22f5b44330
commit 6c5cdd73c9

325
src/pcc.c
View file

@ -52,7 +52,7 @@ struct pcc_block {
struct { struct {
struct pcc_block *next; struct pcc_block *next;
uint8_t in_core; uint8_t in_core;
size_t allocated; // For partially-allocated blocks. size_t allocated; // For partly-empty blocks.
}; };
uint8_t padding[HEADER_BYTES_PER_BLOCK]; uint8_t padding[HEADER_BYTES_PER_BLOCK];
}; };
@ -101,10 +101,10 @@ struct pcc_extent {
}; };
struct pcc_space { struct pcc_space {
struct pcc_block *available; struct pcc_block *empty;
struct pcc_block *partially_allocated; struct pcc_block *partly_full;
struct pcc_block *allocated ALIGNED_TO_AVOID_FALSE_SHARING; struct pcc_block *full ALIGNED_TO_AVOID_FALSE_SHARING;
size_t allocated_block_count; size_t full_block_count;
struct pcc_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING; struct pcc_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING;
size_t fragmentation ALIGNED_TO_AVOID_FALSE_SHARING; size_t fragmentation ALIGNED_TO_AVOID_FALSE_SHARING;
ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING; ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING;
@ -150,10 +150,14 @@ struct gc_heap {
#define MUTATOR_EVENT(mut, event, ...) \ #define MUTATOR_EVENT(mut, event, ...) \
(mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__) (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
struct gc_mutator { struct gc_allocator {
uintptr_t hp; uintptr_t hp;
uintptr_t limit; uintptr_t limit;
struct pcc_block *block; struct pcc_block *block;
};
struct gc_mutator {
struct gc_allocator allocator;
struct gc_heap *heap; struct gc_heap *heap;
struct gc_mutator_roots *roots; struct gc_mutator_roots *roots;
void *event_listener_data; void *event_listener_data;
@ -162,9 +166,7 @@ struct gc_mutator {
}; };
struct gc_trace_worker_data { struct gc_trace_worker_data {
uintptr_t hp; struct gc_allocator allocator;
uintptr_t limit;
struct pcc_block *block;
}; };
static inline struct pcc_space* heap_pcc_space(struct gc_heap *heap) { static inline struct pcc_space* heap_pcc_space(struct gc_heap *heap) {
@ -180,15 +182,6 @@ static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
return mutator->heap; return mutator->heap;
} }
static inline void pcc_space_compute_region(struct pcc_space *space,
struct pcc_block *block,
uintptr_t *hp, uintptr_t *limit) {
struct pcc_block_payload *payload = block_payload(block);
struct pcc_region *region = &payload->regions[space->active_region];
*hp = (uintptr_t)&region[0];
*limit = (uintptr_t)&region[1];
}
static void push_block(struct pcc_block **list, static void push_block(struct pcc_block **list,
struct pcc_block *block) { struct pcc_block *block) {
struct pcc_block *next = atomic_load_explicit(list, memory_order_acquire); struct pcc_block *next = atomic_load_explicit(list, memory_order_acquire);
@ -208,39 +201,33 @@ static struct pcc_block* pop_block(struct pcc_block **list) {
return head; return head;
} }
static struct pcc_block* pop_available_block(struct pcc_space *space) { static struct pcc_block* pop_empty_block(struct pcc_space *space) {
return pop_block(&space->available); return pop_block(&space->empty);
} }
static void push_available_block(struct pcc_space *space, static void push_empty_block(struct pcc_space *space,
struct pcc_block *block) { struct pcc_block *block) {
push_block(&space->available, block); push_block(&space->empty, block);
} }
static struct pcc_block* pop_allocated_block(struct pcc_space *space) { static struct pcc_block* pop_full_block(struct pcc_space *space) {
return pop_block(&space->allocated); return pop_block(&space->full);
} }
static void push_allocated_block(struct pcc_space *space, static void push_full_block(struct pcc_space *space,
struct pcc_block *block) { struct pcc_block *block) {
push_block(&space->allocated, block); push_block(&space->full, block);
atomic_fetch_add_explicit(&space->allocated_block_count, 1, atomic_fetch_add_explicit(&space->full_block_count, 1,
memory_order_relaxed); memory_order_relaxed);
} }
static struct pcc_block* pop_partially_allocated_block(struct pcc_space *space) { static struct pcc_block* pop_partly_full_block(struct pcc_space *space) {
return pop_block(&space->partially_allocated); return pop_block(&space->partly_full);
} }
static void push_partially_allocated_block(struct pcc_space *space, static void push_partly_full_block(struct pcc_space *space,
struct pcc_block *block, struct pcc_block *block,
uintptr_t hp) { size_t allocated_bytes) {
size_t allocated = hp & (REGION_SIZE - 1); GC_ASSERT(allocated_bytes);
if (allocated) { block->allocated = allocated_bytes;
block->allocated = allocated; push_block(&space->partly_full, block);
push_block(&space->partially_allocated, block);
} else {
// Could be hp was bumped all the way to the limit, in which case
// allocated wraps to 0; in any case the block is full.
push_allocated_block(space, block);
}
} }
static struct pcc_block* pop_paged_out_block(struct pcc_space *space) { static struct pcc_block* pop_paged_out_block(struct pcc_space *space) {
@ -279,7 +266,7 @@ static int
pcc_space_page_out_blocks_until_memory_released(struct pcc_space *space) { pcc_space_page_out_blocks_until_memory_released(struct pcc_space *space) {
ssize_t pending = atomic_load(&space->bytes_to_page_out); ssize_t pending = atomic_load(&space->bytes_to_page_out);
while (pending > 0) { while (pending > 0) {
struct pcc_block *block = pop_available_block(space); struct pcc_block *block = pop_empty_block(space);
if (!block) return 0; if (!block) return 0;
page_out_block(space, block); page_out_block(space, block);
pending = pending =
@ -295,12 +282,102 @@ static void pcc_space_reacquire_memory(struct pcc_space *space,
while (pending + BLOCK_SIZE <= 0) { while (pending + BLOCK_SIZE <= 0) {
struct pcc_block *block = page_in_block(space); struct pcc_block *block = page_in_block(space);
GC_ASSERT(block); GC_ASSERT(block);
push_available_block(space, block); push_empty_block(space, block);
pending = pending =
atomic_fetch_add(&space->bytes_to_page_out, BLOCK_SIZE) + BLOCK_SIZE; atomic_fetch_add(&space->bytes_to_page_out, BLOCK_SIZE) + BLOCK_SIZE;
} }
} }
static inline void allocator_set_block(struct gc_allocator *alloc,
struct pcc_block *block,
int active_region) {
struct pcc_block_payload *payload = block_payload(block);
struct pcc_region *region = &payload->regions[active_region];
alloc->block = block;
alloc->hp = (uintptr_t)&region[0];
alloc->limit = (uintptr_t)&region[1];
}
static inline int allocator_acquire_block(struct gc_allocator *alloc,
struct pcc_block *block,
int active_region) {
if (block) {
allocator_set_block(alloc, block, active_region);
return 1;
}
return 0;
}
static int
allocator_acquire_empty_block(struct gc_allocator *alloc,
struct pcc_space *space) {
return allocator_acquire_block(alloc, pop_empty_block(space),
space->active_region);
}
static int
allocator_acquire_partly_full_block(struct gc_allocator *alloc,
struct pcc_space *space) {
if (allocator_acquire_block(alloc, pop_partly_full_block(space),
space->active_region)) {
alloc->hp += alloc->block->allocated;
return 1;
}
return 0;
}
static void allocator_release_full_block(struct gc_allocator *alloc,
struct pcc_space *space) {
record_fragmentation(space, alloc->limit - alloc->hp);
push_full_block(space, alloc->block);
alloc->hp = alloc->limit = 0;
alloc->block = NULL;
}
static void allocator_release_partly_full_block(struct gc_allocator *alloc,
struct pcc_space *space) {
size_t allocated = alloc->hp & (REGION_SIZE - 1);
if (allocated) {
push_partly_full_block(space, alloc->block, allocated);
} else {
// Could be hp was bumped all the way to the limit, in which case
// allocated wraps to 0; in any case the block is full.
push_full_block(space, alloc->block);
}
alloc->hp = alloc->limit = 0;
alloc->block = NULL;
}
static inline struct gc_ref allocate(struct gc_allocator *alloc,
struct pcc_space *space,
size_t size,
void (*get_more_empty_blocks)(void *data),
void *data) {
GC_ASSERT(size > 0);
GC_ASSERT(size <= gc_allocator_large_threshold());
size = align_up(size, GC_ALIGNMENT);
if (alloc->hp + size <= alloc->limit)
goto done;
if (alloc->block)
allocator_release_full_block(alloc, space);
while (allocator_acquire_partly_full_block(alloc, space)) {
if (alloc->hp + size <= alloc->limit)
goto done;
allocator_release_full_block(alloc, space);
}
while (!allocator_acquire_empty_block(alloc, space))
get_more_empty_blocks(data);
// The newly acquired block is empty and is therefore large enough for
// a small allocation.
done:
struct gc_ref ret = gc_ref(alloc->hp);
alloc->hp += size;
return ret;
}
static void static void
gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
struct gc_heap *heap, struct gc_heap *heap,
@ -309,23 +386,10 @@ gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
struct gc_tracer *tracer, struct gc_tracer *tracer,
struct gc_heap *heap, struct gc_heap *heap,
struct gc_trace_worker *worker) { struct gc_trace_worker *worker) {
struct gc_trace_worker_data data = {0,0,NULL}; struct gc_trace_worker_data data = {{0,0,NULL},};
f(tracer, heap, worker, &data); f(tracer, heap, worker, &data);
if (data.block) if (data.allocator.block)
push_partially_allocated_block(heap_pcc_space(heap), data.block, allocator_release_partly_full_block(&data.allocator, heap_pcc_space(heap));
data.hp);
// FIXME: Add (data.limit - data.hp) to fragmentation.
}
static void clear_mutator_allocation_buffers(struct gc_heap *heap) {
for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next) {
if (mut->block) {
record_fragmentation(heap_pcc_space(heap), mut->limit - mut->hp);
push_allocated_block(heap_pcc_space(heap), mut->block);
mut->block = NULL;
}
mut->hp = mut->limit = 0;
}
} }
static struct pcc_block* static struct pcc_block*
@ -342,70 +406,31 @@ append_block_lists(struct pcc_block *head, struct pcc_block *tail) {
static void pcc_space_flip(struct pcc_space *space) { static void pcc_space_flip(struct pcc_space *space) {
// Mutators stopped, can access nonatomically. // Mutators stopped, can access nonatomically.
struct pcc_block *available = space->available; space->empty =
struct pcc_block *partially_allocated = space->partially_allocated; append_block_lists(space->empty,
struct pcc_block *allocated = space->allocated; append_block_lists(space->partly_full, space->full));
allocated = append_block_lists(partially_allocated, allocated); space->partly_full = NULL;
space->available = append_block_lists(available, allocated); space->full = NULL;
space->partially_allocated = NULL; space->full_block_count = 0;
space->allocated = NULL;
space->allocated_block_count = 0;
space->fragmentation = 0; space->fragmentation = 0;
space->active_region ^= 1; space->active_region ^= 1;
} }
static void pcc_space_finish_gc(struct pcc_space *space) { static void pcc_space_finish_gc(struct pcc_space *space) {
// Mutators stopped, can access nonatomically. // Mutators stopped, can access nonatomically.
space->live_bytes_at_last_gc = space->allocated_block_count * REGION_SIZE; space->live_bytes_at_last_gc = space->full_block_count * REGION_SIZE;
space->fragmentation_at_last_gc = space->fragmentation; space->fragmentation_at_last_gc = space->fragmentation;
} }
static void collect(struct gc_mutator *mut) GC_NEVER_INLINE; static void get_more_empty_blocks_during_evacuation(void *data) {
// If space is really tight and reordering of objects during
static struct gc_ref evacuation_allocate(struct pcc_space *space, // evacuation resulted in more end-of-block fragmentation and thus
struct gc_trace_worker_data *data, // block use than before collection started, we can actually run out
size_t size) { // of memory while collecting. We should probably attempt to expand
GC_ASSERT(size > 0); // the heap here, at least by a single block; it's better than the
GC_ASSERT(size <= gc_allocator_large_threshold()); // alternatives.
size = align_up(size, GC_ALIGNMENT);
uintptr_t hp = data->hp;
uintptr_t limit = data->limit;
uintptr_t new_hp = hp + size;
if (new_hp <= limit)
goto done;
if (data->block) {
record_fragmentation(space, limit - hp);
push_allocated_block(space, data->block);
}
while ((data->block = pop_partially_allocated_block(space))) {
pcc_space_compute_region(space, data->block, &hp, &limit);
hp += data->block->allocated;
new_hp = hp + size;
if (new_hp <= limit) {
data->limit = limit;
goto done;
}
record_fragmentation(space, limit - hp);
push_allocated_block(space, data->block);
}
data->block = pop_available_block(space);
if (!data->block) {
// Can happen if space is really tight and reordering of objects
// during evacuation resulted in more end-of-block fragmentation and
// thus block use than before collection started. A dire situation.
fprintf(stderr, "Out of memory\n"); fprintf(stderr, "Out of memory\n");
GC_CRASH(); GC_CRASH();
}
pcc_space_compute_region(space, data->block, &hp, &data->limit);
new_hp = hp + size;
// The region is empty and is therefore large enough for a small
// allocation.
done:
data->hp = new_hp;
return gc_ref(hp);
} }
static inline int pcc_space_forward(struct pcc_space *space, static inline int pcc_space_forward(struct pcc_space *space,
@ -427,7 +452,9 @@ static inline int pcc_space_forward(struct pcc_space *space,
case GC_FORWARDING_STATE_ACQUIRED: { case GC_FORWARDING_STATE_ACQUIRED: {
// We claimed the object successfully; evacuating is up to us. // We claimed the object successfully; evacuating is up to us.
size_t bytes = gc_atomic_forward_object_size(&fwd); size_t bytes = gc_atomic_forward_object_size(&fwd);
struct gc_ref new_ref = evacuation_allocate(space, data, bytes); struct gc_ref new_ref = allocate(&data->allocator, space, bytes,
get_more_empty_blocks_during_evacuation,
NULL);
// Copy object contents before committing, as we don't know what // Copy object contents before committing, as we don't know what
// part of the object (if any) will be overwritten by the // part of the object (if any) will be overwritten by the
// commit. // commit.
@ -545,7 +572,6 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
mut->event_listener_data = mut->event_listener_data =
heap->event_listener.mutator_added(heap->event_listener_data); heap->event_listener.mutator_added(heap->event_listener_data);
heap_lock(heap); heap_lock(heap);
mut->block = NULL;
// We have no roots. If there is a GC currently in progress, we have // We have no roots. If there is a GC currently in progress, we have
// nothing to add. Just wait until it's done. // nothing to add. Just wait until it's done.
while (mutators_are_stopping(heap)) while (mutators_are_stopping(heap))
@ -564,11 +590,8 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
MUTATOR_EVENT(mut, mutator_removed); MUTATOR_EVENT(mut, mutator_removed);
mut->heap = NULL; mut->heap = NULL;
if (mut->block) { if (mut->allocator.block)
push_partially_allocated_block(heap_pcc_space(heap), mut->block, allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap));
mut->hp);
mut->block = NULL;
}
heap_lock(heap); heap_lock(heap);
heap->mutator_count--; heap->mutator_count--;
if (mut->next) if (mut->next)
@ -703,6 +726,8 @@ static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut); struct gc_heap *heap = mutator_heap(mut);
GC_ASSERT(mutators_are_stopping(heap)); GC_ASSERT(mutators_are_stopping(heap));
MUTATOR_EVENT(mut, mutator_stopping); MUTATOR_EVENT(mut, mutator_stopping);
if (mut->allocator.block)
allocator_release_full_block(&mut->allocator, heap_pcc_space(heap));
heap_lock(heap); heap_lock(heap);
pause_mutator_for_collection(heap, mut); pause_mutator_for_collection(heap, mut);
heap_unlock(heap); heap_unlock(heap);
@ -766,6 +791,7 @@ static void sweep_ephemerons(struct gc_heap *heap) {
return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1); return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
} }
static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
static void collect(struct gc_mutator *mut) { static void collect(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut); struct gc_heap *heap = mutator_heap(mut);
struct pcc_space *cspace = heap_pcc_space(heap); struct pcc_space *cspace = heap_pcc_space(heap);
@ -782,7 +808,6 @@ static void collect(struct gc_mutator *mut) {
HEAP_EVENT(heap, waiting_for_stop); HEAP_EVENT(heap, waiting_for_stop);
wait_for_mutators_to_stop(heap); wait_for_mutators_to_stop(heap);
HEAP_EVENT(heap, mutators_stopped); HEAP_EVENT(heap, mutators_stopped);
clear_mutator_allocation_buffers(heap);
pcc_space_flip(cspace); pcc_space_flip(cspace);
gc_tracer_prepare(&heap->tracer); gc_tracer_prepare(&heap->tracer);
add_roots(heap); add_roots(heap);
@ -816,6 +841,8 @@ static void collect(struct gc_mutator *mut) {
static void trigger_collection(struct gc_mutator *mut) { static void trigger_collection(struct gc_mutator *mut) {
struct gc_heap *heap = mutator_heap(mut); struct gc_heap *heap = mutator_heap(mut);
if (mut->allocator.block)
allocator_release_full_block(&mut->allocator, heap_pcc_space(heap));
heap_lock(heap); heap_lock(heap);
long epoch = heap->count; long epoch = heap->count;
while (mutators_are_stopping(heap)) while (mutators_are_stopping(heap))
@ -853,47 +880,20 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) {
return ret; return ret;
} }
static void get_more_empty_blocks_for_mutator(void *mut) {
trigger_collection(mut);
}
void* gc_allocate_slow(struct gc_mutator *mut, size_t size) { void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
GC_ASSERT(size > 0); // allocating 0 bytes would be silly GC_ASSERT(size > 0); // allocating 0 bytes would be silly
if (size > gc_allocator_large_threshold()) if (size > gc_allocator_large_threshold())
return allocate_large(mut, size); return allocate_large(mut, size);
size = align_up(size, GC_ALIGNMENT); return gc_ref_heap_object(allocate(&mut->allocator,
uintptr_t hp = mut->hp; heap_pcc_space(mutator_heap(mut)),
uintptr_t limit = mut->limit; size, get_more_empty_blocks_for_mutator,
uintptr_t new_hp = hp + size; mut));
if (new_hp <= limit)
goto done;
struct pcc_space *space = heap_pcc_space(mutator_heap(mut));
if (mut->block) {
record_fragmentation(space, limit - hp);
push_allocated_block(space, mut->block);
}
while ((mut->block = pop_partially_allocated_block(space))) {
pcc_space_compute_region(space, mut->block, &hp, &limit);
hp += mut->block->allocated;
new_hp = hp + size;
if (new_hp <= limit) {
mut->limit = limit;
goto done;
}
record_fragmentation(space, limit - hp);
push_allocated_block(space, mut->block);
}
while (!(mut->block = pop_available_block(space))) {
trigger_collection(mut);
}
pcc_space_compute_region(space, mut->block, &hp, &mut->limit);
new_hp = hp + size;
// The region is empty and is therefore large enough for a small
// allocation.
done:
mut->hp = new_hp;
gc_clear_fresh_allocation(gc_ref(hp), size);
return gc_ref_heap_object(gc_ref(hp));
} }
void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) { void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
@ -1031,10 +1031,10 @@ static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) {
if (!slabs) if (!slabs)
return 0; return 0;
space->available = NULL; space->empty = NULL;
space->partially_allocated = NULL; space->partly_full = NULL;
space->allocated = NULL; space->full = NULL;
space->allocated_block_count = 0; space->full_block_count = 0;
space->paged_out = NULL; space->paged_out = NULL;
space->fragmentation = 0; space->fragmentation = 0;
space->bytes_to_page_out = 0; space->bytes_to_page_out = 0;
@ -1056,7 +1056,7 @@ static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) {
size -= BLOCK_SIZE; size -= BLOCK_SIZE;
} else { } else {
block->in_core = 1; block->in_core = 1;
push_available_block(space, block); push_empty_block(space, block);
} }
} }
} }
@ -1069,10 +1069,11 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
void *event_listener_data) { void *event_listener_data) {
GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT); GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD); GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
offsetof(struct gc_mutator, hp)); offsetof(struct gc_allocator, hp));
GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
offsetof(struct gc_mutator, limit)); offsetof(struct gc_allocator, limit));
if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
fprintf(stderr, "fixed heap size is currently required\n"); fprintf(stderr, "fixed heap size is currently required\n");
@ -1121,6 +1122,8 @@ void gc_finish_for_thread(struct gc_mutator *mut) {
static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
GC_ASSERT(mut->next == NULL); GC_ASSERT(mut->next == NULL);
if (mut->allocator.block)
allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap));
heap_lock(heap); heap_lock(heap);
heap->inactive_mutator_count++; heap->inactive_mutator_count++;
if (all_mutators_stopped(heap)) if (all_mutators_stopped(heap))