diff --git a/src/pcc.c b/src/pcc.c
index abee8ce49..1638abf85 100644
--- a/src/pcc.c
+++ b/src/pcc.c
@@ -52,7 +52,7 @@ struct pcc_block {
     struct {
       struct pcc_block *next;
       uint8_t in_core;
-      size_t allocated; // For partially-allocated blocks.
+      size_t allocated; // For partly-full blocks.
     };
     uint8_t padding[HEADER_BYTES_PER_BLOCK];
   };
@@ -101,10 +101,10 @@ struct pcc_extent {
 };
 
 struct pcc_space {
-  struct pcc_block *available;
-  struct pcc_block *partially_allocated;
-  struct pcc_block *allocated ALIGNED_TO_AVOID_FALSE_SHARING;
-  size_t allocated_block_count;
+  struct pcc_block *empty;
+  struct pcc_block *partly_full;
+  struct pcc_block *full ALIGNED_TO_AVOID_FALSE_SHARING;
+  size_t full_block_count;
   struct pcc_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING;
   size_t fragmentation ALIGNED_TO_AVOID_FALSE_SHARING;
   ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING;
@@ -150,10 +150,14 @@ struct gc_heap {
 #define MUTATOR_EVENT(mut, event, ...) \
   (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__)
 
-struct gc_mutator {
+struct gc_allocator {
   uintptr_t hp;
   uintptr_t limit;
   struct pcc_block *block;
+};
+
+struct gc_mutator {
+  struct gc_allocator allocator;
   struct gc_heap *heap;
   struct gc_mutator_roots *roots;
   void *event_listener_data;
@@ -162,9 +166,7 @@ struct gc_mutator {
 };
 
 struct gc_trace_worker_data {
-  uintptr_t hp;
-  uintptr_t limit;
-  struct pcc_block *block;
+  struct gc_allocator allocator;
 };
 
 static inline struct pcc_space* heap_pcc_space(struct gc_heap *heap) {
@@ -180,15 +182,6 @@ static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) {
   return mutator->heap;
 }
 
-static inline void pcc_space_compute_region(struct pcc_space *space,
-                                            struct pcc_block *block,
-                                            uintptr_t *hp, uintptr_t *limit) {
-  struct pcc_block_payload *payload = block_payload(block);
-  struct pcc_region *region = &payload->regions[space->active_region];
-  *hp = (uintptr_t)&region[0];
-  *limit = (uintptr_t)&region[1];
-}
-
 static void push_block(struct pcc_block **list,
                        struct pcc_block *block) {
   struct pcc_block *next = atomic_load_explicit(list, memory_order_acquire);
@@ -208,39 +201,33 @@ static struct pcc_block* pop_block(struct pcc_block **list) {
   return head;
 }
 
-static struct pcc_block* pop_available_block(struct pcc_space *space) {
-  return pop_block(&space->available);
+static struct pcc_block* pop_empty_block(struct pcc_space *space) {
+  return pop_block(&space->empty);
 }
-static void push_available_block(struct pcc_space *space,
-                                 struct pcc_block *block) {
-  push_block(&space->available, block);
+static void push_empty_block(struct pcc_space *space,
+                             struct pcc_block *block) {
+  push_block(&space->empty, block);
 }
 
-static struct pcc_block* pop_allocated_block(struct pcc_space *space) {
-  return pop_block(&space->allocated);
+static struct pcc_block* pop_full_block(struct pcc_space *space) {
+  return pop_block(&space->full);
 }
-static void push_allocated_block(struct pcc_space *space,
-                                 struct pcc_block *block) {
-  push_block(&space->allocated, block);
-  atomic_fetch_add_explicit(&space->allocated_block_count, 1,
+static void push_full_block(struct pcc_space *space,
+                            struct pcc_block *block) {
+  push_block(&space->full, block);
+  atomic_fetch_add_explicit(&space->full_block_count, 1,
                             memory_order_relaxed);
 }
 
-static struct pcc_block* pop_partially_allocated_block(struct pcc_space *space) {
-  return pop_block(&space->partially_allocated);
+static struct pcc_block* pop_partly_full_block(struct pcc_space *space) {
+  return pop_block(&space->partly_full);
 }
-static void push_partially_allocated_block(struct pcc_space *space,
-                                           struct pcc_block *block,
-                                           uintptr_t hp) {
-  size_t allocated = hp & (REGION_SIZE - 1);
-  if (allocated) {
-    block->allocated = allocated;
-    push_block(&space->partially_allocated, block);
-  } else {
-    // Could be hp was bumped all the way to the limit, in which case
-    // allocated wraps to 0; in any case the block is full.
-    push_allocated_block(space, block);
-  }
+static void push_partly_full_block(struct pcc_space *space,
+                                   struct pcc_block *block,
+                                   size_t allocated_bytes) {
+  GC_ASSERT(allocated_bytes);
+  block->allocated = allocated_bytes;
+  push_block(&space->partly_full, block);
 }
 
 static struct pcc_block* pop_paged_out_block(struct pcc_space *space) {
@@ -271,7 +258,7 @@ static void record_fragmentation(struct pcc_space *space,
 }
 
 static ssize_t pcc_space_request_release_memory(struct pcc_space *space,
-                                               size_t bytes) {
+                                                size_t bytes) {
   return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes;
 }
 
@@ -279,7 +266,7 @@ static int
 pcc_space_page_out_blocks_until_memory_released(struct pcc_space *space) {
   ssize_t pending = atomic_load(&space->bytes_to_page_out);
   while (pending > 0) {
-    struct pcc_block *block = pop_available_block(space);
+    struct pcc_block *block = pop_empty_block(space);
     if (!block) return 0;
     page_out_block(space, block);
     pending =
@@ -295,12 +282,102 @@ static void pcc_space_reacquire_memory(struct pcc_space *space,
   while (pending + BLOCK_SIZE <= 0) {
     struct pcc_block *block = page_in_block(space);
     GC_ASSERT(block);
-    push_available_block(space, block);
+    push_empty_block(space, block);
     pending =
       atomic_fetch_add(&space->bytes_to_page_out, BLOCK_SIZE) + BLOCK_SIZE;
   }
 }
 
+static inline void allocator_set_block(struct gc_allocator *alloc,
+                                       struct pcc_block *block,
+                                       int active_region) {
+  struct pcc_block_payload *payload = block_payload(block);
+  struct pcc_region *region = &payload->regions[active_region];
+  alloc->block = block;
+  alloc->hp = (uintptr_t)&region[0];
+  alloc->limit = (uintptr_t)&region[1];
+}
+
+static inline int allocator_acquire_block(struct gc_allocator *alloc,
+                                          struct pcc_block *block,
+                                          int active_region) {
+  if (block) {
+    allocator_set_block(alloc, block, active_region);
+    return 1;
+  }
+  return 0;
+}
+
+static int
+allocator_acquire_empty_block(struct gc_allocator *alloc,
+                              struct pcc_space *space) {
+  return allocator_acquire_block(alloc, pop_empty_block(space),
+                                 space->active_region);
+}
+
+static int
+allocator_acquire_partly_full_block(struct gc_allocator *alloc,
+                                    struct pcc_space *space) {
+  if (allocator_acquire_block(alloc, pop_partly_full_block(space),
+                              space->active_region)) {
+    alloc->hp += alloc->block->allocated;
+    return 1;
+  }
+  return 0;
+}
+
+static void allocator_release_full_block(struct gc_allocator *alloc,
+                                         struct pcc_space *space) {
+  record_fragmentation(space, alloc->limit - alloc->hp);
+  push_full_block(space, alloc->block);
+  alloc->hp = alloc->limit = 0;
+  alloc->block = NULL;
+}
+
+static void allocator_release_partly_full_block(struct gc_allocator *alloc,
+                                                struct pcc_space *space) {
+  size_t allocated = alloc->hp & (REGION_SIZE - 1);
+  if (allocated) {
+    push_partly_full_block(space, alloc->block, allocated);
+  } else {
+    // Could be hp was bumped all the way to the limit, in which case
+    // allocated wraps to 0; in any case the block is full.
+    push_full_block(space, alloc->block);
+  }
+  alloc->hp = alloc->limit = 0;
+  alloc->block = NULL;
+}
+
+static inline struct gc_ref allocate(struct gc_allocator *alloc,
+                                     struct pcc_space *space,
+                                     size_t size,
+                                     void (*get_more_empty_blocks)(void *data),
+                                     void *data) {
+  GC_ASSERT(size > 0);
+  GC_ASSERT(size <= gc_allocator_large_threshold());
+  size = align_up(size, GC_ALIGNMENT);
+
+  if (alloc->hp + size <= alloc->limit)
+    goto done;
+
+  if (alloc->block)
+    allocator_release_full_block(alloc, space);
+  while (allocator_acquire_partly_full_block(alloc, space)) {
+    if (alloc->hp + size <= alloc->limit)
+      goto done;
+    allocator_release_full_block(alloc, space);
+  }
+  while (!allocator_acquire_empty_block(alloc, space))
+    get_more_empty_blocks(data);
+  // The newly acquired block is empty and is therefore large enough for
+  // a small allocation.
+
+done:
+  struct gc_ref ret = gc_ref(alloc->hp);
+  alloc->hp += size;
+  return ret;
+}
+
 static void
 gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
                                          struct gc_heap *heap,
@@ -309,23 +386,10 @@ gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer,
                                          struct gc_heap *heap,
                                          struct gc_trace_worker *worker),
                                struct gc_tracer *tracer,
                                struct gc_heap *heap,
                                struct gc_trace_worker *worker) {
-  struct gc_trace_worker_data data = {0,0,NULL};
+  struct gc_trace_worker_data data = {{0,0,NULL},};
   f(tracer, heap, worker, &data);
-  if (data.block)
-    push_partially_allocated_block(heap_pcc_space(heap), data.block,
-                                   data.hp);
-  // FIXME: Add (data.limit - data.hp) to fragmentation.
-}
-
-static void clear_mutator_allocation_buffers(struct gc_heap *heap) {
-  for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next) {
-    if (mut->block) {
-      record_fragmentation(heap_pcc_space(heap), mut->limit - mut->hp);
-      push_allocated_block(heap_pcc_space(heap), mut->block);
-      mut->block = NULL;
-    }
-    mut->hp = mut->limit = 0;
-  }
+  if (data.allocator.block)
+    allocator_release_partly_full_block(&data.allocator, heap_pcc_space(heap));
 }
 
 static struct pcc_block*
@@ -342,70 +406,31 @@ append_block_lists(struct pcc_block *head, struct pcc_block *tail) {
 
 static void pcc_space_flip(struct pcc_space *space) {
   // Mutators stopped, can access nonatomically.
-  struct pcc_block *available = space->available;
-  struct pcc_block *partially_allocated = space->partially_allocated;
-  struct pcc_block *allocated = space->allocated;
-  allocated = append_block_lists(partially_allocated, allocated);
-  space->available = append_block_lists(available, allocated);
-  space->partially_allocated = NULL;
-  space->allocated = NULL;
-  space->allocated_block_count = 0;
+  space->empty =
+    append_block_lists(space->empty,
+                       append_block_lists(space->partly_full, space->full));
+  space->partly_full = NULL;
+  space->full = NULL;
+  space->full_block_count = 0;
   space->fragmentation = 0;
   space->active_region ^= 1;
 }
 
 static void pcc_space_finish_gc(struct pcc_space *space) {
   // Mutators stopped, can access nonatomically.
-  space->live_bytes_at_last_gc = space->allocated_block_count * REGION_SIZE;
+  space->live_bytes_at_last_gc = space->full_block_count * REGION_SIZE;
   space->fragmentation_at_last_gc = space->fragmentation;
 }
 
-static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
-
-static struct gc_ref evacuation_allocate(struct pcc_space *space,
-                                         struct gc_trace_worker_data *data,
-                                         size_t size) {
-  GC_ASSERT(size > 0);
-  GC_ASSERT(size <= gc_allocator_large_threshold());
-  size = align_up(size, GC_ALIGNMENT);
-
-  uintptr_t hp = data->hp;
-  uintptr_t limit = data->limit;
-  uintptr_t new_hp = hp + size;
-  if (new_hp <= limit)
-    goto done;
-
-  if (data->block) {
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, data->block);
-  }
-  while ((data->block = pop_partially_allocated_block(space))) {
-    pcc_space_compute_region(space, data->block, &hp, &limit);
-    hp += data->block->allocated;
-    new_hp = hp + size;
-    if (new_hp <= limit) {
-      data->limit = limit;
-      goto done;
-    }
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, data->block);
-  }
-  data->block = pop_available_block(space);
-  if (!data->block) {
-    // Can happen if space is really tight and reordering of objects
-    // during evacuation resulted in more end-of-block fragmentation and
-    // thus block use than before collection started. A dire situation.
-    fprintf(stderr, "Out of memory\n");
-    GC_CRASH();
-  }
-  pcc_space_compute_region(space, data->block, &hp, &data->limit);
-  new_hp = hp + size;
-  // The region is empty and is therefore large enough for a small
-  // allocation.
-
-done:
-  data->hp = new_hp;
-  return gc_ref(hp);
+static void get_more_empty_blocks_during_evacuation(void *data) {
+  // If space is really tight and reordering of objects during
+  // evacuation resulted in more end-of-block fragmentation and thus
+  // block use than before collection started, we can actually run out
+  // of memory while collecting. We should probably attempt to expand
+  // the heap here, at least by a single block; it's better than the
+  // alternatives.
+  fprintf(stderr, "Out of memory\n");
+  GC_CRASH();
 }
 
 static inline int pcc_space_forward(struct pcc_space *space,
@@ -427,7 +452,9 @@ static inline int pcc_space_forward(struct pcc_space *space,
   case GC_FORWARDING_STATE_ACQUIRED: {
     // We claimed the object successfully; evacuating is up to us.
     size_t bytes = gc_atomic_forward_object_size(&fwd);
-    struct gc_ref new_ref = evacuation_allocate(space, data, bytes);
+    struct gc_ref new_ref = allocate(&data->allocator, space, bytes,
+                                     get_more_empty_blocks_during_evacuation,
+                                     NULL);
     // Copy object contents before committing, as we don't know what
     // part of the object (if any) will be overwritten by the
     // commit.
@@ -545,7 +572,6 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   mut->event_listener_data =
     heap->event_listener.mutator_added(heap->event_listener_data);
   heap_lock(heap);
-  mut->block = NULL;
   // We have no roots. If there is a GC currently in progress, we have
   // nothing to add. Just wait until it's done.
   while (mutators_are_stopping(heap))
@@ -564,11 +590,8 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
 static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   MUTATOR_EVENT(mut, mutator_removed);
   mut->heap = NULL;
-  if (mut->block) {
-    push_partially_allocated_block(heap_pcc_space(heap), mut->block,
-                                   mut->hp);
-    mut->block = NULL;
-  }
+  if (mut->allocator.block)
+    allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap));
   heap_lock(heap);
   heap->mutator_count--;
   if (mut->next)
@@ -703,6 +726,8 @@ static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) {
   struct gc_heap *heap = mutator_heap(mut);
   GC_ASSERT(mutators_are_stopping(heap));
   MUTATOR_EVENT(mut, mutator_stopping);
+  if (mut->allocator.block)
+    allocator_release_full_block(&mut->allocator, heap_pcc_space(heap));
   heap_lock(heap);
   pause_mutator_for_collection(heap, mut);
   heap_unlock(heap);
@@ -766,6 +791,7 @@ static void sweep_ephemerons(struct gc_heap *heap) {
   return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1);
 }
 
+static void collect(struct gc_mutator *mut) GC_NEVER_INLINE;
 static void collect(struct gc_mutator *mut) {
   struct gc_heap *heap = mutator_heap(mut);
   struct pcc_space *cspace = heap_pcc_space(heap);
@@ -782,7 +808,6 @@ static void collect(struct gc_mutator *mut) {
   HEAP_EVENT(heap, waiting_for_stop);
   wait_for_mutators_to_stop(heap);
   HEAP_EVENT(heap, mutators_stopped);
-  clear_mutator_allocation_buffers(heap);
   pcc_space_flip(cspace);
   gc_tracer_prepare(&heap->tracer);
   add_roots(heap);
@@ -816,6 +841,8 @@ static void collect(struct gc_mutator *mut) {
 
 static void trigger_collection(struct gc_mutator *mut) {
   struct gc_heap *heap = mutator_heap(mut);
+  if (mut->allocator.block)
+    allocator_release_full_block(&mut->allocator, heap_pcc_space(heap));
   heap_lock(heap);
   long epoch = heap->count;
   while (mutators_are_stopping(heap))
@@ -853,47 +880,20 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) {
   return ret;
 }
 
+static void get_more_empty_blocks_for_mutator(void *mut) {
+  trigger_collection(mut);
+}
+
 void* gc_allocate_slow(struct gc_mutator *mut, size_t size) {
   GC_ASSERT(size > 0); // allocating 0 bytes would be silly
 
   if (size > gc_allocator_large_threshold())
     return allocate_large(mut, size);
 
-  size = align_up(size, GC_ALIGNMENT);
-  uintptr_t hp = mut->hp;
-  uintptr_t limit = mut->limit;
-  uintptr_t new_hp = hp + size;
-  if (new_hp <= limit)
-    goto done;
-
-  struct pcc_space *space = heap_pcc_space(mutator_heap(mut));
-  if (mut->block) {
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, mut->block);
-  }
-  while ((mut->block = pop_partially_allocated_block(space))) {
-    pcc_space_compute_region(space, mut->block, &hp, &limit);
-    hp += mut->block->allocated;
-    new_hp = hp + size;
-    if (new_hp <= limit) {
-      mut->limit = limit;
-      goto done;
-    }
-    record_fragmentation(space, limit - hp);
-    push_allocated_block(space, mut->block);
-  }
-  while (!(mut->block = pop_available_block(space))) {
-    trigger_collection(mut);
-  }
-  pcc_space_compute_region(space, mut->block, &hp, &mut->limit);
-  new_hp = hp + size;
-  // The region is empty and is therefore large enough for a small
-  // allocation.
-
-done:
-  mut->hp = new_hp;
-  gc_clear_fresh_allocation(gc_ref(hp), size);
-  return gc_ref_heap_object(gc_ref(hp));
+  return gc_ref_heap_object(allocate(&mut->allocator,
+                                     heap_pcc_space(mutator_heap(mut)),
+                                     size, get_more_empty_blocks_for_mutator,
+                                     mut));
 }
 
 void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) {
@@ -1031,10 +1031,10 @@ static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) {
   if (!slabs)
     return 0;
 
-  space->available = NULL;
-  space->partially_allocated = NULL;
-  space->allocated = NULL;
-  space->allocated_block_count = 0;
+  space->empty = NULL;
+  space->partly_full = NULL;
+  space->full = NULL;
+  space->full_block_count = 0;
   space->paged_out = NULL;
   space->fragmentation = 0;
   space->bytes_to_page_out = 0;
@@ -1056,7 +1056,7 @@ static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) {
         size -= BLOCK_SIZE;
       } else {
         block->in_core = 1;
-        push_available_block(space, block);
+        push_empty_block(space, block);
       }
     }
   }
@@ -1069,10 +1069,11 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
             void *event_listener_data) {
   GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT);
   GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD);
+  GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator));
   GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(),
-               offsetof(struct gc_mutator, hp));
+               offsetof(struct gc_allocator, hp));
   GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
-               offsetof(struct gc_mutator, limit));
+               offsetof(struct gc_allocator, limit));
 
   if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
     fprintf(stderr, "fixed heap size is currently required\n");
     return 0;
   }
@@ -1121,6 +1122,8 @@ void gc_finish_for_thread(struct gc_mutator *mut) {
 
 static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   GC_ASSERT(mut->next == NULL);
+  if (mut->allocator.block)
+    allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap));
   heap_lock(heap);
   heap->inactive_mutator_count++;
   if (all_mutators_stopped(heap))