diff --git a/src/adaptive-heap-sizer.h b/src/adaptive-heap-sizer.h index 126a493b8..df38f181d 100644 --- a/src/adaptive-heap-sizer.h +++ b/src/adaptive-heap-sizer.h @@ -139,7 +139,7 @@ gc_make_adaptive_heap_sizer(double expansiveness, sizer->last_bytes_allocated = get_allocation_counter(callback_data); sizer->last_heartbeat = gc_platform_monotonic_nanoseconds(); sizer->background_task_id = thread - ? gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_FIRST, + ? gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_MIDDLE, gc_adaptive_heap_sizer_background_task, sizer) : -1; diff --git a/src/background-thread.h b/src/background-thread.h index ee858ac58..7a141cee0 100644 --- a/src/background-thread.h +++ b/src/background-thread.h @@ -9,9 +9,9 @@ #include "debug.h" enum { - GC_BACKGROUND_TASK_FIRST = 0, - GC_BACKGROUND_TASK_NORMAL = 100, - GC_BACKGROUND_TASK_LAST = 200 + GC_BACKGROUND_TASK_START = 0, + GC_BACKGROUND_TASK_MIDDLE = 100, + GC_BACKGROUND_TASK_END = 200 }; struct gc_background_task { @@ -21,12 +21,20 @@ struct gc_background_task { void *data; }; +// Lifecycle of the background thread. It is created in STARTING, moves to +// RUNNING via gc_background_thread_start, and to STOPPING on destroy. +enum gc_background_thread_state { + GC_BACKGROUND_THREAD_STARTING, + GC_BACKGROUND_THREAD_RUNNING, + GC_BACKGROUND_THREAD_STOPPING +}; + struct gc_background_thread { size_t count; size_t capacity; struct gc_background_task *tasks; int next_id; - int stopping; + enum gc_background_thread_state state; pthread_t thread; pthread_mutex_t lock; pthread_cond_t cond; @@ -35,19 +43,22 @@ struct gc_background_thread { static void* gc_background_thread(void *data) { struct gc_background_thread *thread = data; + pthread_mutex_lock(&thread->lock); + while (thread->state == GC_BACKGROUND_THREAD_STARTING) + pthread_cond_wait(&thread->cond, &thread->lock); struct timespec ts; if (clock_gettime(CLOCK_REALTIME, &ts)) { perror("background thread: failed to get time!"); + // The lock is already held here; release it before bailing out. + pthread_mutex_unlock(&thread->lock); return NULL; } - pthread_mutex_lock(&thread->lock); - while (!thread->stopping) { + while (thread->state == GC_BACKGROUND_THREAD_RUNNING) { ts.tv_sec += 1; 
pthread_cond_timedwait(&thread->cond, &thread->lock, &ts); - if (thread->stopping) - break; - for (size_t i = 0; i < thread->count; i++) - thread->tasks[i].run(thread->tasks[i].data); + if (thread->state == GC_BACKGROUND_THREAD_RUNNING) + for (size_t i = 0; i < thread->count; i++) + thread->tasks[i].run(thread->tasks[i].data); } pthread_mutex_unlock(&thread->lock); return NULL; @@ -63,6 +74,7 @@ gc_make_background_thread(void) { thread->tasks = NULL; thread->count = 0; thread->capacity = 0; + thread->state = GC_BACKGROUND_THREAD_STARTING; pthread_mutex_init(&thread->lock, NULL); pthread_cond_init(&thread->cond, NULL); if (pthread_create(&thread->thread, NULL, gc_background_thread, thread)) { @@ -72,6 +84,17 @@ gc_make_background_thread(void) { return thread; } +// Release the background thread from its initial wait: flips the state from +// STARTING to RUNNING under the lock, then signals the condition variable. +static void +gc_background_thread_start(struct gc_background_thread *thread) { + pthread_mutex_lock(&thread->lock); + GC_ASSERT_EQ(thread->state, GC_BACKGROUND_THREAD_STARTING); + thread->state = GC_BACKGROUND_THREAD_RUNNING; + pthread_mutex_unlock(&thread->lock); + pthread_cond_signal(&thread->cond); +} + static int gc_background_thread_add_task(struct gc_background_thread *thread, int priority, void (*run)(void *data), @@ -126,8 +149,8 @@ gc_background_thread_remove_task(struct gc_background_thread *thread, static void gc_destroy_background_thread(struct gc_background_thread *thread) { pthread_mutex_lock(&thread->lock); - GC_ASSERT(!thread->stopping); - thread->stopping = 1; + GC_ASSERT(thread->state == GC_BACKGROUND_THREAD_RUNNING); + thread->state = GC_BACKGROUND_THREAD_STOPPING; pthread_mutex_unlock(&thread->lock); pthread_cond_signal(&thread->cond); pthread_join(thread->thread, NULL); diff --git a/src/copy-space.h b/src/copy-space.h index 7d8ab98a2..98d3f6146 100644 --- a/src/copy-space.h +++ b/src/copy-space.h @@ -10,6 +10,7 @@ #include "gc-internal.h" #include "assert.h" +#include "background-thread.h" #include "debug.h" #include "extents.h" #include "gc-align.h" @@ -102,13 +103,16 @@ 
copy_space_object_region(struct gc_ref obj) { return (gc_ref_value(obj) / COPY_SPACE_REGION_SIZE) & 1; } +#define COPY_SPACE_PAGE_OUT_QUEUE_SIZE 4 + struct copy_space { struct copy_space_block *empty; struct copy_space_block *partly_full; struct copy_space_block *full ALIGNED_TO_AVOID_FALSE_SHARING; size_t allocated_bytes; size_t fragmentation; - struct copy_space_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING; + struct copy_space_block *paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE] + ALIGNED_TO_AVOID_FALSE_SHARING; ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING; // The rest of these members are only changed rarely and with the heap // lock. @@ -186,31 +190,23 @@ copy_space_push_partly_full_block(struct copy_space *space, copy_space_push_block(&space->partly_full, block); } -static struct copy_space_block* -copy_space_pop_paged_out_block(struct copy_space *space) { - return copy_space_pop_block(&space->paged_out); -} - -static void -copy_space_push_paged_out_block(struct copy_space *space, - struct copy_space_block *block) { - copy_space_push_block(&space->paged_out, block); -} - static void copy_space_page_out_block(struct copy_space *space, struct copy_space_block *block) { - block->in_core = 0; - block->all_zeroes[0] = block->all_zeroes[1] = 1; - madvise(copy_space_block_payload(block), COPY_SPACE_BLOCK_SIZE, MADV_DONTNEED); - copy_space_push_paged_out_block(space, block); + copy_space_push_block(block->in_core + ? 
&space->paged_out[0] + : &space->paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE-1], + block); } static struct copy_space_block* copy_space_page_in_block(struct copy_space *space) { - struct copy_space_block* block = copy_space_pop_paged_out_block(space); - if (block) block->in_core = 1; - return block; + for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) { + struct copy_space_block *block = + copy_space_pop_block(&space->paged_out[age]); + if (block) return block; + } + return NULL; } static ssize_t @@ -280,6 +276,7 @@ copy_space_allocator_acquire_empty_block(struct copy_space_allocator *alloc, if (copy_space_allocator_acquire_block(alloc, copy_space_pop_empty_block(space), space->active_region)) { + alloc->block->in_core = 1; if (alloc->block->all_zeroes[space->active_region]) alloc->block->all_zeroes[space->active_region] = 0; else @@ -629,15 +626,45 @@ copy_space_expand(struct copy_space *space, size_t bytes) { struct copy_space_block *block = &slabs[slab].headers[idx]; block->all_zeroes[0] = block->all_zeroes[1] = 1; block->in_core = 0; - copy_space_push_paged_out_block(space, block); + copy_space_page_out_block(space, block); reserved -= COPY_SPACE_BLOCK_SIZE; } } copy_space_reacquire_memory(space, 0); } +static void +copy_space_advance_page_out_queue(void *data) { + struct copy_space *space = data; + for (int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) { + while (1) { + struct copy_space_block *block = + copy_space_pop_block(&space->paged_out[age]); + if (!block) break; + copy_space_push_block(&space->paged_out[age + 1], block); + } + } +} + +static void +copy_space_page_out_blocks(void *data) { + struct copy_space *space = data; + int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 2; + while (1) { + struct copy_space_block *block = + copy_space_pop_block(&space->paged_out[age]); + if (!block) break; + block->in_core = 0; + block->all_zeroes[0] = block->all_zeroes[1] = 1; + madvise(copy_space_block_payload(block), COPY_SPACE_BLOCK_SIZE, + 
MADV_DONTNEED); + copy_space_push_block(&space->paged_out[age + 1], block); + } +} + static int -copy_space_init(struct copy_space *space, size_t size, int atomic) { +copy_space_init(struct copy_space *space, size_t size, int atomic, + struct gc_background_thread *thread) { size = align_up(size, COPY_SPACE_BLOCK_SIZE); size_t reserved = align_up(size, COPY_SPACE_SLAB_SIZE); size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE; @@ -648,7 +675,8 @@ copy_space_init(struct copy_space *space, size_t size, int atomic) { space->empty = NULL; space->partly_full = NULL; space->full = NULL; - space->paged_out = NULL; + for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) + space->paged_out[age] = NULL; space->allocated_bytes = 0; space->fragmentation = 0; space->bytes_to_page_out = 0; @@ -662,16 +690,21 @@ copy_space_init(struct copy_space *space, size_t size, int atomic) { for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) { struct copy_space_block *block = &slabs[slab].headers[idx]; block->all_zeroes[0] = block->all_zeroes[1] = 1; + block->in_core = 0; if (reserved > size) { - block->in_core = 0; - copy_space_push_paged_out_block(space, block); + copy_space_page_out_block(space, block); reserved -= COPY_SPACE_BLOCK_SIZE; } else { - block->in_core = 1; copy_space_push_empty_block(space, block); } } } + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START, + copy_space_advance_page_out_queue, + space); + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END, + copy_space_page_out_blocks, + space); return 1; } diff --git a/src/mmc.c b/src/mmc.c index 061d3b80f..bd0cc958b 100644 --- a/src/mmc.c +++ b/src/mmc.c @@ -1010,6 +1010,8 @@ heap_init(struct gc_heap *heap, const struct gc_options *options) { if (!heap->finalizer_state) GC_CRASH(); + heap->background_thread = gc_make_background_thread(); + return 1; } @@ -1064,7 +1066,8 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, struct nofl_space *space = 
heap_nofl_space(*heap); if (!nofl_space_init(space, (*heap)->size, options->common.parallelism != 1, - (*heap)->fragmentation_low_threshold)) { + (*heap)->fragmentation_low_threshold, + (*heap)->background_thread)) { free(*heap); *heap = NULL; return 0; @@ -1073,7 +1076,6 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, if (!large_object_space_init(heap_large_object_space(*heap), *heap)) GC_CRASH(); - (*heap)->background_thread = gc_make_background_thread(); (*heap)->sizer = gc_make_heap_sizer(*heap, &options->common, allocation_counter_from_thread, set_heap_size_from_thread, @@ -1084,6 +1086,9 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, if (!*mut) GC_CRASH(); gc_stack_init(&(*mut)->stack, stack_base); add_mutator(*heap, *mut); + + gc_background_thread_start((*heap)->background_thread); + return 1; } diff --git a/src/nofl-space.h b/src/nofl-space.h index 5c46bb7ff..f52ad9e28 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -137,6 +137,8 @@ struct nofl_block_list { uintptr_t blocks; }; +#define NOFL_PAGE_OUT_QUEUE_SIZE 4 + struct nofl_space { uint64_t sweep_mask; uint8_t live_mask; @@ -146,7 +148,7 @@ struct nofl_space { size_t heap_size; uint8_t last_collection_was_minor; struct nofl_block_list empty; - struct nofl_block_list unavailable; + struct nofl_block_list paged_out[NOFL_PAGE_OUT_QUEUE_SIZE]; struct nofl_block_list to_sweep; struct nofl_block_list partly_full; struct nofl_block_list full; @@ -407,31 +409,26 @@ nofl_block_count(struct nofl_block_list *list) { return atomic_load_explicit(&list->count, memory_order_acquire); } -static void -nofl_push_paged_out_block(struct nofl_space *space, - struct nofl_block_ref block) { - GC_ASSERT(nofl_block_has_flag(block, - NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT)); - nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE); - nofl_push_block(&space->unavailable, block); -} - static void nofl_push_unavailable_block(struct nofl_space *space, struct 
nofl_block_ref block) { - if (!nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT)) { - nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); - madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED); - } - nofl_push_paged_out_block(space, block); + nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE); + nofl_push_block(nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT) + ? &space->paged_out[NOFL_PAGE_OUT_QUEUE_SIZE-1] + : &space->paged_out[0], + block); } static struct nofl_block_ref nofl_pop_unavailable_block(struct nofl_space *space) { - struct nofl_block_ref block = nofl_pop_block(&space->unavailable); - if (!nofl_block_is_null(block)) - nofl_block_clear_flag(block, NOFL_BLOCK_UNAVAILABLE); - return block; + for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) { + struct nofl_block_ref block = nofl_pop_block(&space->paged_out[age]); + if (!nofl_block_is_null(block)) { + nofl_block_clear_flag(block, NOFL_BLOCK_UNAVAILABLE); + return block; + } + } + return nofl_block_null(); } static void @@ -445,14 +442,23 @@ nofl_pop_empty_block(struct nofl_space *space) { return nofl_pop_block(&space->empty); } +static size_t +nofl_active_block_count(struct nofl_space *space) { + size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = 0; + for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) + unavailable += nofl_block_count(&space->paged_out[age]); + GC_ASSERT(unavailable <= total); + return total - unavailable; +} + static int nofl_maybe_push_evacuation_target(struct nofl_space *space, struct nofl_block_ref block, double reserve) { size_t targets = nofl_block_count(&space->evacuation_targets); - size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = nofl_block_count(&space->unavailable); - if (targets >= (total - unavailable) * reserve) + size_t active = nofl_active_block_count(space); + if (targets >= active * reserve) return 0; nofl_push_block(&space->evacuation_targets, block); @@ -1084,9 +1090,8 
@@ nofl_space_finish_evacuation(struct nofl_space *space) { // repopulate the reserve. GC_ASSERT(space->evacuating); space->evacuating = 0; - size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = nofl_block_count(&space->unavailable); - size_t reserve = space->evacuation_minimum_reserve * (total - unavailable); + size_t active = nofl_active_block_count(space); + size_t reserve = space->evacuation_minimum_reserve * active; GC_ASSERT(nofl_block_count(&space->evacuation_targets) == 0); while (reserve--) { struct nofl_block_ref block = nofl_pop_block(&space->empty); @@ -1214,7 +1219,8 @@ nofl_space_verify_before_restart(struct nofl_space *space) { nofl_space_verify_swept_blocks(space, &space->full); nofl_space_verify_swept_blocks(space, &space->old); nofl_space_verify_empty_blocks(space, &space->empty, 1); - nofl_space_verify_empty_blocks(space, &space->unavailable, 0); + for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) + nofl_space_verify_empty_blocks(space, &space->paged_out[age], 0); // GC_ASSERT(space->last_collection_was_minor || !nofl_block_count(&space->old)); } @@ -1229,9 +1235,8 @@ nofl_space_finish_gc(struct nofl_space *space, // If we were evacuating and preferentially allocated empty blocks // to the evacuation reserve, return those blocks to the empty set // for allocation by the mutator. - size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = nofl_block_count(&space->unavailable); - size_t target = space->evacuation_minimum_reserve * (total - unavailable); + size_t active = nofl_active_block_count(space); + size_t target = space->evacuation_minimum_reserve * active; size_t reserve = nofl_block_count(&space->evacuation_targets); while (reserve-- > target) nofl_push_block(&space->empty, @@ -1626,9 +1631,8 @@ nofl_space_shrink(struct nofl_space *space, size_t bytes) { // during trace, synchronously from gc_heap_sizer_on_gc, or async but subject // to the heap lock. 
if (pending > 0) { - size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; - size_t unavailable = nofl_block_count(&space->unavailable); - size_t target = space->evacuation_minimum_reserve * (total - unavailable); + size_t active = nofl_active_block_count(space); + size_t target = space->evacuation_minimum_reserve * active; ssize_t avail = nofl_block_count(&space->evacuation_targets); while (avail > target && pending > 0) { struct nofl_block_ref block = nofl_pop_block(&space->evacuation_targets); @@ -1660,15 +1664,52 @@ nofl_space_expand(struct nofl_space *space, size_t bytes) { uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data; struct nofl_block_ref block = nofl_block_for_addr(addr); nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); - nofl_push_paged_out_block(space, block); + nofl_push_unavailable_block(space, block); } } nofl_space_reacquire_memory(space, 0); } +static void +nofl_space_advance_page_out_queue(void *data) { + // When the nofl space goes to return a block to the OS, it goes on the head + // of the page-out queue. Every second, the background thread will age the + // queue, moving all blocks from index 0 to index 1, and so on. When a block + // reaches the end of the queue it is paged out (and stays at the end of the + // queue). In this task, invoked by the background thread, we age queue + // items, except that we don't page out yet, as it could be that some other + // background task will need to pull pages back in. + struct nofl_space *space = data; + for (int age = NOFL_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) { + while (1) { + struct nofl_block_ref block = nofl_pop_block(&space->paged_out[age]); + if (nofl_block_is_null(block)) + break; + nofl_push_block(&space->paged_out[age + 1], block); + } + } +} + +static void +nofl_space_page_out_blocks(void *data) { + // This task is invoked by the background thread after other tasks. It + // actually pages out blocks that reached the end of the queue. 
+ struct nofl_space *space = data; + int age = NOFL_PAGE_OUT_QUEUE_SIZE - 2; + while (1) { + struct nofl_block_ref block = nofl_pop_block(&space->paged_out[age]); + if (nofl_block_is_null(block)) + break; + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED); + nofl_push_block(&space->paged_out[age + 1], block); + } +} + static int nofl_space_init(struct nofl_space *space, size_t size, int atomic, - double promotion_threshold) { + double promotion_threshold, + struct gc_background_thread *thread) { size = align_up(size, NOFL_BLOCK_SIZE); size_t reserved = align_up(size, NOFL_SLAB_SIZE); size_t nslabs = reserved / NOFL_SLAB_SIZE; @@ -1689,7 +1730,7 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic, struct nofl_block_ref block = nofl_block_for_addr(addr); nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); if (reserved > size) { - nofl_push_paged_out_block(space, block); + nofl_push_unavailable_block(space, block); reserved -= NOFL_BLOCK_SIZE; } else { if (!nofl_push_evacuation_target_if_needed(space, block)) @@ -1697,6 +1738,12 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic, } } } + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START, + nofl_space_advance_page_out_queue, + space); + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END, + nofl_space_page_out_blocks, + space); return 1; } diff --git a/src/pcc.c b/src/pcc.c index 2cd919e50..54c03404e 100644 --- a/src/pcc.c +++ b/src/pcc.c @@ -606,6 +606,8 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) { if (!heap->finalizer_state) GC_CRASH(); + heap->background_thread = gc_make_background_thread(); + return 1; } @@ -651,7 +653,8 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, struct copy_space *space = heap_copy_space(*heap); int atomic_forward = options->common.parallelism > 1; - if 
(!copy_space_init(space, (*heap)->size, atomic_forward)) { + if (!copy_space_init(space, (*heap)->size, atomic_forward, + (*heap)->background_thread)) { free(*heap); *heap = NULL; return 0; @@ -670,6 +673,9 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, *mut = calloc(1, sizeof(struct gc_mutator)); if (!*mut) GC_CRASH(); add_mutator(*heap, *mut); + + gc_background_thread_start((*heap)->background_thread); + return 1; }