diff --git a/Makefile b/Makefile
index 30d84ff71..3f7278bdc 100644
--- a/Makefile
+++ b/Makefile
@@ -62,13 +62,16 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
+GC_LIBS_semi = -lm
 
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
+GC_LIBS_pcc = -lm
 
 define mmc_variant
 GC_STEM_$(1) = mmc
 GC_CFLAGS_$(1) = $(2)
+GC_LIBS_$(1) = -lm
 endef
 
 define generational_mmc_variants
diff --git a/api/gc-options.h b/api/gc-options.h
index 35cb7aacf..2f3f7f792 100644
--- a/api/gc-options.h
+++ b/api/gc-options.h
@@ -14,7 +14,7 @@
 enum {
   GC_OPTION_HEAP_SIZE,
   GC_OPTION_MAXIMUM_HEAP_SIZE,
   GC_OPTION_HEAP_SIZE_MULTIPLIER,
-  GC_OPTION_HEAP_FRUGALITY,
+  GC_OPTION_HEAP_EXPANSIVENESS,
   GC_OPTION_PARALLELISM
 };
diff --git a/doc/manual.md b/doc/manual.md
index eb648f54d..e88ac8198 100644
--- a/doc/manual.md
+++ b/doc/manual.md
@@ -460,8 +460,9 @@ defined for all collectors:
  * `GC_OPTION_HEAP_SIZE_MULTIPLIER`: For growable heaps, the target heap
    multiplier. A heap multiplier of 2.5 means that for 100 MB of live data,
    the heap should be 250 MB.
- * `GC_OPTION_HEAP_FRUGALITY`: Something that will be used in adaptive
-   heaps, apparently! Not yet implemented.
+ * `GC_OPTION_HEAP_EXPANSIVENESS`: For adaptive heap sizing, an
+   indication of how much free space will be given to heaps, as a
+   proportion of the square root of the live data size.
  * `GC_OPTION_PARALLELISM`: How many threads to devote to collection
    tasks during GC pauses. By default, the current number of
    processors, with a maximum of 8.
diff --git a/embed.mk b/embed.mk
index d0608b345..9ef4d8ab7 100644
--- a/embed.mk
+++ b/embed.mk
@@ -41,13 +41,16 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
 
 GC_STEM_semi = semi
 GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
+GC_LIBS_semi = -lm
 
 GC_STEM_pcc = pcc
 GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
+GC_LIBS_pcc = -lm
 
 define mmc_variant
 GC_STEM_$(1) = mmc
 GC_CFLAGS_$(1) = $(2)
+GC_LIBS_$(1) = -lm
 endef
 
 define generational_mmc_variants
diff --git a/src/adaptive-heap-sizer.h b/src/adaptive-heap-sizer.h
new file mode 100644
index 000000000..796f07469
--- /dev/null
+++ b/src/adaptive-heap-sizer.h
@@ -0,0 +1,173 @@
+#ifndef ADAPTIVE_HEAP_SIZER_H
+#define ADAPTIVE_HEAP_SIZER_H
+
+#include <math.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "assert.h"
+#include "debug.h"
+#include "heap-sizer.h"
+#include "gc-platform.h"
+
+// This is the MemBalancer algorithm from "Optimal Heap Limits for Reducing
+// Browser Memory Use" by Marisa Kirisame, Pranav Shenoy, and Pavel Panchekha
+// (https://arxiv.org/abs/2204.10455).
+//
+// This implementation differs slightly in that the constant "c" of the paper
+// has been extracted outside the radical, and notionally reversed: it is a
+// unitless "expansiveness" parameter whose domain is [0,+∞]. Also there are
+// minimum and maximum heap size multipliers, and a minimum amount of free
+// space. The initial collection rate is an informed guess. The initial
+// allocation rate estimate is high, considering that allocation rates are often
+// high on program startup.
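+//
+// In terms of the fields below, the target size computed by
+// gc_adaptive_heap_sizer_calculate_size is approximately:
+//
+//   collection_rate = smoothed_pause_time / smoothed_live_bytes
+//   multiplier = 1 + expansiveness * sqrt(live_bytes * allocation_rate
+//                                         / collection_rate)
+//   size = live_bytes * clamp(multiplier,
+//                             minimum_multiplier, maximum_multiplier)
+//
+// with the additional constraint that at least minimum_free_space bytes of
+// headroom are kept beyond the live data.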
+ +struct gc_adaptive_heap_sizer { + uint64_t (*get_allocation_counter)(void *callback_data); + void (*set_heap_size)(size_t size, void *callback_data); + void *callback_data; + uint64_t smoothed_pause_time; + uint64_t smoothed_live_bytes; + uint64_t live_bytes; + double smoothed_allocation_rate; + double collection_smoothing_factor; + double allocation_smoothing_factor; + double minimum_multiplier; + double maximum_multiplier; + double minimum_free_space; + double expansiveness; + int stopping; + pthread_t thread; + pthread_mutex_t lock; + pthread_cond_t cond; +}; + +// With lock +static uint64_t +gc_adaptive_heap_sizer_calculate_size(struct gc_adaptive_heap_sizer *sizer) { + double allocation_rate = sizer->smoothed_allocation_rate; + double collection_rate = + (double)sizer->smoothed_pause_time / (double)sizer->smoothed_live_bytes; + double radicand = sizer->live_bytes * allocation_rate / collection_rate; + double multiplier = 1.0 + sizer->expansiveness * sqrt(radicand); + if (isnan(multiplier) || multiplier < sizer->minimum_multiplier) + multiplier = sizer->minimum_multiplier; + else if (multiplier > sizer->maximum_multiplier) + multiplier = sizer->maximum_multiplier; + uint64_t size = sizer->live_bytes * multiplier; + if (size - sizer->live_bytes < sizer->minimum_free_space) + size = sizer->live_bytes + sizer->minimum_free_space; + return size; +} + +static uint64_t +gc_adaptive_heap_sizer_set_expansiveness(struct gc_adaptive_heap_sizer *sizer, + double expansiveness) { + pthread_mutex_lock(&sizer->lock); + sizer->expansiveness = expansiveness; + uint64_t heap_size = gc_adaptive_heap_sizer_calculate_size(sizer); + pthread_mutex_unlock(&sizer->lock); + return heap_size; +} + +static void +gc_adaptive_heap_sizer_on_gc(struct gc_adaptive_heap_sizer *sizer, + size_t live_bytes, uint64_t pause_ns, + void (*set_heap_size)(size_t, void*), + void *data) { + pthread_mutex_lock(&sizer->lock); + sizer->live_bytes = live_bytes; + sizer->smoothed_live_bytes *= 1.0 - sizer->collection_smoothing_factor; + sizer->smoothed_live_bytes += sizer->collection_smoothing_factor * live_bytes; + sizer->smoothed_pause_time *= 1.0 - sizer->collection_smoothing_factor; + sizer->smoothed_pause_time += sizer->collection_smoothing_factor * pause_ns; + set_heap_size(gc_adaptive_heap_sizer_calculate_size(sizer), data); + pthread_mutex_unlock(&sizer->lock); +} + +static void* +gc_adaptive_heap_sizer_thread(void *data) { + struct gc_adaptive_heap_sizer *sizer = data; + uint64_t last_bytes_allocated = + sizer->get_allocation_counter(sizer->callback_data); + uint64_t last_heartbeat = gc_platform_monotonic_nanoseconds(); + pthread_mutex_lock(&sizer->lock); + while (!sizer->stopping) { + { + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts)) { + perror("adaptive heap sizer thread: failed to get time!"); + break; + } + ts.tv_sec += 1; + pthread_cond_timedwait(&sizer->cond, &sizer->lock, &ts); + } + uint64_t bytes_allocated = + sizer->get_allocation_counter(sizer->callback_data); + uint64_t heartbeat = gc_platform_monotonic_nanoseconds(); + double rate = (double) (bytes_allocated - last_bytes_allocated) / + (double) (heartbeat - last_heartbeat); + // Just smooth the rate, under the assumption that the denominator is almost + // always 1. 
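+    // That is, the estimate is an exponentially weighted moving average:
+    //   smoothed <- smoothed * (1 - f) + sample * f
+    // with f = allocation_smoothing_factor (0.95 by default), so the most
+    // recent roughly once-per-second samples dominate.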
+ sizer->smoothed_allocation_rate *= 1.0 - sizer->allocation_smoothing_factor; + sizer->smoothed_allocation_rate += rate * sizer->allocation_smoothing_factor; + last_heartbeat = heartbeat; + last_bytes_allocated = bytes_allocated; + sizer->set_heap_size(gc_adaptive_heap_sizer_calculate_size(sizer), + sizer->callback_data); + } + pthread_mutex_unlock(&sizer->lock); + return NULL; +} + +static struct gc_adaptive_heap_sizer* +gc_make_adaptive_heap_sizer(double expansiveness, + uint64_t (*get_allocation_counter)(void *), + void (*set_heap_size)(size_t , void *), + void *callback_data) { + struct gc_adaptive_heap_sizer *sizer; + sizer = malloc(sizeof(*sizer)); + if (!sizer) + GC_CRASH(); + memset(sizer, 0, sizeof(*sizer)); + sizer->get_allocation_counter = get_allocation_counter; + sizer->set_heap_size = set_heap_size; + sizer->callback_data = callback_data; + // Baseline estimate of GC speed: 10 MB/ms, or 10 bytes/ns. However since we + // observe this speed by separately noisy measurements, we have to provide + // defaults for numerator and denominator; estimate 2ms for initial GC pauses + // for 20 MB of live data during program startup. + sizer->smoothed_pause_time = 2 * 1000 * 1000; + sizer->smoothed_live_bytes = 20 * 1024 * 1024; + // Baseline estimate of allocation rate during startup: 50 MB in 10ms, or 5 + // bytes/ns. + sizer->smoothed_allocation_rate = 5; + sizer->collection_smoothing_factor = 0.5; + sizer->allocation_smoothing_factor = 0.95; + sizer->minimum_multiplier = 1.1; + sizer->maximum_multiplier = 5; + sizer->minimum_free_space = 4 * 1024 * 1024; + sizer->expansiveness = expansiveness; + pthread_mutex_init(&sizer->lock, NULL); + pthread_cond_init(&sizer->cond, NULL); + if (pthread_create(&sizer->thread, NULL, gc_adaptive_heap_sizer_thread, + sizer)) { + perror("spawning adaptive heap size thread failed"); + GC_CRASH(); + } + return sizer; +} + +static void +gc_destroy_adaptive_heap_sizer(struct gc_adaptive_heap_sizer *sizer) { + pthread_mutex_lock(&sizer->lock); + GC_ASSERT(!sizer->stopping); + sizer->stopping = 1; + pthread_mutex_unlock(&sizer->lock); + pthread_cond_signal(&sizer->cond); + pthread_join(sizer->thread, NULL); + free(sizer); +} + +#endif // ADAPTIVE_HEAP_SIZER_H diff --git a/src/copy-space.h b/src/copy-space.h index ba26444bb..7d8ab98a2 100644 --- a/src/copy-space.h +++ b/src/copy-space.h @@ -117,7 +117,7 @@ struct copy_space { size_t allocated_bytes_at_last_gc; size_t fragmentation_at_last_gc; struct extents *extents; - struct copy_space_slab *slabs; + struct copy_space_slab **slabs; size_t nslabs; }; @@ -231,17 +231,25 @@ copy_space_page_out_blocks_until_memory_released(struct copy_space *space) { return 1; } -static void -copy_space_reacquire_memory(struct copy_space *space, size_t bytes) { +static ssize_t +copy_space_maybe_reacquire_memory(struct copy_space *space, size_t bytes) { ssize_t pending = atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes; while (pending + COPY_SPACE_BLOCK_SIZE <= 0) { struct copy_space_block *block = copy_space_page_in_block(space); - GC_ASSERT(block); + if (!block) break; copy_space_push_empty_block(space, block); - pending = (atomic_fetch_add(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE) + pending = (atomic_fetch_add(&space->bytes_to_page_out, + COPY_SPACE_BLOCK_SIZE) + COPY_SPACE_BLOCK_SIZE); } + return pending; +} + +static void +copy_space_reacquire_memory(struct copy_space *space, size_t bytes) { + ssize_t pending = copy_space_maybe_reacquire_memory(space, bytes); + GC_ASSERT(pending + COPY_SPACE_BLOCK_SIZE > 
0); } static inline void @@ -395,6 +403,12 @@ copy_space_finish_gc(struct copy_space *space) { space->fragmentation_at_last_gc = space->fragmentation; } +static void +copy_space_add_to_allocation_counter(struct copy_space *space, + uintptr_t *counter) { + *counter += space->allocated_bytes - space->allocated_bytes_at_last_gc; +} + static void copy_space_gc_during_evacuation(void *data) { // If space is really tight and reordering of objects during @@ -578,6 +592,50 @@ copy_space_allocate_slabs(size_t nslabs) { return (struct copy_space_slab*) aligned_base; } +static void +copy_space_add_slabs(struct copy_space *space, struct copy_space_slab *slabs, + size_t nslabs) { + size_t old_size = space->nslabs * sizeof(struct copy_space_slab*); + size_t additional_size = nslabs * sizeof(struct copy_space_slab*); + space->extents = extents_adjoin(space->extents, slabs, + nslabs * sizeof(struct copy_space_slab)); + space->slabs = realloc(space->slabs, old_size + additional_size); + if (!space->slabs) + GC_CRASH(); + while (nslabs--) + space->slabs[space->nslabs++] = slabs++; +} + +static void +copy_space_shrink(struct copy_space *space, size_t bytes) { + ssize_t pending = copy_space_request_release_memory(space, bytes); + copy_space_page_out_blocks_until_memory_released(space); + + // It still may be the case we need to page out more blocks. Only collection + // can help us then! +} + +static void +copy_space_expand(struct copy_space *space, size_t bytes) { + ssize_t to_acquire = -copy_space_maybe_reacquire_memory(space, bytes); + if (to_acquire <= 0) return; + size_t reserved = align_up(to_acquire, COPY_SPACE_SLAB_SIZE); + size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE; + struct copy_space_slab *slabs = copy_space_allocate_slabs(nslabs); + copy_space_add_slabs(space, slabs, nslabs); + + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) { + struct copy_space_block *block = &slabs[slab].headers[idx]; + block->all_zeroes[0] = block->all_zeroes[1] = 1; + block->in_core = 0; + copy_space_push_paged_out_block(space, block); + reserved -= COPY_SPACE_BLOCK_SIZE; + } + } + copy_space_reacquire_memory(space, 0); +} + static int copy_space_init(struct copy_space *space, size_t size, int atomic) { size = align_up(size, COPY_SPACE_BLOCK_SIZE); @@ -599,9 +657,7 @@ copy_space_init(struct copy_space *space, size_t size, int atomic) { space->allocated_bytes_at_last_gc = 0; space->fragmentation_at_last_gc = 0; space->extents = extents_allocate(10); - extents_adjoin(space->extents, slabs, reserved); - space->slabs = slabs; - space->nslabs = nslabs; + copy_space_add_slabs(space, slabs, nslabs); for (size_t slab = 0; slab < nslabs; slab++) { for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) { struct copy_space_block *block = &slabs[slab].headers[idx]; diff --git a/src/gc-internal.h b/src/gc-internal.h index 7cbb79f58..715b72a99 100644 --- a/src/gc-internal.h +++ b/src/gc-internal.h @@ -9,4 +9,8 @@ #include "gc-finalizer-internal.h" #include "gc-options-internal.h" +uint64_t gc_heap_total_bytes_allocated(struct gc_heap *heap); +void gc_mutator_adjust_heap_size(struct gc_mutator *mut, uint64_t new_size); + + #endif // GC_INTERNAL_H diff --git a/src/gc-options-internal.h b/src/gc-options-internal.h index 4190cb841..9e9fbca22 100644 --- a/src/gc-options-internal.h +++ b/src/gc-options-internal.h @@ -12,7 +12,7 @@ struct gc_common_options { size_t heap_size; size_t maximum_heap_size; double heap_size_multiplier; - double 
heap_frugality;
+  double heap_expansiveness;
   int parallelism;
 };
 
diff --git a/src/gc-options.c b/src/gc-options.c
index c41b8fe51..31de02745 100644
--- a/src/gc-options.c
+++ b/src/gc-options.c
@@ -25,8 +25,8 @@
 #define FOR_EACH_DOUBLE_GC_OPTION(M) \
   M(HEAP_SIZE_MULTIPLIER, heap_size_multiplier, "heap-size-multiplier", \
     double, double, 1.75, 1.0, 1e6) \
-  M(HEAP_FRUGALITY, heap_frugality, "heap-frugality", \
-    double, double, 1e-1, 1e-6, 1e6)
+  M(HEAP_EXPANSIVENESS, heap_expansiveness, "heap-expansiveness", \
+    double, double, 1.0, 0.0, 50.0)
 
 typedef int gc_option_int;
 typedef size_t gc_option_size;
diff --git a/src/gc-platform-gnu-linux.c b/src/gc-platform-gnu-linux.c
index 82390d445..ebcfa5579 100644
--- a/src/gc-platform-gnu-linux.c
+++ b/src/gc-platform-gnu-linux.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #define GC_IMPL 1
@@ -110,3 +111,13 @@ int gc_platform_processor_count(void) {
     return 1;
   return CPU_COUNT(&set);
 }
+
+uint64_t gc_platform_monotonic_nanoseconds(void) {
+  struct timespec ts;
+  if (clock_gettime(CLOCK_MONOTONIC, &ts))
+    GC_CRASH();
+  uint64_t s = ts.tv_sec;
+  uint64_t ns = ts.tv_nsec;
+  uint64_t ns_per_sec = 1000000000;
+  return s * ns_per_sec + ns;
+}
diff --git a/src/gc-platform.h b/src/gc-platform.h
index ea6a6aa18..42335ed7a 100644
--- a/src/gc-platform.h
+++ b/src/gc-platform.h
@@ -21,5 +21,6 @@ void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start,
                                                  struct gc_heap *heap,
                                                  void *data);
 GC_INTERNAL int gc_platform_processor_count(void);
+GC_INTERNAL uint64_t gc_platform_monotonic_nanoseconds(void);
 
 #endif // GC_PLATFORM_H
diff --git a/src/growable-heap-sizer.h b/src/growable-heap-sizer.h
new file mode 100644
index 000000000..bf4200893
--- /dev/null
+++ b/src/growable-heap-sizer.h
@@ -0,0 +1,58 @@
+#ifndef GROWABLE_HEAP_SIZER_H
+#define GROWABLE_HEAP_SIZER_H
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "assert.h"
+#include "heap-sizer.h"
+
+// This is a simple heap-sizing algorithm that will grow the heap if it is
+// smaller than a given multiplier of the live data size. It does not shrink
+// the heap.
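+//
+// For example, with the default heap-size-multiplier of 1.75 (see
+// gc-options.c), a collection that leaves 100 MB of live data yields a
+// target of 175 MB; the heap is grown if it is currently smaller than that,
+// and otherwise left at its current size.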
+ +struct gc_growable_heap_sizer { + double multiplier; + pthread_mutex_t lock; +}; + +static void +gc_growable_heap_sizer_set_multiplier(struct gc_growable_heap_sizer *sizer, + double multiplier) { + pthread_mutex_lock(&sizer->lock); + sizer->multiplier = multiplier; + pthread_mutex_unlock(&sizer->lock); +} + +static void +gc_growable_heap_sizer_on_gc(struct gc_growable_heap_sizer *sizer, + size_t heap_size, size_t live_bytes, + uint64_t pause_ns, + void (*set_heap_size)(size_t, void*), + void *data) { + pthread_mutex_lock(&sizer->lock); + size_t target_size = live_bytes * sizer->multiplier; + if (target_size > heap_size) + set_heap_size(target_size, data); + pthread_mutex_unlock(&sizer->lock); +} + +static struct gc_growable_heap_sizer* +gc_make_growable_heap_sizer(double multiplier) { + struct gc_growable_heap_sizer *sizer; + sizer = malloc(sizeof(*sizer)); + if (!sizer) + GC_CRASH(); + memset(sizer, 0, sizeof(*sizer)); + sizer->multiplier = multiplier; + pthread_mutex_init(&sizer->lock, NULL); + return sizer; +} + +static void +gc_destroy_growable_heap_sizer(struct gc_growable_heap_sizer *sizer) { + free(sizer); +} + +#endif // GROWABLE_HEAP_SIZER_H diff --git a/src/heap-sizer.h b/src/heap-sizer.h new file mode 100644 index 000000000..dc6f3d2ef --- /dev/null +++ b/src/heap-sizer.h @@ -0,0 +1,71 @@ +#ifndef HEAP_SIZER_H +#define HEAP_SIZER_H + +#include "gc-api.h" + +#include "gc-options-internal.h" +#include "growable-heap-sizer.h" +#include "adaptive-heap-sizer.h" + +struct gc_heap_sizer { + enum gc_heap_size_policy policy; + union { + struct gc_growable_heap_sizer* growable; + struct gc_adaptive_heap_sizer* adaptive; + }; +}; + +static struct gc_heap_sizer +gc_make_heap_sizer(struct gc_heap *heap, + const struct gc_common_options *options, + uint64_t (*get_allocation_counter_from_thread)(void*), + void (*set_heap_size_from_thread)(size_t, void*), + void *data) { + struct gc_heap_sizer ret = { options->heap_size_policy, }; + switch (options->heap_size_policy) { + case GC_HEAP_SIZE_FIXED: + break; + + case GC_HEAP_SIZE_GROWABLE: + ret.growable = gc_make_growable_heap_sizer(options->heap_size_multiplier); + break; + + case GC_HEAP_SIZE_ADAPTIVE: + ret.adaptive = + gc_make_adaptive_heap_sizer (options->heap_expansiveness, + get_allocation_counter_from_thread, + set_heap_size_from_thread, + heap); + break; + + default: + GC_CRASH(); + } + return ret; +} + +static void +gc_heap_sizer_on_gc(struct gc_heap_sizer sizer, size_t heap_size, + size_t live_bytes, size_t pause_ns, + void (*set_heap_size)(size_t, void*), void *data) { + switch (sizer.policy) { + case GC_HEAP_SIZE_FIXED: + break; + + case GC_HEAP_SIZE_GROWABLE: + gc_growable_heap_sizer_on_gc(sizer.growable, heap_size, live_bytes, + pause_ns, set_heap_size, data); + break; + + case GC_HEAP_SIZE_ADAPTIVE: + gc_adaptive_heap_sizer_on_gc(sizer.adaptive, live_bytes, pause_ns, + set_heap_size, data); + break; + + default: + GC_CRASH(); + } +} + + +#endif // HEAP_SIZER_H diff --git a/src/large-object-space.h b/src/large-object-space.h index 3d07cf8ad..d81369f93 100644 --- a/src/large-object-space.h +++ b/src/large-object-space.h @@ -218,6 +218,14 @@ static void large_object_space_finish_gc(struct large_object_space *space, pthread_mutex_unlock(&space->lock); } +static void +large_object_space_add_to_allocation_counter(struct large_object_space *space, + uint64_t *counter) { + size_t pages = space->total_pages - space->free_pages; + pages -= space->live_pages_at_last_collection; + *counter += pages << space->page_size_log2; +} + static 
inline struct gc_ref large_object_space_mark_conservative_ref(struct large_object_space *space, struct gc_conservative_ref ref, diff --git a/src/mmc.c b/src/mmc.c index 7975a643c..34a96e409 100644 --- a/src/mmc.c +++ b/src/mmc.c @@ -15,6 +15,7 @@ #include "gc-platform.h" #include "gc-stack.h" #include "gc-trace.h" +#include "heap-sizer.h" #include "large-object-space.h" #include "nofl-space.h" #if GC_PARALLEL @@ -36,6 +37,8 @@ struct gc_heap { pthread_cond_t collector_cond; pthread_cond_t mutator_cond; size_t size; + size_t total_allocated_bytes_at_last_gc; + size_t size_at_last_gc; int collecting; int check_pending_ephemerons; struct gc_pending_ephemerons *pending_ephemerons; @@ -55,6 +58,7 @@ struct gc_heap { double minimum_major_gc_yield_threshold; double pending_ephemerons_size_factor; double pending_ephemerons_size_slop; + struct gc_heap_sizer sizer; struct gc_event_listener event_listener; void *event_listener_data; }; @@ -450,27 +454,32 @@ maybe_pause_mutator_for_collection(struct gc_mutator *mut) { pause_mutator_for_collection_without_lock(mut); } -static int maybe_grow_heap(struct gc_heap *heap) { - return 0; +static void +resize_heap(size_t new_size, void *data) { + struct gc_heap *heap = data; + if (new_size == heap->size) + return; + DEBUG("------ resizing heap\n"); + DEBUG("------ old heap size: %zu bytes\n", heap->size); + DEBUG("------ new heap size: %zu bytes\n", new_size); + if (new_size < heap->size) + nofl_space_shrink(heap_nofl_space(heap), heap->size - new_size); + else + nofl_space_expand(heap_nofl_space(heap), new_size - heap->size); + + heap->size = new_size; + HEAP_EVENT(heap, heap_resized, new_size); } static double heap_last_gc_yield(struct gc_heap *heap) { - struct nofl_space *nofl_space = heap_nofl_space(heap); - size_t nofl_yield = nofl_space_yield(nofl_space); - size_t evacuation_reserve = nofl_space_evacuation_reserve_bytes(nofl_space); - // FIXME: Size nofl evacuation reserve based on size of nofl space, - // not heap size. 
- size_t minimum_evacuation_reserve = - heap->size * nofl_space->evacuation_minimum_reserve; - if (evacuation_reserve > minimum_evacuation_reserve) - nofl_yield += evacuation_reserve - minimum_evacuation_reserve; - struct large_object_space *lospace = heap_large_object_space(heap); - size_t lospace_yield = lospace->pages_freed_by_last_collection; - lospace_yield <<= lospace->page_size_log2; + size_t live_size = + nofl_space_live_size_at_last_collection(heap_nofl_space(heap)) + + large_object_space_size_at_last_collection(heap_large_object_space(heap)); - double yield = nofl_yield + lospace_yield; - return yield / heap->size; + if (live_size > heap->size_at_last_gc) + return 0; + return 1.0 - ((double) live_size) / heap->size_at_last_gc; } static double @@ -480,31 +489,29 @@ heap_fragmentation(struct gc_heap *heap) { return ((double)fragmentation) / heap->size; } +static size_t +heap_estimate_live_data_after_gc(struct gc_heap *heap, + size_t last_live_bytes, + double last_yield) { + size_t bytes = + nofl_space_estimate_live_bytes_after_gc(heap_nofl_space(heap), + last_yield) + + large_object_space_size_at_last_collection(heap_large_object_space(heap)); + if (bytes < last_live_bytes) + return last_live_bytes; + return bytes; +} + static void -detect_out_of_memory(struct gc_heap *heap) { - struct nofl_space *nofl_space = heap_nofl_space(heap); - struct large_object_space *lospace = heap_large_object_space(heap); - - if (heap->count == 0) +detect_out_of_memory(struct gc_heap *heap, uintptr_t allocation_since_last_gc) { + if (heap->sizer.policy != GC_HEAP_SIZE_FIXED) return; - double last_yield = heap_last_gc_yield(heap); - double fragmentation = heap_fragmentation(heap); - - double yield_epsilon = NOFL_BLOCK_SIZE * 1.0 / heap->size; - double fragmentation_epsilon = LARGE_OBJECT_THRESHOLD * 1.0 / NOFL_BLOCK_SIZE; - - if (last_yield - fragmentation > yield_epsilon) + if (allocation_since_last_gc > nofl_space_fragmentation(heap_nofl_space(heap))) return; - if (fragmentation > fragmentation_epsilon - && atomic_load(&nofl_space->evacuation_targets.count)) - return; - - // No yield in last gc and we do not expect defragmentation to - // be able to yield more space: out of memory. - fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n", - heap->size, nofl_space->nslabs); + // No allocation since last gc: out of memory. 
+ fprintf(stderr, "ran out of space, heap size %zu\n", heap->size); GC_CRASH(); } @@ -731,10 +738,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { struct nofl_space *nofl_space = heap_nofl_space(heap); struct large_object_space *lospace = heap_large_object_space(heap); struct gc_extern_space *exspace = heap_extern_space(heap); - if (maybe_grow_heap(heap)) { - DEBUG("grew heap instead of collecting #%ld:\n", heap->count); - return; - } + uint64_t start_ns = gc_platform_monotonic_nanoseconds(); MUTATOR_EVENT(mut, mutator_cause_gc); DEBUG("start collect #%ld:\n", heap->count); HEAP_EVENT(heap, requesting_stop); @@ -747,6 +751,11 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { determine_collection_kind(heap, requested_kind); int is_minor = gc_kind == GC_COLLECTION_MINOR; HEAP_EVENT(heap, prepare_gc, gc_kind); + uint64_t allocation_counter = 0; + nofl_space_add_to_allocation_counter(nofl_space, &allocation_counter); + large_object_space_add_to_allocation_counter(lospace, &allocation_counter); + heap->total_allocated_bytes_at_last_gc += allocation_counter; + detect_out_of_memory(heap, allocation_counter); nofl_space_prepare_gc(nofl_space, gc_kind); large_object_space_start_gc(lospace, is_minor); gc_extern_space_start_gc(exspace, is_minor); @@ -754,9 +763,9 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { gc_tracer_prepare(&heap->tracer); double yield = heap_last_gc_yield(heap); double fragmentation = heap_fragmentation(heap); - HEAP_EVENT(heap, live_data_size, heap->size * (1 - yield)); + size_t live_bytes = heap->size * (1.0 - yield); + HEAP_EVENT(heap, live_data_size, live_bytes); DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation); - detect_out_of_memory(heap); enqueue_pinned_roots(heap); // Eagerly trace pinned roots if we are going to relocate objects. 
if (gc_kind == GC_COLLECTION_COMPACTING) @@ -780,6 +789,13 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { gc_extern_space_finish_gc(exspace, is_minor); heap->count++; heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection); + uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns; + size_t live_bytes_estimate = + heap_estimate_live_data_after_gc(heap, live_bytes, yield); + DEBUG("--- total live bytes estimate: %zu\n", live_bytes_estimate); + gc_heap_sizer_on_gc(heap->sizer, heap->size, live_bytes_estimate, pause_ns, + resize_heap, heap); + heap->size_at_last_gc = heap->size; HEAP_EVENT(heap, restarting_mutators); allow_mutators_to_continue(heap); } @@ -971,7 +987,7 @@ heap_init(struct gc_heap *heap, const struct gc_options *options) { pthread_mutex_init(&heap->lock, NULL); pthread_cond_init(&heap->mutator_cond, NULL); pthread_cond_init(&heap->collector_cond, NULL); - heap->size = options->common.heap_size; + heap->size = heap->size_at_last_gc = options->common.heap_size; if (!gc_tracer_init(&heap->tracer, heap, options->common.parallelism)) GC_CRASH(); @@ -995,6 +1011,24 @@ heap_init(struct gc_heap *heap, const struct gc_options *options) { return 1; } +static uint64_t allocation_counter_from_thread(void *data) { + struct gc_heap *heap = data; + uint64_t ret = heap->total_allocated_bytes_at_last_gc; + if (pthread_mutex_trylock(&heap->lock)) return ret; + nofl_space_add_to_allocation_counter(heap_nofl_space(heap), &ret); + large_object_space_add_to_allocation_counter(heap_large_object_space(heap), + &ret); + pthread_mutex_unlock(&heap->lock); + return ret; +} + +static void set_heap_size_from_thread(size_t size, void *data) { + struct gc_heap *heap = data; + if (pthread_mutex_trylock(&heap->lock)) return; + resize_heap(size, heap); + pthread_mutex_unlock(&heap->lock); +} + int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, struct gc_heap **heap, struct gc_mutator **mut, @@ -1015,11 +1049,6 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, NOFL_BLOCK_SIZE / NOFL_REMSET_BYTES_PER_BLOCK); } - if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { - fprintf(stderr, "fixed heap size is currently required\n"); - return 0; - } - *heap = calloc(1, sizeof(struct gc_heap)); if (!*heap) GC_CRASH(); @@ -1042,6 +1071,11 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, if (!large_object_space_init(heap_large_object_space(*heap), *heap)) GC_CRASH(); + (*heap)->sizer = gc_make_heap_sizer(*heap, &options->common, + allocation_counter_from_thread, + set_heap_size_from_thread, + (*heap)); + *mut = calloc(1, sizeof(struct gc_mutator)); if (!*mut) GC_CRASH(); gc_stack_init(&(*mut)->stack, stack_base); diff --git a/src/nofl-space.h b/src/nofl-space.h index 5a217cdb0..5c46bb7ff 100644 --- a/src/nofl-space.h +++ b/src/nofl-space.h @@ -75,6 +75,7 @@ enum nofl_block_summary_flag { NOFL_BLOCK_EVACUATE = 0x1, NOFL_BLOCK_ZERO = 0x2, NOFL_BLOCK_UNAVAILABLE = 0x4, + NOFL_BLOCK_PAGED_OUT = 0x8, NOFL_BLOCK_FLAG_UNUSED_3 = 0x8, NOFL_BLOCK_FLAG_UNUSED_4 = 0x10, NOFL_BLOCK_FLAG_UNUSED_5 = 0x20, @@ -95,7 +96,7 @@ struct nofl_block_summary { // Counters related to previous collection: how many holes there // were, and how much space they had. uint16_t hole_count; - uint16_t free_granules; + uint16_t hole_granules; // Counters related to allocation since previous collection: // wasted space due to fragmentation. 
Also used by blocks on the // "partly full" list, which have zero holes_with_fragmentation @@ -158,7 +159,9 @@ struct nofl_space { ssize_t pending_unavailable_bytes; // atomically struct nofl_slab **slabs; size_t nslabs; - uintptr_t granules_freed_by_last_collection; // atomically + uintptr_t old_generation_granules; // atomically + uintptr_t survivor_granules_at_last_collection; // atomically + uintptr_t allocated_granules_since_last_collection; // atomically uintptr_t fragmentation_granules_since_last_collection; // atomically }; @@ -271,7 +274,7 @@ nofl_block_summary_for_addr(uintptr_t addr) { static uintptr_t nofl_block_summary_has_flag(struct nofl_block_summary *summary, enum nofl_block_summary_flag flag) { - return summary->next_and_flags & flag; + return (summary->next_and_flags & flag) == flag; } static void @@ -404,12 +407,23 @@ nofl_block_count(struct nofl_block_list *list) { return atomic_load_explicit(&list->count, memory_order_acquire); } +static void +nofl_push_paged_out_block(struct nofl_space *space, + struct nofl_block_ref block) { + GC_ASSERT(nofl_block_has_flag(block, + NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT)); + nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE); + nofl_push_block(&space->unavailable, block); +} + static void nofl_push_unavailable_block(struct nofl_space *space, struct nofl_block_ref block) { - nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_UNAVAILABLE); - madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED); - nofl_push_block(&space->unavailable, block); + if (!nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT)) { + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED); + } + nofl_push_paged_out_block(space, block); } static struct nofl_block_ref @@ -483,7 +497,7 @@ nofl_should_promote_block(struct nofl_space *space, // until after the next full GC. if (!GC_GENERATIONAL) return 0; size_t threshold = NOFL_GRANULES_PER_BLOCK * space->promotion_threshold; - return block.summary->free_granules < threshold; + return block.summary->hole_granules < threshold; } static void @@ -492,8 +506,10 @@ nofl_allocator_release_full_block(struct nofl_allocator *alloc, GC_ASSERT(nofl_allocator_has_block(alloc)); struct nofl_block_ref block = alloc->block; GC_ASSERT(alloc->alloc == alloc->sweep); - atomic_fetch_add(&space->granules_freed_by_last_collection, - block.summary->free_granules); + atomic_fetch_add(&space->allocated_granules_since_last_collection, + block.summary->hole_granules); + atomic_fetch_add(&space->survivor_granules_at_last_collection, + NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules); atomic_fetch_add(&space->fragmentation_granules_since_last_collection, block.summary->fragmentation_granules); @@ -515,15 +531,13 @@ nofl_allocator_release_full_evacuation_target(struct nofl_allocator *alloc, size_t hole_size = alloc->sweep - alloc->alloc; // FIXME: Check how this affects statistics. 
GC_ASSERT_EQ(block.summary->hole_count, 1); - GC_ASSERT_EQ(block.summary->free_granules, NOFL_GRANULES_PER_BLOCK); - atomic_fetch_add(&space->granules_freed_by_last_collection, + GC_ASSERT_EQ(block.summary->hole_granules, NOFL_GRANULES_PER_BLOCK); + atomic_fetch_add(&space->old_generation_granules, NOFL_GRANULES_PER_BLOCK); if (hole_size) { hole_size >>= NOFL_GRANULE_SIZE_LOG_2; block.summary->holes_with_fragmentation = 1; block.summary->fragmentation_granules = hole_size / NOFL_GRANULE_SIZE; - atomic_fetch_add(&space->fragmentation_granules_since_last_collection, - block.summary->fragmentation_granules); } else { GC_ASSERT_EQ(block.summary->fragmentation_granules, 0); GC_ASSERT_EQ(block.summary->holes_with_fragmentation, 0); @@ -570,14 +584,14 @@ nofl_allocator_acquire_empty_block(struct nofl_allocator *alloc, if (nofl_block_is_null(block)) return 0; block.summary->hole_count = 1; - block.summary->free_granules = NOFL_GRANULES_PER_BLOCK; + block.summary->hole_granules = NOFL_GRANULES_PER_BLOCK; block.summary->holes_with_fragmentation = 0; block.summary->fragmentation_granules = 0; alloc->block = block; alloc->alloc = block.addr; alloc->sweep = block.addr + NOFL_BLOCK_SIZE; if (nofl_block_has_flag(block, NOFL_BLOCK_ZERO)) - nofl_block_clear_flag(block, NOFL_BLOCK_ZERO); + nofl_block_clear_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); else nofl_clear_memory(block.addr, NOFL_BLOCK_SIZE); return NOFL_GRANULES_PER_BLOCK; @@ -637,22 +651,22 @@ nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc, return 0; } - size_t free_granules = scan_for_byte(metadata, limit_granules, sweep_mask); - size_t free_bytes = free_granules * NOFL_GRANULE_SIZE; - GC_ASSERT(free_granules); - GC_ASSERT(free_granules <= limit_granules); + size_t hole_granules = scan_for_byte(metadata, limit_granules, sweep_mask); + size_t free_bytes = hole_granules * NOFL_GRANULE_SIZE; + GC_ASSERT(hole_granules); + GC_ASSERT(hole_granules <= limit_granules); - memset(metadata, 0, free_granules); + memset(metadata, 0, hole_granules); memset((char*)sweep, 0, free_bytes); alloc->block.summary->hole_count++; - GC_ASSERT(free_granules <= - NOFL_GRANULES_PER_BLOCK - alloc->block.summary->free_granules); - alloc->block.summary->free_granules += free_granules; + GC_ASSERT(hole_granules <= + NOFL_GRANULES_PER_BLOCK - alloc->block.summary->hole_granules); + alloc->block.summary->hole_granules += hole_granules; alloc->alloc = sweep; alloc->sweep = sweep + free_bytes; - return free_granules; + return hole_granules; } static void @@ -724,7 +738,7 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc, // at the last collection. As we allocate we'll record how // many granules were wasted because of fragmentation. 
alloc->block.summary->hole_count = 0; - alloc->block.summary->free_granules = 0; + alloc->block.summary->hole_granules = 0; alloc->block.summary->holes_with_fragmentation = 0; alloc->block.summary->fragmentation_granules = 0; size_t granules = @@ -875,13 +889,53 @@ nofl_space_clear_remembered_set(struct nofl_space *space) { static void nofl_space_reset_statistics(struct nofl_space *space) { - space->granules_freed_by_last_collection = 0; + space->survivor_granules_at_last_collection = 0; + space->allocated_granules_since_last_collection = 0; space->fragmentation_granules_since_last_collection = 0; } static size_t -nofl_space_yield(struct nofl_space *space) { - return space->granules_freed_by_last_collection * NOFL_GRANULE_SIZE; +nofl_space_live_size_at_last_collection(struct nofl_space *space) { + size_t granules = space->old_generation_granules + + space->survivor_granules_at_last_collection; + return granules * NOFL_GRANULE_SIZE; +} + +static void +nofl_space_add_to_allocation_counter(struct nofl_space *space, + uint64_t *counter) { + *counter += + atomic_load_explicit(&space->allocated_granules_since_last_collection, + memory_order_relaxed) * NOFL_GRANULE_SIZE; +} + +static size_t +nofl_space_estimate_live_bytes_after_gc(struct nofl_space *space, + double last_yield) +{ + // The nofl space mostly traces via marking, and as such doesn't precisely + // know the live data size until after sweeping. But it is important to + // promptly compute the live size so that we can grow the heap if + // appropriate. Therefore sometimes we will estimate the live data size + // instead of measuring it precisely. + size_t bytes = 0; + bytes += nofl_block_count(&space->full) * NOFL_BLOCK_SIZE; + bytes += nofl_block_count(&space->partly_full) * NOFL_BLOCK_SIZE / 2; + GC_ASSERT_EQ(nofl_block_count(&space->promoted), 0); + bytes += space->old_generation_granules * NOFL_GRANULE_SIZE; + bytes += + nofl_block_count(&space->to_sweep) * NOFL_BLOCK_SIZE * (1 - last_yield); + + DEBUG("--- nofl estimate before adjustment: %zu\n", bytes); +/* + // Assume that if we have pending unavailable bytes after GC that there is a + // large object waiting to be allocated, and that probably it survives this GC + // cycle. 
+ bytes += atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire); + DEBUG("--- nofl estimate after adjustment: %zu\n", bytes); +*/ + return bytes; } static size_t @@ -891,8 +945,12 @@ nofl_space_evacuation_reserve_bytes(struct nofl_space *space) { static size_t nofl_space_fragmentation(struct nofl_space *space) { - size_t granules = space->fragmentation_granules_since_last_collection; - return granules * NOFL_GRANULE_SIZE; + size_t young = space->fragmentation_granules_since_last_collection; + GC_ASSERT(nofl_block_count(&space->old) * NOFL_GRANULES_PER_BLOCK >= + space->old_generation_granules); + size_t old = nofl_block_count(&space->old) * NOFL_GRANULES_PER_BLOCK - + space->old_generation_granules; + return (young + old) * NOFL_GRANULE_SIZE; } static void @@ -933,7 +991,7 @@ nofl_space_prepare_evacuation(struct nofl_space *space) { for (struct nofl_block_ref b = nofl_block_for_addr(space->to_sweep.blocks); !nofl_block_is_null(b); b = nofl_block_next(b)) { - size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->free_granules; + size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->hole_granules; size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; histogram[bucket]++; } @@ -956,7 +1014,7 @@ nofl_space_prepare_evacuation(struct nofl_space *space) { for (struct nofl_block_ref b = nofl_block_for_addr(space->to_sweep.blocks); !nofl_block_is_null(b); b = nofl_block_next(b)) { - size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->free_granules; + size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->hole_granules; size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; if (histogram[bucket]) { nofl_block_set_flag(b, NOFL_BLOCK_EVACUATE); @@ -1012,6 +1070,7 @@ nofl_space_start_gc(struct nofl_space *space, enum gc_collection_kind gc_kind) { nofl_push_block(&space->to_sweep, block); while (!nofl_block_is_null(block = nofl_pop_block(&space->old))) nofl_push_block(&space->to_sweep, block); + space->old_generation_granules = 0; } if (gc_kind == GC_COLLECTION_COMPACTING) @@ -1042,6 +1101,8 @@ nofl_space_promote_blocks(struct nofl_space *space) { while (!nofl_block_is_null(block = nofl_pop_block(&space->promoted))) { struct nofl_allocator alloc = { block.addr, block.addr, block }; nofl_allocator_finish_sweeping_in_block(&alloc, space->sweep_mask); + atomic_fetch_add(&space->old_generation_granules, + NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules); nofl_push_block(&space->old, block); } } @@ -1210,18 +1271,25 @@ nofl_space_request_release_memory(struct nofl_space *space, size_t bytes) { return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes; } -static void -nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) { +static ssize_t +nofl_space_maybe_reacquire_memory(struct nofl_space *space, size_t bytes) { ssize_t pending = atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes; while (pending + NOFL_BLOCK_SIZE <= 0) { struct nofl_block_ref block = nofl_pop_unavailable_block(space); - GC_ASSERT(!nofl_block_is_null(block)); + if (nofl_block_is_null(block)) break; if (!nofl_push_evacuation_target_if_needed(space, block)) nofl_push_empty_block(space, block); pending = atomic_fetch_add(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE) + NOFL_BLOCK_SIZE; } + return pending; +} + +static void +nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) { + ssize_t pending = nofl_space_maybe_reacquire_memory(space, bytes); + GC_ASSERT(pending + 
NOFL_BLOCK_SIZE > 0); } static int @@ -1538,6 +1606,66 @@ nofl_space_add_slabs(struct nofl_space *space, struct nofl_slab *slabs, space->slabs[space->nslabs++] = slabs++; } +static void +nofl_space_shrink(struct nofl_space *space, size_t bytes) { + ssize_t pending = nofl_space_request_release_memory(space, bytes); + // First try to shrink by unmapping previously-identified empty blocks. + while (pending > 0) { + struct nofl_block_ref block = nofl_pop_empty_block(space); + if (nofl_block_is_null(block)) + break; + nofl_push_unavailable_block(space, block); + pending = atomic_fetch_sub(&space->pending_unavailable_bytes, + NOFL_BLOCK_SIZE); + pending -= NOFL_BLOCK_SIZE; + } + + // If we still need to shrink, steal from the evacuation reserve, if it's more + // than the minimum. Not racy: evacuation target lists are built during eager + // lazy sweep, which is mutually exclusive with consumption, itself either + // during trace, synchronously from gc_heap_sizer_on_gc, or async but subject + // to the heap lock. + if (pending > 0) { + size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = nofl_block_count(&space->unavailable); + size_t target = space->evacuation_minimum_reserve * (total - unavailable); + ssize_t avail = nofl_block_count(&space->evacuation_targets); + while (avail > target && pending > 0) { + struct nofl_block_ref block = nofl_pop_block(&space->evacuation_targets); + GC_ASSERT(!nofl_block_is_null(block)); + nofl_push_unavailable_block(space, block); + pending = atomic_fetch_sub(&space->pending_unavailable_bytes, + NOFL_BLOCK_SIZE); + pending -= NOFL_BLOCK_SIZE; + } + } + + // It still may be the case we need to page out more blocks. Only evacuation + // can help us then! +} + +static void +nofl_space_expand(struct nofl_space *space, size_t bytes) { + double overhead = ((double)NOFL_META_BLOCKS_PER_SLAB) / NOFL_BLOCKS_PER_SLAB; + ssize_t to_acquire = -nofl_space_maybe_reacquire_memory(space, bytes); + if (to_acquire <= 0) return; + to_acquire *= (1 + overhead); + size_t reserved = align_up(to_acquire, NOFL_SLAB_SIZE); + size_t nslabs = reserved / NOFL_SLAB_SIZE; + struct nofl_slab *slabs = nofl_allocate_slabs(nslabs); + nofl_space_add_slabs(space, slabs, nslabs); + + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = 0; idx < NOFL_NONMETA_BLOCKS_PER_SLAB; idx++) { + uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data; + struct nofl_block_ref block = nofl_block_for_addr(addr); + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + nofl_push_paged_out_block(space, block); + } + } + nofl_space_reacquire_memory(space, 0); +} + static int nofl_space_init(struct nofl_space *space, size_t size, int atomic, double promotion_threshold) { @@ -1556,12 +1684,12 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic, space->evacuation_reserve = space->evacuation_minimum_reserve; space->promotion_threshold = promotion_threshold; for (size_t slab = 0; slab < nslabs; slab++) { - for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) { - uintptr_t addr = (uintptr_t)slabs[slab].blocks[block].data; + for (size_t idx = 0; idx < NOFL_NONMETA_BLOCKS_PER_SLAB; idx++) { + uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data; struct nofl_block_ref block = nofl_block_for_addr(addr); - nofl_block_set_flag(block, NOFL_BLOCK_ZERO); + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); if (reserved > size) { - nofl_push_unavailable_block(space, block); + nofl_push_paged_out_block(space, block); 
reserved -= NOFL_BLOCK_SIZE; } else { if (!nofl_push_evacuation_target_if_needed(space, block)) diff --git a/src/pcc.c b/src/pcc.c index 0f5d84abd..40ba60f8b 100644 --- a/src/pcc.c +++ b/src/pcc.c @@ -13,7 +13,9 @@ #include "debug.h" #include "gc-align.h" #include "gc-inline.h" +#include "gc-platform.h" #include "gc-trace.h" +#include "heap-sizer.h" #include "large-object-space.h" #if GC_PARALLEL #include "parallel-tracer.h" @@ -32,6 +34,7 @@ struct gc_heap { pthread_cond_t collector_cond; pthread_cond_t mutator_cond; size_t size; + size_t total_allocated_bytes_at_last_gc; int collecting; int check_pending_ephemerons; struct gc_pending_ephemerons *pending_ephemerons; @@ -45,6 +48,7 @@ struct gc_heap { struct gc_tracer tracer; double pending_ephemerons_size_factor; double pending_ephemerons_size_slop; + struct gc_heap_sizer sizer; struct gc_event_listener event_listener; void *event_listener_data; }; @@ -316,8 +320,20 @@ static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) { pause_mutator_for_collection_without_lock(mut); } -static int maybe_grow_heap(struct gc_heap *heap) { - return 0; +static void resize_heap(size_t new_size, void *data) { + struct gc_heap *heap = data; + if (new_size == heap->size) + return; + DEBUG("------ resizing heap\n"); + DEBUG("------ old heap size: %zu bytes\n", heap->size); + DEBUG("------ new heap size: %zu bytes\n", new_size); + if (new_size < heap->size) + copy_space_shrink(heap_copy_space(heap), heap->size - new_size); + else + copy_space_expand(heap_copy_space(heap), new_size - heap->size); + + heap->size = new_size; + HEAP_EVENT(heap, heap_resized, new_size); } static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap, @@ -375,6 +391,7 @@ static void collect(struct gc_mutator *mut) { struct copy_space *copy_space = heap_copy_space(heap); struct large_object_space *lospace = heap_large_object_space(heap); struct gc_extern_space *exspace = heap_extern_space(heap); + uint64_t start_ns = gc_platform_monotonic_nanoseconds(); MUTATOR_EVENT(mut, mutator_cause_gc); DEBUG("start collect #%ld:\n", heap->count); HEAP_EVENT(heap, requesting_stop); @@ -383,6 +400,9 @@ static void collect(struct gc_mutator *mut) { wait_for_mutators_to_stop(heap); HEAP_EVENT(heap, mutators_stopped); HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING); + uint64_t *counter_loc = &heap->total_allocated_bytes_at_last_gc; + copy_space_add_to_allocation_counter(copy_space, counter_loc); + large_object_space_add_to_allocation_counter(lospace, counter_loc); copy_space_flip(copy_space); large_object_space_start_gc(lospace, 0); gc_extern_space_start_gc(exspace, 0); @@ -406,9 +426,12 @@ static void collect(struct gc_mutator *mut) { heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection); size_t live_size = (copy_space->allocated_bytes_at_last_gc + large_object_space_size_at_last_collection(lospace)); + uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns; HEAP_EVENT(heap, live_data_size, live_size); - maybe_grow_heap(heap); - if (!copy_space_page_out_blocks_until_memory_released(copy_space)) { + gc_heap_sizer_on_gc(heap->sizer, heap->size, live_size, pause_ns, + resize_heap, heap); + if (!copy_space_page_out_blocks_until_memory_released(copy_space) + && heap->sizer.policy == GC_HEAP_SIZE_FIXED) { fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n", heap->size, copy_space->nslabs); GC_CRASH(); @@ -584,6 +607,24 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) { return 1; } +static 
uint64_t allocation_counter_from_thread(void *data) { + struct gc_heap *heap = data; + uint64_t ret = heap->total_allocated_bytes_at_last_gc; + if (pthread_mutex_trylock(&heap->lock)) return ret; + copy_space_add_to_allocation_counter(heap_copy_space(heap), &ret); + large_object_space_add_to_allocation_counter(heap_large_object_space(heap), + &ret); + pthread_mutex_unlock(&heap->lock); + return ret; +} + +static void set_heap_size_from_thread(size_t size, void *data) { + struct gc_heap *heap = data; + if (pthread_mutex_trylock(&heap->lock)) return; + resize_heap(size, heap); + pthread_mutex_unlock(&heap->lock); +} + int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, struct gc_heap **heap, struct gc_mutator **mut, struct gc_event_listener event_listener, @@ -596,11 +637,6 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), offsetof(struct copy_space_allocator, limit)); - if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { - fprintf(stderr, "fixed heap size is currently required\n"); - return 0; - } - *heap = calloc(1, sizeof(struct gc_heap)); if (!*heap) GC_CRASH(); @@ -622,6 +658,11 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, if (!large_object_space_init(heap_large_object_space(*heap), *heap)) GC_CRASH(); + (*heap)->sizer = gc_make_heap_sizer(*heap, &options->common, + allocation_counter_from_thread, + set_heap_size_from_thread, + (*heap)); + *mut = calloc(1, sizeof(struct gc_mutator)); if (!*mut) GC_CRASH(); add_mutator(*heap, *mut);