First version of adaptive heap sizing for pcc and mcc
This commit is contained in: parent d19366bea2, commit 2818958c59
17 changed files with 702 additions and 110 deletions

Makefile | 3
@@ -62,13 +62,16 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_LIBS_semi = -lm

GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
GC_LIBS_pcc = -lm

define mmc_variant
GC_STEM_$(1) = mmc
GC_CFLAGS_$(1) = $(2)
GC_LIBS_$(1) = -lm
endef

define generational_mmc_variants
@@ -14,7 +14,7 @@ enum {
  GC_OPTION_HEAP_SIZE,
  GC_OPTION_MAXIMUM_HEAP_SIZE,
  GC_OPTION_HEAP_SIZE_MULTIPLIER,
  GC_OPTION_HEAP_FRUGALITY,
  GC_OPTION_HEAP_EXPANSIVENESS,
  GC_OPTION_PARALLELISM
};
@@ -460,8 +460,9 @@ defined for all collectors:
* `GC_OPTION_HEAP_SIZE_MULTIPLIER`: For growable heaps, the target heap
  multiplier. A heap multiplier of 2.5 means that for 100 MB of live
  data, the heap should be 250 MB.
* `GC_OPTION_HEAP_FRUGALITY`: Something that will be used in adaptive
  heaps, apparently! Not yet implemented.
* `GC_OPTION_HEAP_EXPANSIVENESS`: For adaptive heap sizing, an
  indication of how much free space will be given to heaps, as a
  proportion of the square root of the live data size.
* `GC_OPTION_PARALLELISM`: How many threads to devote to collection
  tasks during GC pauses. By default, the current number of
  processors, with a maximum of 8.
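As a concrete reading of the multiplier option, here is a minimal sketch (not part of the commit; growable_target is a hypothetical helper, not library API) that restates the documented example: a multiplier of 2.5 maps 100 MB of live data to a 250 MB heap target.

#include <assert.h>
#include <stddef.h>

/* Hypothetical helper: the growable policy's target heap size. */
static size_t growable_target(size_t live_bytes, double multiplier) {
  return (size_t)(live_bytes * multiplier);
}

int main(void) {
  /* 100 MB of live data with a 2.5 multiplier yields a 250 MB target. */
  assert(growable_target(100 * 1000 * 1000, 2.5) == 250 * 1000 * 1000);
  return 0;
}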
embed.mk | 3
@@ -41,13 +41,16 @@ GC_LIBS_bdw = `pkg-config --libs bdw-gc`

GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_LIBS_semi = -lm

GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
GC_LIBS_pcc = -lm

define mmc_variant
GC_STEM_$(1) = mmc
GC_CFLAGS_$(1) = $(2)
GC_LIBS_$(1) = -lm
endef

define generational_mmc_variants

src/adaptive-heap-sizer.h | 173 (new file)
@@ -0,0 +1,173 @@
#ifndef ADAPTIVE_HEAP_SIZER_H
#define ADAPTIVE_HEAP_SIZER_H

#include <math.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "assert.h"
#include "debug.h"
#include "heap-sizer.h"
#include "gc-platform.h"

// This is the MemBalancer algorithm from "Optimal Heap Limits for Reducing
// Browser Memory Use" by Marisa Kirisame, Pranav Shenoy, and Pavel Panchekha
// (https://arxiv.org/abs/2204.10455).
//
// This implementation differs slightly in that the constant "c" of the paper
// has been extracted outside the radical, and notionally reversed: it is a
// unitless "expansiveness" parameter whose domain is [0,+∞]. Also there are
// minimum and maximum heap size multipliers, and a minimum amount of free
// space. The initial collection rate is an informed guess. The initial
// allocation rate estimate is high, considering that allocation rates are often
// high on program startup.

struct gc_adaptive_heap_sizer {
  uint64_t (*get_allocation_counter)(void *callback_data);
  void (*set_heap_size)(size_t size, void *callback_data);
  void *callback_data;
  uint64_t smoothed_pause_time;
  uint64_t smoothed_live_bytes;
  uint64_t live_bytes;
  double smoothed_allocation_rate;
  double collection_smoothing_factor;
  double allocation_smoothing_factor;
  double minimum_multiplier;
  double maximum_multiplier;
  double minimum_free_space;
  double expansiveness;
  int stopping;
  pthread_t thread;
  pthread_mutex_t lock;
  pthread_cond_t cond;
};

// With lock
static uint64_t
gc_adaptive_heap_sizer_calculate_size(struct gc_adaptive_heap_sizer *sizer) {
  double allocation_rate = sizer->smoothed_allocation_rate;
  double collection_rate =
    (double)sizer->smoothed_pause_time / (double)sizer->smoothed_live_bytes;
  double radicand = sizer->live_bytes * allocation_rate / collection_rate;
  double multiplier = 1.0 + sizer->expansiveness * sqrt(radicand);
  if (isnan(multiplier) || multiplier < sizer->minimum_multiplier)
    multiplier = sizer->minimum_multiplier;
  else if (multiplier > sizer->maximum_multiplier)
    multiplier = sizer->maximum_multiplier;
  uint64_t size = sizer->live_bytes * multiplier;
  if (size - sizer->live_bytes < sizer->minimum_free_space)
    size = sizer->live_bytes + sizer->minimum_free_space;
  return size;
}

static uint64_t
gc_adaptive_heap_sizer_set_expansiveness(struct gc_adaptive_heap_sizer *sizer,
                                         double expansiveness) {
  pthread_mutex_lock(&sizer->lock);
  sizer->expansiveness = expansiveness;
  uint64_t heap_size = gc_adaptive_heap_sizer_calculate_size(sizer);
  pthread_mutex_unlock(&sizer->lock);
  return heap_size;
}

static void
gc_adaptive_heap_sizer_on_gc(struct gc_adaptive_heap_sizer *sizer,
                             size_t live_bytes, uint64_t pause_ns,
                             void (*set_heap_size)(size_t, void*),
                             void *data) {
  pthread_mutex_lock(&sizer->lock);
  sizer->live_bytes = live_bytes;
  sizer->smoothed_live_bytes *= 1.0 - sizer->collection_smoothing_factor;
  sizer->smoothed_live_bytes += sizer->collection_smoothing_factor * live_bytes;
  sizer->smoothed_pause_time *= 1.0 - sizer->collection_smoothing_factor;
  sizer->smoothed_pause_time += sizer->collection_smoothing_factor * pause_ns;
  set_heap_size(gc_adaptive_heap_sizer_calculate_size(sizer), data);
  pthread_mutex_unlock(&sizer->lock);
}

static void*
gc_adaptive_heap_sizer_thread(void *data) {
  struct gc_adaptive_heap_sizer *sizer = data;
  uint64_t last_bytes_allocated =
    sizer->get_allocation_counter(sizer->callback_data);
  uint64_t last_heartbeat = gc_platform_monotonic_nanoseconds();
  pthread_mutex_lock(&sizer->lock);
  while (!sizer->stopping) {
    {
      struct timespec ts;
      if (clock_gettime(CLOCK_REALTIME, &ts)) {
        perror("adaptive heap sizer thread: failed to get time!");
        break;
      }
      ts.tv_sec += 1;
      pthread_cond_timedwait(&sizer->cond, &sizer->lock, &ts);
    }
    uint64_t bytes_allocated =
      sizer->get_allocation_counter(sizer->callback_data);
    uint64_t heartbeat = gc_platform_monotonic_nanoseconds();
    double rate = (double) (bytes_allocated - last_bytes_allocated) /
      (double) (heartbeat - last_heartbeat);
    // Just smooth the rate, under the assumption that the denominator is almost
    // always 1.
    sizer->smoothed_allocation_rate *= 1.0 - sizer->allocation_smoothing_factor;
    sizer->smoothed_allocation_rate += rate * sizer->allocation_smoothing_factor;
    last_heartbeat = heartbeat;
    last_bytes_allocated = bytes_allocated;
    sizer->set_heap_size(gc_adaptive_heap_sizer_calculate_size(sizer),
                         sizer->callback_data);
  }
  pthread_mutex_unlock(&sizer->lock);
  return NULL;
}

static struct gc_adaptive_heap_sizer*
gc_make_adaptive_heap_sizer(double expansiveness,
                            uint64_t (*get_allocation_counter)(void *),
                            void (*set_heap_size)(size_t , void *),
                            void *callback_data) {
  struct gc_adaptive_heap_sizer *sizer;
  sizer = malloc(sizeof(*sizer));
  if (!sizer)
    GC_CRASH();
  memset(sizer, 0, sizeof(*sizer));
  sizer->get_allocation_counter = get_allocation_counter;
  sizer->set_heap_size = set_heap_size;
  sizer->callback_data = callback_data;
  // Baseline estimate of GC speed: 10 MB/ms, or 10 bytes/ns. However since we
  // observe this speed by separately noisy measurements, we have to provide
  // defaults for numerator and denominator; estimate 2ms for initial GC pauses
  // for 20 MB of live data during program startup.
  sizer->smoothed_pause_time = 2 * 1000 * 1000;
  sizer->smoothed_live_bytes = 20 * 1024 * 1024;
  // Baseline estimate of allocation rate during startup: 50 MB in 10ms, or 5
  // bytes/ns.
  sizer->smoothed_allocation_rate = 5;
  sizer->collection_smoothing_factor = 0.5;
  sizer->allocation_smoothing_factor = 0.95;
  sizer->minimum_multiplier = 1.1;
  sizer->maximum_multiplier = 5;
  sizer->minimum_free_space = 4 * 1024 * 1024;
  sizer->expansiveness = expansiveness;
  pthread_mutex_init(&sizer->lock, NULL);
  pthread_cond_init(&sizer->cond, NULL);
  if (pthread_create(&sizer->thread, NULL, gc_adaptive_heap_sizer_thread,
                     sizer)) {
    perror("spawning adaptive heap size thread failed");
    GC_CRASH();
  }
  return sizer;
}

static void
gc_destroy_adaptive_heap_sizer(struct gc_adaptive_heap_sizer *sizer) {
  pthread_mutex_lock(&sizer->lock);
  GC_ASSERT(!sizer->stopping);
  sizer->stopping = 1;
  pthread_mutex_unlock(&sizer->lock);
  pthread_cond_signal(&sizer->cond);
  pthread_join(sizer->thread, NULL);
  free(sizer);
}

#endif // ADAPTIVE_HEAP_SIZER_H
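For intuition about the rule implemented by gc_adaptive_heap_sizer_calculate_size, here is a small standalone sketch (not part of the commit) that evaluates the same formula with the startup estimates set above; the numbers are illustrative only.

#include <math.h>
#include <stdio.h>

// heap = live * multiplier, where
// multiplier = 1 + expansiveness * sqrt(live * allocation_rate / collection_rate),
// clamped to [minimum_multiplier, maximum_multiplier].
int main(void) {
  double live_bytes = 20.0 * 1024 * 1024;         // smoothed_live_bytes default
  double pause_ns = 2.0 * 1000 * 1000;            // smoothed_pause_time default
  double allocation_rate = 5.0;                   // bytes/ns startup estimate
  double collection_rate = pause_ns / live_bytes; // pause per live byte
  double expansiveness = 1.0;                     // heap-expansiveness default
  double multiplier =
    1.0 + expansiveness * sqrt(live_bytes * allocation_rate / collection_rate);
  if (multiplier < 1.1) multiplier = 1.1;         // minimum_multiplier
  if (multiplier > 5.0) multiplier = 5.0;         // maximum_multiplier
  printf("target heap size: %.1f MB\n", live_bytes * multiplier / (1024 * 1024));
  return 0;
}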
@@ -117,7 +117,7 @@ struct copy_space {
  size_t allocated_bytes_at_last_gc;
  size_t fragmentation_at_last_gc;
  struct extents *extents;
  struct copy_space_slab *slabs;
  struct copy_space_slab **slabs;
  size_t nslabs;
};

@@ -231,17 +231,25 @@ copy_space_page_out_blocks_until_memory_released(struct copy_space *space) {
  return 1;
}

static void
copy_space_reacquire_memory(struct copy_space *space, size_t bytes) {
static ssize_t
copy_space_maybe_reacquire_memory(struct copy_space *space, size_t bytes) {
  ssize_t pending =
    atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes;
  while (pending + COPY_SPACE_BLOCK_SIZE <= 0) {
    struct copy_space_block *block = copy_space_page_in_block(space);
    GC_ASSERT(block);
    if (!block) break;
    copy_space_push_empty_block(space, block);
    pending = (atomic_fetch_add(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE)
    pending = (atomic_fetch_add(&space->bytes_to_page_out,
                                COPY_SPACE_BLOCK_SIZE)
               + COPY_SPACE_BLOCK_SIZE);
  }
  return pending;
}

static void
copy_space_reacquire_memory(struct copy_space *space, size_t bytes) {
  ssize_t pending = copy_space_maybe_reacquire_memory(space, bytes);
  GC_ASSERT(pending + COPY_SPACE_BLOCK_SIZE > 0);
}

static inline void

@@ -395,6 +403,12 @@ copy_space_finish_gc(struct copy_space *space) {
  space->fragmentation_at_last_gc = space->fragmentation;
}

static void
copy_space_add_to_allocation_counter(struct copy_space *space,
                                     uintptr_t *counter) {
  *counter += space->allocated_bytes - space->allocated_bytes_at_last_gc;
}

static void
copy_space_gc_during_evacuation(void *data) {
  // If space is really tight and reordering of objects during

@@ -578,6 +592,50 @@ copy_space_allocate_slabs(size_t nslabs) {
  return (struct copy_space_slab*) aligned_base;
}

static void
copy_space_add_slabs(struct copy_space *space, struct copy_space_slab *slabs,
                     size_t nslabs) {
  size_t old_size = space->nslabs * sizeof(struct copy_space_slab*);
  size_t additional_size = nslabs * sizeof(struct copy_space_slab*);
  space->extents = extents_adjoin(space->extents, slabs,
                                  nslabs * sizeof(struct copy_space_slab));
  space->slabs = realloc(space->slabs, old_size + additional_size);
  if (!space->slabs)
    GC_CRASH();
  while (nslabs--)
    space->slabs[space->nslabs++] = slabs++;
}

static void
copy_space_shrink(struct copy_space *space, size_t bytes) {
  ssize_t pending = copy_space_request_release_memory(space, bytes);
  copy_space_page_out_blocks_until_memory_released(space);

  // It still may be the case we need to page out more blocks. Only collection
  // can help us then!
}

static void
copy_space_expand(struct copy_space *space, size_t bytes) {
  ssize_t to_acquire = -copy_space_maybe_reacquire_memory(space, bytes);
  if (to_acquire <= 0) return;
  size_t reserved = align_up(to_acquire, COPY_SPACE_SLAB_SIZE);
  size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE;
  struct copy_space_slab *slabs = copy_space_allocate_slabs(nslabs);
  copy_space_add_slabs(space, slabs, nslabs);

  for (size_t slab = 0; slab < nslabs; slab++) {
    for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) {
      struct copy_space_block *block = &slabs[slab].headers[idx];
      block->all_zeroes[0] = block->all_zeroes[1] = 1;
      block->in_core = 0;
      copy_space_push_paged_out_block(space, block);
      reserved -= COPY_SPACE_BLOCK_SIZE;
    }
  }
  copy_space_reacquire_memory(space, 0);
}

static int
copy_space_init(struct copy_space *space, size_t size, int atomic) {
  size = align_up(size, COPY_SPACE_BLOCK_SIZE);

@@ -599,9 +657,7 @@ copy_space_init(struct copy_space *space, size_t size, int atomic) {
  space->allocated_bytes_at_last_gc = 0;
  space->fragmentation_at_last_gc = 0;
  space->extents = extents_allocate(10);
  extents_adjoin(space->extents, slabs, reserved);
  space->slabs = slabs;
  space->nslabs = nslabs;
  copy_space_add_slabs(space, slabs, nslabs);
  for (size_t slab = 0; slab < nslabs; slab++) {
    for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) {
      struct copy_space_block *block = &slabs[slab].headers[idx];
@@ -9,4 +9,8 @@
#include "gc-finalizer-internal.h"
#include "gc-options-internal.h"

uint64_t gc_heap_total_bytes_allocated(struct gc_heap *heap);
void gc_mutator_adjust_heap_size(struct gc_mutator *mut, uint64_t new_size);

#endif // GC_INTERNAL_H

@@ -12,7 +12,7 @@ struct gc_common_options {
  size_t heap_size;
  size_t maximum_heap_size;
  double heap_size_multiplier;
  double heap_frugality;
  double heap_expansiveness;
  int parallelism;
};

@@ -25,8 +25,8 @@
#define FOR_EACH_DOUBLE_GC_OPTION(M) \
  M(HEAP_SIZE_MULTIPLIER, heap_size_multiplier, "heap-size-multiplier", \
    double, double, 1.75, 1.0, 1e6) \
  M(HEAP_FRUGALITY, heap_frugality, "heap-frugality", \
    double, double, 1e-1, 1e-6, 1e6)
  M(HEAP_EXPANSIVENESS, heap_expansiveness, "heap-expansiveness", \
    double, double, 1.0, 0.0, 50.0)

typedef int gc_option_int;
typedef size_t gc_option_size;
@@ -5,6 +5,7 @@
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define GC_IMPL 1

@@ -110,3 +111,13 @@ int gc_platform_processor_count(void) {
    return 1;
  return CPU_COUNT(&set);
}

uint64_t gc_platform_monotonic_nanoseconds(void) {
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    GC_CRASH();
  uint64_t s = ts.tv_sec;
  uint64_t ns = ts.tv_nsec;
  uint64_t ns_per_sec = 1000000000;
  return s * ns_per_sec + ns;
}

@@ -21,5 +21,6 @@ void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start,
                                                 struct gc_heap *heap,
                                                 void *data);
GC_INTERNAL int gc_platform_processor_count(void);
GC_INTERNAL uint64_t gc_platform_monotonic_nanoseconds(void);

#endif // GC_PLATFORM_H
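The collectors later in this commit use the new gc_platform_monotonic_nanoseconds helper to time pauses; the pattern, sketched here for orientation, is simply:

// As mmc.c and pcc.c do below: bracket the pause with the monotonic clock
// and feed the difference to the heap sizer.
uint64_t start_ns = gc_platform_monotonic_nanoseconds();
// ... stop mutators, trace, sweep ...
uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns;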
src/growable-heap-sizer.h | 58 (new file)
@@ -0,0 +1,58 @@
#ifndef GROWABLE_HEAP_SIZER_H
#define GROWABLE_HEAP_SIZER_H

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "assert.h"
#include "heap-sizer.h"

// This is a simple heap-sizing algorithm that will grow the heap if it is
// smaller than a given multiplier of the live data size. It does not shrink
// the heap.

struct gc_growable_heap_sizer {
  double multiplier;
  pthread_mutex_t lock;
};

static void
gc_growable_heap_sizer_set_multiplier(struct gc_growable_heap_sizer *sizer,
                                      double multiplier) {
  pthread_mutex_lock(&sizer->lock);
  sizer->multiplier = multiplier;
  pthread_mutex_unlock(&sizer->lock);
}

static void
gc_growable_heap_sizer_on_gc(struct gc_growable_heap_sizer *sizer,
                             size_t heap_size, size_t live_bytes,
                             uint64_t pause_ns,
                             void (*set_heap_size)(size_t, void*),
                             void *data) {
  pthread_mutex_lock(&sizer->lock);
  size_t target_size = live_bytes * sizer->multiplier;
  if (target_size > heap_size)
    set_heap_size(target_size, data);
  pthread_mutex_unlock(&sizer->lock);
}

static struct gc_growable_heap_sizer*
gc_make_growable_heap_sizer(double multiplier) {
  struct gc_growable_heap_sizer *sizer;
  sizer = malloc(sizeof(*sizer));
  if (!sizer)
    GC_CRASH();
  memset(sizer, 0, sizeof(*sizer));
  sizer->multiplier = multiplier;
  pthread_mutex_init(&sizer->lock, NULL);
  return sizer;
}

static void
gc_destroy_growable_heap_sizer(struct gc_growable_heap_sizer *sizer) {
  free(sizer);
}

#endif // GROWABLE_HEAP_SIZER_H
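A minimal usage sketch of the growable policy, assuming the default 1.75 multiplier; the print_new_size callback and the sizes are hypothetical, not from the commit.

#include <stdio.h>

// Hypothetical embedder callback: just report the new target size.
static void print_new_size(size_t size, void *data) {
  (void) data;
  fprintf(stderr, "growing heap to %zu bytes\n", size);
}

static void example(void) {
  struct gc_growable_heap_sizer *sizer = gc_make_growable_heap_sizer(1.75);
  // 100 MB live in a 120 MB heap: the 175 MB target exceeds the current
  // size, so the callback fires. A 200 MB heap would be left alone,
  // since this policy never shrinks.
  gc_growable_heap_sizer_on_gc(sizer, 120 * 1000 * 1000, 100 * 1000 * 1000,
                               5 * 1000 * 1000, print_new_size, NULL);
  gc_destroy_growable_heap_sizer(sizer);
}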
src/heap-sizer.h | 71 (new file)
@@ -0,0 +1,71 @@
#ifndef HEAP_SIZER_H
#define HEAP_SIZER_H

#include "gc-api.h"

#include "gc-options-internal.h"
#include "growable-heap-sizer.h"
#include "adaptive-heap-sizer.h"

struct gc_heap_sizer {
  enum gc_heap_size_policy policy;
  union {
    struct gc_growable_heap_sizer* growable;
    struct gc_adaptive_heap_sizer* adaptive;
  };
};

static struct gc_heap_sizer
gc_make_heap_sizer(struct gc_heap *heap,
                   const struct gc_common_options *options,
                   uint64_t (*get_allocation_counter_from_thread)(void*),
                   void (*set_heap_size_from_thread)(size_t, void*),
                   void *data) {
  struct gc_heap_sizer ret = { options->heap_size_policy, };
  switch (options->heap_size_policy) {
    case GC_HEAP_SIZE_FIXED:
      break;

    case GC_HEAP_SIZE_GROWABLE:
      ret.growable = gc_make_growable_heap_sizer(options->heap_size_multiplier);
      break;

    case GC_HEAP_SIZE_ADAPTIVE:
      ret.adaptive =
        gc_make_adaptive_heap_sizer (options->heap_expansiveness,
                                     get_allocation_counter_from_thread,
                                     set_heap_size_from_thread,
                                     heap);
      break;

    default:
      GC_CRASH();
  }
  return ret;
}

static void
gc_heap_sizer_on_gc(struct gc_heap_sizer sizer, size_t heap_size,
                    size_t live_bytes, size_t pause_ns,
                    void (*set_heap_size)(size_t, void*), void *data) {
  switch (sizer.policy) {
    case GC_HEAP_SIZE_FIXED:
      break;

    case GC_HEAP_SIZE_GROWABLE:
      gc_growable_heap_sizer_on_gc(sizer.growable, heap_size, live_bytes,
                                   pause_ns, set_heap_size, data);
      break;

    case GC_HEAP_SIZE_ADAPTIVE:
      gc_adaptive_heap_sizer_on_gc(sizer.adaptive, live_bytes, pause_ns,
                                   set_heap_size, data);
      break;

    default:
      GC_CRASH();
  }
}

#endif // HEAP_SIZER_H
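Both collectors below wire this interface up the same way; schematically (the names follow the mmc.c and pcc.c code later in this commit):

// At init time: build a sizer from the user's options.
heap->sizer = gc_make_heap_sizer(heap, &options->common,
                                 allocation_counter_from_thread,
                                 set_heap_size_from_thread, heap);
// After each collection: report live data and pause time; the sizer may
// call back into resize_heap (a no-op under GC_HEAP_SIZE_FIXED).
gc_heap_sizer_on_gc(heap->sizer, heap->size, live_bytes, pause_ns,
                    resize_heap, heap);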
@@ -218,6 +218,14 @@ static void large_object_space_finish_gc(struct large_object_space *space,
  pthread_mutex_unlock(&space->lock);
}

static void
large_object_space_add_to_allocation_counter(struct large_object_space *space,
                                             uint64_t *counter) {
  size_t pages = space->total_pages - space->free_pages;
  pages -= space->live_pages_at_last_collection;
  *counter += pages << space->page_size_log2;
}

static inline struct gc_ref
large_object_space_mark_conservative_ref(struct large_object_space *space,
                                         struct gc_conservative_ref ref,
src/mmc.c | 130
@@ -15,6 +15,7 @@
#include "gc-platform.h"
#include "gc-stack.h"
#include "gc-trace.h"
#include "heap-sizer.h"
#include "large-object-space.h"
#include "nofl-space.h"
#if GC_PARALLEL

@@ -36,6 +37,8 @@ struct gc_heap {
  pthread_cond_t collector_cond;
  pthread_cond_t mutator_cond;
  size_t size;
  size_t total_allocated_bytes_at_last_gc;
  size_t size_at_last_gc;
  int collecting;
  int check_pending_ephemerons;
  struct gc_pending_ephemerons *pending_ephemerons;

@@ -55,6 +58,7 @@ struct gc_heap {
  double minimum_major_gc_yield_threshold;
  double pending_ephemerons_size_factor;
  double pending_ephemerons_size_slop;
  struct gc_heap_sizer sizer;
  struct gc_event_listener event_listener;
  void *event_listener_data;
};

@@ -450,27 +454,32 @@ maybe_pause_mutator_for_collection(struct gc_mutator *mut) {
  pause_mutator_for_collection_without_lock(mut);
}

static int maybe_grow_heap(struct gc_heap *heap) {
  return 0;
static void
resize_heap(size_t new_size, void *data) {
  struct gc_heap *heap = data;
  if (new_size == heap->size)
    return;
  DEBUG("------ resizing heap\n");
  DEBUG("------ old heap size: %zu bytes\n", heap->size);
  DEBUG("------ new heap size: %zu bytes\n", new_size);
  if (new_size < heap->size)
    nofl_space_shrink(heap_nofl_space(heap), heap->size - new_size);
  else
    nofl_space_expand(heap_nofl_space(heap), new_size - heap->size);

  heap->size = new_size;
  HEAP_EVENT(heap, heap_resized, new_size);
}

static double
heap_last_gc_yield(struct gc_heap *heap) {
  struct nofl_space *nofl_space = heap_nofl_space(heap);
  size_t nofl_yield = nofl_space_yield(nofl_space);
  size_t evacuation_reserve = nofl_space_evacuation_reserve_bytes(nofl_space);
  // FIXME: Size nofl evacuation reserve based on size of nofl space,
  // not heap size.
  size_t minimum_evacuation_reserve =
    heap->size * nofl_space->evacuation_minimum_reserve;
  if (evacuation_reserve > minimum_evacuation_reserve)
    nofl_yield += evacuation_reserve - minimum_evacuation_reserve;
  struct large_object_space *lospace = heap_large_object_space(heap);
  size_t lospace_yield = lospace->pages_freed_by_last_collection;
  lospace_yield <<= lospace->page_size_log2;
  size_t live_size =
    nofl_space_live_size_at_last_collection(heap_nofl_space(heap)) +
    large_object_space_size_at_last_collection(heap_large_object_space(heap));

  double yield = nofl_yield + lospace_yield;
  return yield / heap->size;
  if (live_size > heap->size_at_last_gc)
    return 0;
  return 1.0 - ((double) live_size) / heap->size_at_last_gc;
}

static double

@@ -480,31 +489,29 @@ heap_fragmentation(struct gc_heap *heap) {
  return ((double)fragmentation) / heap->size;
}

static size_t
heap_estimate_live_data_after_gc(struct gc_heap *heap,
                                 size_t last_live_bytes,
                                 double last_yield) {
  size_t bytes =
    nofl_space_estimate_live_bytes_after_gc(heap_nofl_space(heap),
                                            last_yield)
    + large_object_space_size_at_last_collection(heap_large_object_space(heap));
  if (bytes < last_live_bytes)
    return last_live_bytes;
  return bytes;
}

static void
detect_out_of_memory(struct gc_heap *heap) {
  struct nofl_space *nofl_space = heap_nofl_space(heap);
  struct large_object_space *lospace = heap_large_object_space(heap);

  if (heap->count == 0)
detect_out_of_memory(struct gc_heap *heap, uintptr_t allocation_since_last_gc) {
  if (heap->sizer.policy != GC_HEAP_SIZE_FIXED)
    return;

  double last_yield = heap_last_gc_yield(heap);
  double fragmentation = heap_fragmentation(heap);

  double yield_epsilon = NOFL_BLOCK_SIZE * 1.0 / heap->size;
  double fragmentation_epsilon = LARGE_OBJECT_THRESHOLD * 1.0 / NOFL_BLOCK_SIZE;

  if (last_yield - fragmentation > yield_epsilon)
  if (allocation_since_last_gc > nofl_space_fragmentation(heap_nofl_space(heap)))
    return;

  if (fragmentation > fragmentation_epsilon
      && atomic_load(&nofl_space->evacuation_targets.count))
    return;

  // No yield in last gc and we do not expect defragmentation to
  // be able to yield more space: out of memory.
  fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
          heap->size, nofl_space->nslabs);
  // No allocation since last gc: out of memory.
  fprintf(stderr, "ran out of space, heap size %zu\n", heap->size);
  GC_CRASH();
}
@@ -731,10 +738,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
  struct nofl_space *nofl_space = heap_nofl_space(heap);
  struct large_object_space *lospace = heap_large_object_space(heap);
  struct gc_extern_space *exspace = heap_extern_space(heap);
  if (maybe_grow_heap(heap)) {
    DEBUG("grew heap instead of collecting #%ld:\n", heap->count);
    return;
  }
  uint64_t start_ns = gc_platform_monotonic_nanoseconds();
  MUTATOR_EVENT(mut, mutator_cause_gc);
  DEBUG("start collect #%ld:\n", heap->count);
  HEAP_EVENT(heap, requesting_stop);

@@ -747,6 +751,11 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
    determine_collection_kind(heap, requested_kind);
  int is_minor = gc_kind == GC_COLLECTION_MINOR;
  HEAP_EVENT(heap, prepare_gc, gc_kind);
  uint64_t allocation_counter = 0;
  nofl_space_add_to_allocation_counter(nofl_space, &allocation_counter);
  large_object_space_add_to_allocation_counter(lospace, &allocation_counter);
  heap->total_allocated_bytes_at_last_gc += allocation_counter;
  detect_out_of_memory(heap, allocation_counter);
  nofl_space_prepare_gc(nofl_space, gc_kind);
  large_object_space_start_gc(lospace, is_minor);
  gc_extern_space_start_gc(exspace, is_minor);

@@ -754,9 +763,9 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
  gc_tracer_prepare(&heap->tracer);
  double yield = heap_last_gc_yield(heap);
  double fragmentation = heap_fragmentation(heap);
  HEAP_EVENT(heap, live_data_size, heap->size * (1 - yield));
  size_t live_bytes = heap->size * (1.0 - yield);
  HEAP_EVENT(heap, live_data_size, live_bytes);
  DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation);
  detect_out_of_memory(heap);
  enqueue_pinned_roots(heap);
  // Eagerly trace pinned roots if we are going to relocate objects.
  if (gc_kind == GC_COLLECTION_COMPACTING)

@@ -780,6 +789,13 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
  gc_extern_space_finish_gc(exspace, is_minor);
  heap->count++;
  heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
  uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns;
  size_t live_bytes_estimate =
    heap_estimate_live_data_after_gc(heap, live_bytes, yield);
  DEBUG("--- total live bytes estimate: %zu\n", live_bytes_estimate);
  gc_heap_sizer_on_gc(heap->sizer, heap->size, live_bytes_estimate, pause_ns,
                      resize_heap, heap);
  heap->size_at_last_gc = heap->size;
  HEAP_EVENT(heap, restarting_mutators);
  allow_mutators_to_continue(heap);
}

@@ -971,7 +987,7 @@ heap_init(struct gc_heap *heap, const struct gc_options *options) {
  pthread_mutex_init(&heap->lock, NULL);
  pthread_cond_init(&heap->mutator_cond, NULL);
  pthread_cond_init(&heap->collector_cond, NULL);
  heap->size = options->common.heap_size;
  heap->size = heap->size_at_last_gc = options->common.heap_size;

  if (!gc_tracer_init(&heap->tracer, heap, options->common.parallelism))
    GC_CRASH();

@@ -995,6 +1011,24 @@ heap_init(struct gc_heap *heap, const struct gc_options *options) {
  return 1;
}

static uint64_t allocation_counter_from_thread(void *data) {
  struct gc_heap *heap = data;
  uint64_t ret = heap->total_allocated_bytes_at_last_gc;
  if (pthread_mutex_trylock(&heap->lock)) return ret;
  nofl_space_add_to_allocation_counter(heap_nofl_space(heap), &ret);
  large_object_space_add_to_allocation_counter(heap_large_object_space(heap),
                                               &ret);
  pthread_mutex_unlock(&heap->lock);
  return ret;
}

static void set_heap_size_from_thread(size_t size, void *data) {
  struct gc_heap *heap = data;
  if (pthread_mutex_trylock(&heap->lock)) return;
  resize_heap(size, heap);
  pthread_mutex_unlock(&heap->lock);
}

int
gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
        struct gc_heap **heap, struct gc_mutator **mut,
@@ -1015,11 +1049,6 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
                 NOFL_BLOCK_SIZE / NOFL_REMSET_BYTES_PER_BLOCK);
  }

  if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
    fprintf(stderr, "fixed heap size is currently required\n");
    return 0;
  }

  *heap = calloc(1, sizeof(struct gc_heap));
  if (!*heap) GC_CRASH();

@@ -1042,6 +1071,11 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
  if (!large_object_space_init(heap_large_object_space(*heap), *heap))
    GC_CRASH();

  (*heap)->sizer = gc_make_heap_sizer(*heap, &options->common,
                                      allocation_counter_from_thread,
                                      set_heap_size_from_thread,
                                      (*heap));

  *mut = calloc(1, sizeof(struct gc_mutator));
  if (!*mut) GC_CRASH();
  gc_stack_init(&(*mut)->stack, stack_base);
src/nofl-space.h | 204
@@ -75,6 +75,7 @@ enum nofl_block_summary_flag {
  NOFL_BLOCK_EVACUATE = 0x1,
  NOFL_BLOCK_ZERO = 0x2,
  NOFL_BLOCK_UNAVAILABLE = 0x4,
  NOFL_BLOCK_PAGED_OUT = 0x8,
  NOFL_BLOCK_FLAG_UNUSED_3 = 0x8,
  NOFL_BLOCK_FLAG_UNUSED_4 = 0x10,
  NOFL_BLOCK_FLAG_UNUSED_5 = 0x20,

@@ -95,7 +96,7 @@ struct nofl_block_summary {
  // Counters related to previous collection: how many holes there
  // were, and how much space they had.
  uint16_t hole_count;
  uint16_t free_granules;
  uint16_t hole_granules;
  // Counters related to allocation since previous collection:
  // wasted space due to fragmentation. Also used by blocks on the
  // "partly full" list, which have zero holes_with_fragmentation

@@ -158,7 +159,9 @@ struct nofl_space {
  ssize_t pending_unavailable_bytes; // atomically
  struct nofl_slab **slabs;
  size_t nslabs;
  uintptr_t granules_freed_by_last_collection; // atomically
  uintptr_t old_generation_granules; // atomically
  uintptr_t survivor_granules_at_last_collection; // atomically
  uintptr_t allocated_granules_since_last_collection; // atomically
  uintptr_t fragmentation_granules_since_last_collection; // atomically
};

@@ -271,7 +274,7 @@ nofl_block_summary_for_addr(uintptr_t addr) {
static uintptr_t
nofl_block_summary_has_flag(struct nofl_block_summary *summary,
                            enum nofl_block_summary_flag flag) {
  return summary->next_and_flags & flag;
  return (summary->next_and_flags & flag) == flag;
}

static void

@@ -404,12 +407,23 @@ nofl_block_count(struct nofl_block_list *list) {
  return atomic_load_explicit(&list->count, memory_order_acquire);
}

static void
nofl_push_paged_out_block(struct nofl_space *space,
                          struct nofl_block_ref block) {
  GC_ASSERT(nofl_block_has_flag(block,
                                NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT));
  nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE);
  nofl_push_block(&space->unavailable, block);
}

static void
nofl_push_unavailable_block(struct nofl_space *space,
                            struct nofl_block_ref block) {
  nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_UNAVAILABLE);
  if (!nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT)) {
    nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
    madvise((void*)block.addr, NOFL_BLOCK_SIZE, MADV_DONTNEED);
    nofl_push_block(&space->unavailable, block);
  }
  nofl_push_paged_out_block(space, block);
}

static struct nofl_block_ref

@@ -483,7 +497,7 @@ nofl_should_promote_block(struct nofl_space *space,
  // until after the next full GC.
  if (!GC_GENERATIONAL) return 0;
  size_t threshold = NOFL_GRANULES_PER_BLOCK * space->promotion_threshold;
  return block.summary->free_granules < threshold;
  return block.summary->hole_granules < threshold;
}

static void

@@ -492,8 +506,10 @@ nofl_allocator_release_full_block(struct nofl_allocator *alloc,
  GC_ASSERT(nofl_allocator_has_block(alloc));
  struct nofl_block_ref block = alloc->block;
  GC_ASSERT(alloc->alloc == alloc->sweep);
  atomic_fetch_add(&space->granules_freed_by_last_collection,
                   block.summary->free_granules);
  atomic_fetch_add(&space->allocated_granules_since_last_collection,
                   block.summary->hole_granules);
  atomic_fetch_add(&space->survivor_granules_at_last_collection,
                   NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules);
  atomic_fetch_add(&space->fragmentation_granules_since_last_collection,
                   block.summary->fragmentation_granules);

@@ -515,15 +531,13 @@ nofl_allocator_release_full_evacuation_target(struct nofl_allocator *alloc,
  size_t hole_size = alloc->sweep - alloc->alloc;
  // FIXME: Check how this affects statistics.
  GC_ASSERT_EQ(block.summary->hole_count, 1);
  GC_ASSERT_EQ(block.summary->free_granules, NOFL_GRANULES_PER_BLOCK);
  atomic_fetch_add(&space->granules_freed_by_last_collection,
  GC_ASSERT_EQ(block.summary->hole_granules, NOFL_GRANULES_PER_BLOCK);
  atomic_fetch_add(&space->old_generation_granules,
                   NOFL_GRANULES_PER_BLOCK);
  if (hole_size) {
    hole_size >>= NOFL_GRANULE_SIZE_LOG_2;
    block.summary->holes_with_fragmentation = 1;
    block.summary->fragmentation_granules = hole_size / NOFL_GRANULE_SIZE;
    atomic_fetch_add(&space->fragmentation_granules_since_last_collection,
                     block.summary->fragmentation_granules);
  } else {
    GC_ASSERT_EQ(block.summary->fragmentation_granules, 0);
    GC_ASSERT_EQ(block.summary->holes_with_fragmentation, 0);

@@ -570,14 +584,14 @@ nofl_allocator_acquire_empty_block(struct nofl_allocator *alloc,
  if (nofl_block_is_null(block))
    return 0;
  block.summary->hole_count = 1;
  block.summary->free_granules = NOFL_GRANULES_PER_BLOCK;
  block.summary->hole_granules = NOFL_GRANULES_PER_BLOCK;
  block.summary->holes_with_fragmentation = 0;
  block.summary->fragmentation_granules = 0;
  alloc->block = block;
  alloc->alloc = block.addr;
  alloc->sweep = block.addr + NOFL_BLOCK_SIZE;
  if (nofl_block_has_flag(block, NOFL_BLOCK_ZERO))
    nofl_block_clear_flag(block, NOFL_BLOCK_ZERO);
    nofl_block_clear_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
  else
    nofl_clear_memory(block.addr, NOFL_BLOCK_SIZE);
  return NOFL_GRANULES_PER_BLOCK;

@@ -637,22 +651,22 @@ nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc,
    return 0;
  }

  size_t free_granules = scan_for_byte(metadata, limit_granules, sweep_mask);
  size_t free_bytes = free_granules * NOFL_GRANULE_SIZE;
  GC_ASSERT(free_granules);
  GC_ASSERT(free_granules <= limit_granules);
  size_t hole_granules = scan_for_byte(metadata, limit_granules, sweep_mask);
  size_t free_bytes = hole_granules * NOFL_GRANULE_SIZE;
  GC_ASSERT(hole_granules);
  GC_ASSERT(hole_granules <= limit_granules);

  memset(metadata, 0, free_granules);
  memset(metadata, 0, hole_granules);
  memset((char*)sweep, 0, free_bytes);

  alloc->block.summary->hole_count++;
  GC_ASSERT(free_granules <=
            NOFL_GRANULES_PER_BLOCK - alloc->block.summary->free_granules);
  alloc->block.summary->free_granules += free_granules;
  GC_ASSERT(hole_granules <=
            NOFL_GRANULES_PER_BLOCK - alloc->block.summary->hole_granules);
  alloc->block.summary->hole_granules += hole_granules;

  alloc->alloc = sweep;
  alloc->sweep = sweep + free_bytes;
  return free_granules;
  return hole_granules;
}

static void

@@ -724,7 +738,7 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
        // at the last collection. As we allocate we'll record how
        // many granules were wasted because of fragmentation.
        alloc->block.summary->hole_count = 0;
        alloc->block.summary->free_granules = 0;
        alloc->block.summary->hole_granules = 0;
        alloc->block.summary->holes_with_fragmentation = 0;
        alloc->block.summary->fragmentation_granules = 0;
        size_t granules =

@@ -875,13 +889,53 @@ nofl_space_clear_remembered_set(struct nofl_space *space) {

static void
nofl_space_reset_statistics(struct nofl_space *space) {
  space->granules_freed_by_last_collection = 0;
  space->survivor_granules_at_last_collection = 0;
  space->allocated_granules_since_last_collection = 0;
  space->fragmentation_granules_since_last_collection = 0;
}

static size_t
nofl_space_yield(struct nofl_space *space) {
  return space->granules_freed_by_last_collection * NOFL_GRANULE_SIZE;
nofl_space_live_size_at_last_collection(struct nofl_space *space) {
  size_t granules = space->old_generation_granules
    + space->survivor_granules_at_last_collection;
  return granules * NOFL_GRANULE_SIZE;
}

static void
nofl_space_add_to_allocation_counter(struct nofl_space *space,
                                     uint64_t *counter) {
  *counter +=
    atomic_load_explicit(&space->allocated_granules_since_last_collection,
                         memory_order_relaxed) * NOFL_GRANULE_SIZE;
}

static size_t
nofl_space_estimate_live_bytes_after_gc(struct nofl_space *space,
                                        double last_yield)
{
  // The nofl space mostly traces via marking, and as such doesn't precisely
  // know the live data size until after sweeping. But it is important to
  // promptly compute the live size so that we can grow the heap if
  // appropriate. Therefore sometimes we will estimate the live data size
  // instead of measuring it precisely.
  size_t bytes = 0;
  bytes += nofl_block_count(&space->full) * NOFL_BLOCK_SIZE;
  bytes += nofl_block_count(&space->partly_full) * NOFL_BLOCK_SIZE / 2;
  GC_ASSERT_EQ(nofl_block_count(&space->promoted), 0);
  bytes += space->old_generation_granules * NOFL_GRANULE_SIZE;
  bytes +=
    nofl_block_count(&space->to_sweep) * NOFL_BLOCK_SIZE * (1 - last_yield);

  DEBUG("--- nofl estimate before adjustment: %zu\n", bytes);
/*
  // Assume that if we have pending unavailable bytes after GC that there is a
  // large object waiting to be allocated, and that probably it survives this GC
  // cycle.
  bytes += atomic_load_explicit(&space->pending_unavailable_bytes,
                                memory_order_acquire);
  DEBUG("--- nofl estimate after adjustment: %zu\n", bytes);
*/
  return bytes;
}

static size_t
@@ -891,8 +945,12 @@ nofl_space_evacuation_reserve_bytes(struct nofl_space *space) {

static size_t
nofl_space_fragmentation(struct nofl_space *space) {
  size_t granules = space->fragmentation_granules_since_last_collection;
  return granules * NOFL_GRANULE_SIZE;
  size_t young = space->fragmentation_granules_since_last_collection;
  GC_ASSERT(nofl_block_count(&space->old) * NOFL_GRANULES_PER_BLOCK >=
            space->old_generation_granules);
  size_t old = nofl_block_count(&space->old) * NOFL_GRANULES_PER_BLOCK -
    space->old_generation_granules;
  return (young + old) * NOFL_GRANULE_SIZE;
}

static void

@@ -933,7 +991,7 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
  for (struct nofl_block_ref b = nofl_block_for_addr(space->to_sweep.blocks);
       !nofl_block_is_null(b);
       b = nofl_block_next(b)) {
    size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->free_granules;
    size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->hole_granules;
    size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size;
    histogram[bucket]++;
  }

@@ -956,7 +1014,7 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
    for (struct nofl_block_ref b = nofl_block_for_addr(space->to_sweep.blocks);
         !nofl_block_is_null(b);
         b = nofl_block_next(b)) {
      size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->free_granules;
      size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->hole_granules;
      size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size;
      if (histogram[bucket]) {
        nofl_block_set_flag(b, NOFL_BLOCK_EVACUATE);

@@ -1012,6 +1070,7 @@ nofl_space_start_gc(struct nofl_space *space, enum gc_collection_kind gc_kind) {
      nofl_push_block(&space->to_sweep, block);
    while (!nofl_block_is_null(block = nofl_pop_block(&space->old)))
      nofl_push_block(&space->to_sweep, block);
    space->old_generation_granules = 0;
  }

  if (gc_kind == GC_COLLECTION_COMPACTING)

@@ -1042,6 +1101,8 @@ nofl_space_promote_blocks(struct nofl_space *space) {
  while (!nofl_block_is_null(block = nofl_pop_block(&space->promoted))) {
    struct nofl_allocator alloc = { block.addr, block.addr, block };
    nofl_allocator_finish_sweeping_in_block(&alloc, space->sweep_mask);
    atomic_fetch_add(&space->old_generation_granules,
                     NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules);
    nofl_push_block(&space->old, block);
  }
}

@@ -1210,18 +1271,25 @@ nofl_space_request_release_memory(struct nofl_space *space, size_t bytes) {
  return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes;
}

static void
nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) {
static ssize_t
nofl_space_maybe_reacquire_memory(struct nofl_space *space, size_t bytes) {
  ssize_t pending =
    atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes;
  while (pending + NOFL_BLOCK_SIZE <= 0) {
    struct nofl_block_ref block = nofl_pop_unavailable_block(space);
    GC_ASSERT(!nofl_block_is_null(block));
    if (nofl_block_is_null(block)) break;
    if (!nofl_push_evacuation_target_if_needed(space, block))
      nofl_push_empty_block(space, block);
    pending = atomic_fetch_add(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE)
      + NOFL_BLOCK_SIZE;
  }
  return pending;
}

static void
nofl_space_reacquire_memory(struct nofl_space *space, size_t bytes) {
  ssize_t pending = nofl_space_maybe_reacquire_memory(space, bytes);
  GC_ASSERT(pending + NOFL_BLOCK_SIZE > 0);
}

static int

@@ -1538,6 +1606,66 @@ nofl_space_add_slabs(struct nofl_space *space, struct nofl_slab *slabs,
    space->slabs[space->nslabs++] = slabs++;
}

static void
nofl_space_shrink(struct nofl_space *space, size_t bytes) {
  ssize_t pending = nofl_space_request_release_memory(space, bytes);
  // First try to shrink by unmapping previously-identified empty blocks.
  while (pending > 0) {
    struct nofl_block_ref block = nofl_pop_empty_block(space);
    if (nofl_block_is_null(block))
      break;
    nofl_push_unavailable_block(space, block);
    pending = atomic_fetch_sub(&space->pending_unavailable_bytes,
                               NOFL_BLOCK_SIZE);
    pending -= NOFL_BLOCK_SIZE;
  }

  // If we still need to shrink, steal from the evacuation reserve, if it's more
  // than the minimum. Not racy: evacuation target lists are built during eager
  // lazy sweep, which is mutually exclusive with consumption, itself either
  // during trace, synchronously from gc_heap_sizer_on_gc, or async but subject
  // to the heap lock.
  if (pending > 0) {
    size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
    size_t unavailable = nofl_block_count(&space->unavailable);
    size_t target = space->evacuation_minimum_reserve * (total - unavailable);
    ssize_t avail = nofl_block_count(&space->evacuation_targets);
    while (avail > target && pending > 0) {
      struct nofl_block_ref block = nofl_pop_block(&space->evacuation_targets);
      GC_ASSERT(!nofl_block_is_null(block));
      nofl_push_unavailable_block(space, block);
      pending = atomic_fetch_sub(&space->pending_unavailable_bytes,
                                 NOFL_BLOCK_SIZE);
      pending -= NOFL_BLOCK_SIZE;
    }
  }

  // It still may be the case we need to page out more blocks. Only evacuation
  // can help us then!
}

static void
nofl_space_expand(struct nofl_space *space, size_t bytes) {
  double overhead = ((double)NOFL_META_BLOCKS_PER_SLAB) / NOFL_BLOCKS_PER_SLAB;
  ssize_t to_acquire = -nofl_space_maybe_reacquire_memory(space, bytes);
  if (to_acquire <= 0) return;
  to_acquire *= (1 + overhead);
  size_t reserved = align_up(to_acquire, NOFL_SLAB_SIZE);
  size_t nslabs = reserved / NOFL_SLAB_SIZE;
  struct nofl_slab *slabs = nofl_allocate_slabs(nslabs);
  nofl_space_add_slabs(space, slabs, nslabs);

  for (size_t slab = 0; slab < nslabs; slab++) {
    for (size_t idx = 0; idx < NOFL_NONMETA_BLOCKS_PER_SLAB; idx++) {
      uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data;
      struct nofl_block_ref block = nofl_block_for_addr(addr);
      nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
      nofl_push_paged_out_block(space, block);
    }
  }
  nofl_space_reacquire_memory(space, 0);
}

static int
nofl_space_init(struct nofl_space *space, size_t size, int atomic,
                double promotion_threshold) {

@@ -1556,12 +1684,12 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic,
  space->evacuation_reserve = space->evacuation_minimum_reserve;
  space->promotion_threshold = promotion_threshold;
  for (size_t slab = 0; slab < nslabs; slab++) {
    for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) {
      uintptr_t addr = (uintptr_t)slabs[slab].blocks[block].data;
    for (size_t idx = 0; idx < NOFL_NONMETA_BLOCKS_PER_SLAB; idx++) {
      uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data;
      struct nofl_block_ref block = nofl_block_for_addr(addr);
      nofl_block_set_flag(block, NOFL_BLOCK_ZERO);
      nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT);
      if (reserved > size) {
        nofl_push_unavailable_block(space, block);
        nofl_push_paged_out_block(space, block);
        reserved -= NOFL_BLOCK_SIZE;
      } else {
        if (!nofl_push_evacuation_target_if_needed(space, block))
src/pcc.c | 59
@@ -13,7 +13,9 @@
#include "debug.h"
#include "gc-align.h"
#include "gc-inline.h"
#include "gc-platform.h"
#include "gc-trace.h"
#include "heap-sizer.h"
#include "large-object-space.h"
#if GC_PARALLEL
#include "parallel-tracer.h"

@@ -32,6 +34,7 @@ struct gc_heap {
  pthread_cond_t collector_cond;
  pthread_cond_t mutator_cond;
  size_t size;
  size_t total_allocated_bytes_at_last_gc;
  int collecting;
  int check_pending_ephemerons;
  struct gc_pending_ephemerons *pending_ephemerons;

@@ -45,6 +48,7 @@ struct gc_heap {
  struct gc_tracer tracer;
  double pending_ephemerons_size_factor;
  double pending_ephemerons_size_slop;
  struct gc_heap_sizer sizer;
  struct gc_event_listener event_listener;
  void *event_listener_data;
};

@@ -316,8 +320,20 @@ static inline void maybe_pause_mutator_for_collection(struct gc_mutator *mut) {
  pause_mutator_for_collection_without_lock(mut);
}

static int maybe_grow_heap(struct gc_heap *heap) {
  return 0;
static void resize_heap(size_t new_size, void *data) {
  struct gc_heap *heap = data;
  if (new_size == heap->size)
    return;
  DEBUG("------ resizing heap\n");
  DEBUG("------ old heap size: %zu bytes\n", heap->size);
  DEBUG("------ new heap size: %zu bytes\n", new_size);
  if (new_size < heap->size)
    copy_space_shrink(heap_copy_space(heap), heap->size - new_size);
  else
    copy_space_expand(heap_copy_space(heap), new_size - heap->size);

  heap->size = new_size;
  HEAP_EVENT(heap, heap_resized, new_size);
}

static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap,

@@ -375,6 +391,7 @@ static void collect(struct gc_mutator *mut) {
  struct copy_space *copy_space = heap_copy_space(heap);
  struct large_object_space *lospace = heap_large_object_space(heap);
  struct gc_extern_space *exspace = heap_extern_space(heap);
  uint64_t start_ns = gc_platform_monotonic_nanoseconds();
  MUTATOR_EVENT(mut, mutator_cause_gc);
  DEBUG("start collect #%ld:\n", heap->count);
  HEAP_EVENT(heap, requesting_stop);

@@ -383,6 +400,9 @@ static void collect(struct gc_mutator *mut) {
  wait_for_mutators_to_stop(heap);
  HEAP_EVENT(heap, mutators_stopped);
  HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING);
  uint64_t *counter_loc = &heap->total_allocated_bytes_at_last_gc;
  copy_space_add_to_allocation_counter(copy_space, counter_loc);
  large_object_space_add_to_allocation_counter(lospace, counter_loc);
  copy_space_flip(copy_space);
  large_object_space_start_gc(lospace, 0);
  gc_extern_space_start_gc(exspace, 0);

@@ -406,9 +426,12 @@ static void collect(struct gc_mutator *mut) {
  heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
  size_t live_size = (copy_space->allocated_bytes_at_last_gc +
                      large_object_space_size_at_last_collection(lospace));
  uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns;
  HEAP_EVENT(heap, live_data_size, live_size);
  maybe_grow_heap(heap);
  if (!copy_space_page_out_blocks_until_memory_released(copy_space)) {
  gc_heap_sizer_on_gc(heap->sizer, heap->size, live_size, pause_ns,
                      resize_heap, heap);
  if (!copy_space_page_out_blocks_until_memory_released(copy_space)
      && heap->sizer.policy == GC_HEAP_SIZE_FIXED) {
    fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
            heap->size, copy_space->nslabs);
    GC_CRASH();

@@ -584,6 +607,24 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) {
  return 1;
}

static uint64_t allocation_counter_from_thread(void *data) {
  struct gc_heap *heap = data;
  uint64_t ret = heap->total_allocated_bytes_at_last_gc;
  if (pthread_mutex_trylock(&heap->lock)) return ret;
  copy_space_add_to_allocation_counter(heap_copy_space(heap), &ret);
  large_object_space_add_to_allocation_counter(heap_large_object_space(heap),
                                               &ret);
  pthread_mutex_unlock(&heap->lock);
  return ret;
}

static void set_heap_size_from_thread(size_t size, void *data) {
  struct gc_heap *heap = data;
  if (pthread_mutex_trylock(&heap->lock)) return;
  resize_heap(size, heap);
  pthread_mutex_unlock(&heap->lock);
}

int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
            struct gc_heap **heap, struct gc_mutator **mut,
            struct gc_event_listener event_listener,

@@ -596,11 +637,6 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
  GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(),
               offsetof(struct copy_space_allocator, limit));

  if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) {
    fprintf(stderr, "fixed heap size is currently required\n");
    return 0;
  }

  *heap = calloc(1, sizeof(struct gc_heap));
  if (!*heap) GC_CRASH();

@@ -622,6 +658,11 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
  if (!large_object_space_init(heap_large_object_space(*heap), *heap))
    GC_CRASH();

  (*heap)->sizer = gc_make_heap_sizer(*heap, &options->common,
                                      allocation_counter_from_thread,
                                      set_heap_size_from_thread,
                                      (*heap));

  *mut = calloc(1, sizeof(struct gc_mutator));
  if (!*mut) GC_CRASH();
  add_mutator(*heap, *mut);