Optimize sweeping
Use uint64 instead of uintptr when bulk-reading metadata bytes. Assume that live objects come in plugs rather than each object being separated by a hole. Always bulk-load metadata bytes when measuring holes, and be less branchy. Lazily clear hole bytes as we allocate. Add a place to record lost space due to fragmentation.
parent 0d0d684952
commit 815f206e28
1 changed file with 135 additions and 52 deletions
mark-sweep.h | 187
@@ -191,7 +191,7 @@ struct gcobj {
 };
 
 struct mark_space {
-  uintptr_t sweep_mask;
+  uint64_t sweep_mask;
   uint8_t live_mask;
   uint8_t marked_mask;
   uintptr_t low_addr;
@@ -231,6 +231,7 @@ struct mutator {
   // Bump-pointer allocation into holes.
   uintptr_t alloc;
   uintptr_t sweep;
+  uintptr_t block;
   struct heap *heap;
   struct handle *roots;
   struct mutator_mark_buf mark_buf;
@@ -453,10 +454,11 @@ static void wait_for_mutators_to_stop(struct heap *heap) {
 }
 
 static void finish_sweeping(struct mutator *mut);
+static void finish_sweeping_in_block(struct mutator *mut);
 
 static void mark_inactive_mutators(struct heap *heap) {
   for (struct mutator *mut = heap->deactivated_mutators; mut; mut = mut->next) {
-    finish_sweeping(mut);
+    finish_sweeping_in_block(mut);
     mark_controlling_mutator_roots(mut);
   }
 }
@@ -501,7 +503,7 @@ static void pause_mutator_for_collection_with_lock(struct mutator *mut) NEVER_IN
 static void pause_mutator_for_collection_with_lock(struct mutator *mut) {
   struct heap *heap = mutator_heap(mut);
   ASSERT(mutators_are_stopping(heap));
-  finish_sweeping(mut);
+  finish_sweeping_in_block(mut);
   mark_controlling_mutator_roots(mut);
   pause_mutator_for_collection(heap);
 }
@@ -527,8 +529,8 @@ static void reset_sweeper(struct mark_space *space) {
   space->next_block = (uintptr_t) &space->slabs[0].blocks;
 }
 
-static uintptr_t broadcast_byte(uint8_t byte) {
-  uintptr_t result = byte;
+static uint64_t broadcast_byte(uint8_t byte) {
+  uint64_t result = byte;
   return result * 0x0101010101010101ULL;
 }
 
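The sweep mask is now the per-granule mark byte broadcast into all eight byte lanes of a 64-bit word (hence the uint64_t fields above), so a single AND can test the metadata bytes of eight granules at once. A minimal standalone sketch of the idea; the mark value and the test loop are illustrative, not the actual METADATA_BYTE_* constants from mark-sweep.h:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t broadcast_byte(uint8_t byte) {
  uint64_t result = byte;
  return result * 0x0101010101010101ULL;
}

int main(void) {
  // Hypothetical mark bit for the current cycle; the real values come
  // from the METADATA_BYTE_* flags in mark-sweep.h.
  uint8_t live_mark = 0x04;
  uint64_t sweep_mask = broadcast_byte(live_mark);

  // Eight per-granule metadata bytes, loaded as one word.
  uint8_t metadata[8] = { 0, 0, 0, 0x04, 0, 0, 0x04, 0 };
  uint64_t word;
  memcpy(&word, metadata, 8);

  // Any nonzero result means at least one of the eight granules is live.
  printf("live somewhere in these 8 granules: %s\n",
         (word & sweep_mask) ? "yes" : "no");
  return 0;
}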
@@ -547,6 +549,7 @@ static void collect(struct mutator *mut) {
   tracer_prepare(heap);
   request_mutators_to_stop(heap);
   mark_controlling_mutator_roots(mut);
+  finish_sweeping(mut);
   wait_for_mutators_to_stop(heap);
   mark_inactive_mutators(heap);
   mark_global_roots(heap);
@@ -593,21 +596,42 @@ static int sweep_word(uintptr_t *loc, uintptr_t sweep_mask) {
   return 0;
 }
 
-static size_t next_mark(uint8_t *mark, size_t limit, uintptr_t sweep_mask) {
+static inline uint64_t load_mark_bytes(uint8_t *mark) {
+  ASSERT(((uintptr_t)mark & 7) == 0);
+  uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
+  uint64_t word;
+  memcpy(&word, aligned_mark, 8);
+#ifdef WORDS_BIGENDIAN
+  word = __builtin_bswap64(word);
+#endif
+  return word;
+}
+
+static inline size_t count_zero_bytes(uint64_t bytes) {
+  return bytes ? (__builtin_ctz(bytes) / 8) : sizeof(bytes);
+}
+
+static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
   size_t n = 0;
-  // FIXME: may_alias
-  for (; (((uintptr_t)mark) & (sizeof(uintptr_t)-1)) && n < limit; n++)
-    if (sweep_byte(&mark[n], sweep_mask))
-      return n;
+  // If we have a hole, it is likely to be more that 8 granules long.
+  // Assuming that it's better to make aligned loads, first we align the
+  // sweep pointer, then we load aligned mark words.
+  size_t unaligned = ((uintptr_t) mark) & 7;
+  if (unaligned) {
+    uint64_t bytes = load_mark_bytes(mark - unaligned) >> (unaligned * 8);
+    bytes &= sweep_mask;
+    if (bytes)
+      return count_zero_bytes(bytes);
+    n += 8 - unaligned;
+  }
 
-  uintptr_t *mark_word = (uintptr_t*)&mark[n];
-  for (; n + sizeof(uintptr_t) <= limit; n += sizeof(uintptr_t), mark_word++)
-    if (sweep_word(mark_word, sweep_mask))
-      break;
+  for(; n < limit; n += 8) {
+    uint64_t bytes = load_mark_bytes(mark + n);
+    bytes &= sweep_mask;
+    if (bytes)
+      return n + count_zero_bytes(bytes);
+  }
 
-  for (; n < limit; n++)
-    if (sweep_byte(&mark[n], sweep_mask))
-      return n;
   return limit;
 }
 
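With that mask in hand, next_mark measures a hole by loading eight metadata bytes per iteration, ANDing them with the sweep mask, and converting the count of trailing zero bytes into a granule index. A self-contained sketch of the same search over a made-up metadata array; it assumes little-endian byte order (the hunk above handles big-endian with bswap) and uses __builtin_ctzll directly rather than the helpers above:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Count metadata bytes from the start of `mark` until a byte with a bit
// from `sweep_mask` is set, i.e. the length of the hole in granules.
static size_t hole_granules(const uint8_t *mark, size_t limit,
                            uint64_t sweep_mask) {
  size_t n = 0;
  for (; n + 8 <= limit; n += 8) {
    uint64_t word;
    memcpy(&word, mark + n, 8);            // unaligned load, for simplicity
    word &= sweep_mask;
    if (word)
      return n + __builtin_ctzll(word) / 8;  // index of first nonzero byte
  }
  for (; n < limit; n++)                   // byte-at-a-time tail
    if (mark[n] & sweep_mask)
      return n;
  return limit;
}

int main(void) {
  uint8_t metadata[24] = { 0 };
  metadata[13] = 0x04;                     // pretend granule 13 is marked live
  uint64_t sweep_mask = 0x0404040404040404ULL;
  printf("hole is %zu granules long\n",
         hole_granules(metadata, sizeof metadata, sweep_mask));
  return 0;
}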
@@ -632,46 +656,103 @@ static uintptr_t mark_space_next_block(struct mark_space *space) {
   return block;
 }
 
+static void finish_block(struct mutator *mut) {
+  mut->block = mut->alloc = mut->sweep = 0;
+}
+
+static int next_block(struct mutator *mut) {
+  ASSERT(mut->sweep == 0);
+  uintptr_t block = mark_space_next_block(heap_mark_space(mutator_heap(mut)));
+  if (block == 0)
+    return 0;
+
+  mut->alloc = mut->sweep = mut->block = block;
+  return 1;
+}
+
 // Sweep some heap to reclaim free space, resetting mut->alloc and
 // mut->sweep. Return the size of the hole in granules.
-static size_t next_hole(struct mutator *mut, size_t clear_size) {
+static size_t next_hole_in_block(struct mutator *mut) {
   uintptr_t sweep = mut->sweep;
-  uintptr_t limit = align_up(sweep, BLOCK_SIZE);
+  if (sweep == 0)
+    return 0;
+  uintptr_t limit = mut->block + BLOCK_SIZE;
   uintptr_t sweep_mask = heap_mark_space(mutator_heap(mut))->sweep_mask;
 
-  while (1) {
-    if (sweep == limit) {
-      sweep = mark_space_next_block(heap_mark_space(mutator_heap(mut)));
-      if (sweep == 0) {
-        mut->alloc = mut->sweep = 0;
-        return 0;
+  while (sweep != limit) {
+    ASSERT((sweep & (GRANULE_SIZE - 1)) == 0);
+    uint8_t* metadata = object_metadata_byte((struct gcobj*)sweep);
+    size_t limit_granules = (limit - sweep) >> GRANULE_SIZE_LOG_2;
+
+    // Except for when we first get a block, mut->sweep is positioned
+    // right after a hole, which can point to either the end of the
+    // block or to a live object. Assume that a live object is more
+    // common.
+    {
+      size_t live_granules = 0;
+      while (limit_granules && (metadata[0] & sweep_mask)) {
+        // Object survived collection; skip over it and continue sweeping.
+        size_t object_granules = mark_space_live_object_granules(metadata);
+        live_granules += object_granules;
+        limit_granules -= object_granules;
+        metadata += object_granules;
       }
-      limit = sweep + BLOCK_SIZE;
+      if (!limit_granules)
+        break;
+      sweep += live_granules * GRANULE_SIZE;
     }
 
-    ASSERT((sweep & (GRANULE_SIZE - 1)) == 0);
-    uint8_t* mark = object_metadata_byte((struct gcobj*)sweep);
-    size_t limit_granules = (limit - sweep) >> GRANULE_SIZE_LOG_2;
-    size_t free_granules = next_mark(mark, limit_granules, sweep_mask);
-    if (free_granules) {
-      ASSERT(free_granules <= limit_granules);
-      size_t free_bytes = free_granules * GRANULE_SIZE;
-      if (free_granules >= clear_size)
-        clear_memory(sweep, free_bytes);
-      mut->alloc = sweep;
-      mut->sweep = sweep + free_bytes;
-      return free_granules;
-    }
-    // Object survived collection; skip over it and continue sweeping.
-    ASSERT((*mark) & sweep_mask);
-    sweep += mark_space_live_object_granules(mark) * GRANULE_SIZE;
+    size_t free_granules = next_mark(metadata, limit_granules, sweep_mask);
+    ASSERT(free_granules);
+    ASSERT(free_granules <= limit_granules);
+    size_t free_bytes = free_granules * GRANULE_SIZE;
+    mut->alloc = sweep;
+    mut->sweep = sweep + free_bytes;
+    return free_granules;
   }
+
+  finish_block(mut);
+  return 0;
+}
+
+static void finish_hole(struct mutator *mut) {
+  size_t granules = (mut->sweep - mut->alloc) / GRANULE_SIZE;
+  if (granules) {
+    uint8_t *metadata = object_metadata_byte((void*)mut->alloc);
+    memset(metadata, 0, granules);
+    mut->alloc = mut->sweep;
+  }
+  // FIXME: add to fragmentation
+}
+
+static size_t next_hole(struct mutator *mut) {
+  finish_hole(mut);
+  while (1) {
+    size_t granules = next_hole_in_block(mut);
+    if (granules)
+      return granules;
+    if (!next_block(mut))
+      return 0;
+  }
+}
+
+static void finish_sweeping_in_block(struct mutator *mut) {
+  while (next_hole_in_block(mut))
+    finish_hole(mut);
 }
 
 // Another thread is triggering GC. Before we stop, finish clearing the
 // dead mark bytes for the mutator's block, and release the block.
 static void finish_sweeping(struct mutator *mut) {
-  while (next_hole(mut, -1)) {}
+  while (next_hole(mut))
+    finish_hole(mut);
+}
+
+static void out_of_memory(struct mutator *mut) {
+  struct heap *heap = mutator_heap(mut);
+  fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
+          heap->size, heap_mark_space(heap)->nslabs);
+  abort();
 }
 
 static void* allocate_large(struct mutator *mut, enum alloc_kind kind,
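Sweeping is now organized around plugs and holes: from the sweep pointer, first skip the run of live objects whose first metadata byte carries the mark, then hand the following hole to the allocator; finish_hole clears a hole's mark bytes only once the mutator moves past it. A toy version of the plug-skipping loop, with hypothetical LIVE_BIT/END_BIT flags and a stand-in for mark_space_live_object_granules (assumed here to scan to the byte tagged as the object's end):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

// Hypothetical metadata bits, standing in for the METADATA_BYTE_* flags.
#define LIVE_BIT 0x04
#define END_BIT  0x10

// Stand-in for mark_space_live_object_granules(): assume an object's last
// granule is tagged with END_BIT, so its size is the distance to that byte.
static size_t live_object_granules(const uint8_t *metadata) {
  size_t n = 1;
  while (!(metadata[n - 1] & END_BIT))
    n++;
  return n;
}

// Skip a plug of live objects: advance over whole objects while their
// first metadata byte carries the live mark.
static size_t skip_live_plug(const uint8_t **metadata, size_t *limit_granules,
                             uint8_t live_mark) {
  size_t live = 0;
  while (*limit_granules && ((*metadata)[0] & live_mark)) {
    size_t object_granules = live_object_granules(*metadata);
    live += object_granules;
    *limit_granules -= object_granules;
    *metadata += object_granules;
  }
  return live;
}

int main(void) {
  // A 3-granule live object, then a 1-granule live object, then a hole.
  uint8_t block[8] = { LIVE_BIT, 0, END_BIT, LIVE_BIT | END_BIT, 0, 0, 0, 0 };
  const uint8_t *metadata = block;
  size_t limit = 8;
  printf("plug covers %zu granules\n",
         skip_live_plug(&metadata, &limit, LIVE_BIT));
  return 0;
}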
@@ -686,10 +767,8 @@ static void* allocate_large(struct mutator *mut, enum alloc_kind kind,
 
   if (!heap_steal_pages(heap, npages)) {
     collect(mut);
-    if (!heap_steal_pages(heap, npages)) {
-      fprintf(stderr, "ran out of space, heap size %zu\n", heap->size);
-      abort();
-    }
+    if (!heap_steal_pages(heap, npages))
+      out_of_memory(mut);
   }
 
   void *ret = large_object_space_alloc(space, npages);
@@ -713,14 +792,15 @@ static void* allocate_small_slow(struct mutator *mut, enum alloc_kind kind,
                                  size_t granules) {
   int swept_from_beginning = 0;
   while (1) {
-    size_t hole = next_hole(mut, granules);
-    if (hole >= granules)
+    size_t hole = next_hole(mut);
+    if (hole >= granules) {
+      clear_memory(mut->alloc, hole * GRANULE_SIZE);
       break;
+    }
     if (!hole) {
       struct heap *heap = mutator_heap(mut);
       if (swept_from_beginning) {
-        fprintf(stderr, "ran out of space, heap size %zu\n", heap->size);
-        abort();
+        out_of_memory(mut);
       } else {
         heap_lock(heap);
         if (mutators_are_stopping(heap))
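next_hole no longer takes a clear_size argument: rather than clearing memory speculatively while sweeping, allocate_small_slow clears a hole only after it has been chosen for the current allocation. A small toy model of that policy; the hole sizes and the 16-byte granule are made up for the example:

#include <stdio.h>
#include <string.h>

#define GRANULE 16

static unsigned char heap_bytes[16 * GRANULE];
static size_t bytes_cleared;

static void clear_memory(void *addr, size_t size) {
  memset(addr, 0, size);
  bytes_cleared += size;
}

int main(void) {
  // Pretend the sweep loop found three holes of these sizes (in granules).
  size_t holes[] = { 1, 2, 5 };
  size_t request = 4;              // granules needed for the allocation

  size_t offset = 0;
  for (size_t i = 0; i < 3; i++) {
    if (holes[i] >= request) {
      // Lazily clear just this hole, right before bump-allocating from it.
      clear_memory(heap_bytes + offset, holes[i] * GRANULE);
      break;
    }
    offset += holes[i] * GRANULE;  // skip too-small holes without clearing
  }
  printf("cleared %zu bytes for one allocation\n", bytes_cleared);
  return 0;
}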
@@ -756,6 +836,8 @@ static inline void* allocate_small(struct mutator *mut, enum alloc_kind kind,
     metadata[0] = METADATA_BYTE_YOUNG | METADATA_BYTE_END;
   } else {
     metadata[0] = METADATA_BYTE_YOUNG;
+    if (granules > 2)
+      memset(metadata + 1, 0, granules - 2);
     metadata[granules - 1] = METADATA_BYTE_END;
   }
   return obj;
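For a fresh small object the metadata bytes get an explicit shape: byte 0 is METADATA_BYTE_YOUNG, the last byte is METADATA_BYTE_END, and the commit adds a memset for the bytes in between, presumably because hole metadata is now cleared lazily and may still hold stale marks. A sketch of that layout with placeholder flag values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Placeholder values; the real METADATA_BYTE_* constants live in mark-sweep.h.
#define METADATA_BYTE_YOUNG 0x01
#define METADATA_BYTE_END   0x10

// Write the metadata bytes for one object of `granules` granules, following
// the shape of the allocate_small change above.
static void set_young_object_metadata(uint8_t *metadata, size_t granules) {
  if (granules == 1) {
    metadata[0] = METADATA_BYTE_YOUNG | METADATA_BYTE_END;
  } else {
    metadata[0] = METADATA_BYTE_YOUNG;
    if (granules > 2)
      memset(metadata + 1, 0, granules - 2);
    metadata[granules - 1] = METADATA_BYTE_END;
  }
}

int main(void) {
  uint8_t metadata[5];
  memset(metadata, 0xff, sizeof metadata);   // stale bytes from an old hole
  set_young_object_metadata(metadata, 5);
  for (size_t i = 0; i < 5; i++)
    printf("granule %zu: 0x%02x\n", i, metadata[i]);
  return 0;
}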
@@ -921,5 +1003,6 @@ static inline void print_start_gc_stats(struct heap *heap) {
 
 static inline void print_end_gc_stats(struct heap *heap) {
   printf("Completed %ld collections\n", heap->count);
-  printf("Heap size with overhead is %zd\n", heap->size);
+  printf("Heap size with overhead is %zd (%zu slabs)\n",
+         heap->size, heap_mark_space(heap)->nslabs);
 }