From d137e1397c349493804024c56bef308178e74f55 Mon Sep 17 00:00:00 2001
From: Andy Wingo
Date: Thu, 22 Aug 2024 18:04:21 +0200
Subject: [PATCH] Instead of partitioning blocks by flag, put them in separate
 lists

This way you can directly iterate blocks of a certain kind.  Also verify
these lists more thoroughly, and allow full blocks that result from
evacuation to skip being swept the next round.  Also!  Have
next_hole_in_block ensure that the object data and the mark bytes are
clear.
---
 src/nofl-space.h | 558 +++++++++++++++++++++++++++++++-----------------------
 src/whippet.c    |   5 +-
 2 files changed, 310 insertions(+), 253 deletions(-)

diff --git a/src/nofl-space.h b/src/nofl-space.h
index eba3cd386..134fbccd8 100644
--- a/src/nofl-space.h
+++ b/src/nofl-space.h
@@ -70,14 +70,14 @@ STATIC_ASSERT_EQ(sizeof(struct nofl_slab_header), NOFL_HEADER_BYTES_PER_SLAB);
 // non-atomically by the mutator when it owns a block; otherwise they
 // need to be accessed atomically.
 enum nofl_block_summary_flag {
-  NOFL_BLOCK_OUT_FOR_THREAD = 0x1,
-  NOFL_BLOCK_HAS_PIN = 0x2,
-  NOFL_BLOCK_PAGED_OUT = 0x4,
-  NOFL_BLOCK_NEEDS_SWEEP = 0x8,
-  NOFL_BLOCK_UNAVAILABLE = 0x10,
-  NOFL_BLOCK_EVACUATE = 0x20,
-  NOFL_BLOCK_VENERABLE = 0x40,
-  NOFL_BLOCK_VENERABLE_AFTER_SWEEP = 0x80,
+  NOFL_BLOCK_EVACUATE = 0x1,
+  NOFL_BLOCK_ZERO = 0x2,
+  NOFL_BLOCK_UNAVAILABLE = 0x4,
+  NOFL_BLOCK_FLAG_UNUSED_3 = 0x8,
+  NOFL_BLOCK_FLAG_UNUSED_4 = 0x10,
+  NOFL_BLOCK_FLAG_UNUSED_5 = 0x20,
+  NOFL_BLOCK_FLAG_UNUSED_6 = 0x40,
+  NOFL_BLOCK_FLAG_UNUSED_7 = 0x80,
   NOFL_BLOCK_FLAG_UNUSED_8 = 0x100,
   NOFL_BLOCK_FLAG_UNUSED_9 = 0x200,
   NOFL_BLOCK_FLAG_UNUSED_10 = 0x400,
@@ -141,14 +141,17 @@ struct nofl_space {
   size_t extent;
   size_t heap_size;
   uint8_t last_collection_was_minor;
-  uintptr_t next_block; // atomically
   struct nofl_block_list empty;
   struct nofl_block_list unavailable;
+  struct nofl_block_list to_sweep;
   struct nofl_block_list partly_full;
+  struct nofl_block_list full;
+  struct nofl_block_list promoted;
+  struct nofl_block_list old;
   struct nofl_block_list evacuation_targets;
   double evacuation_minimum_reserve;
   double evacuation_reserve;
-  double venerable_threshold;
+  double promotion_threshold;
   ssize_t pending_unavailable_bytes; // atomically
   struct nofl_slab *slabs;
   size_t nslabs;
@@ -277,6 +280,7 @@ static void
 nofl_push_block(struct nofl_block_list *list, uintptr_t block) {
   atomic_fetch_add_explicit(&list->count, 1, memory_order_acq_rel);
   struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
+  GC_ASSERT_EQ(nofl_block_summary_next(summary), 0);
   uintptr_t next = atomic_load_explicit(&list->blocks, memory_order_acquire);
   do {
     nofl_block_summary_set_next(summary, next);
@@ -306,10 +310,8 @@ nofl_block_count(struct nofl_block_list *list) {
 
 static void
 nofl_push_unavailable_block(struct nofl_space *space, uintptr_t block) {
-  struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
-  GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP));
-  GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE));
-  nofl_block_summary_set_flag(summary, NOFL_BLOCK_UNAVAILABLE);
+  nofl_block_summary_set_flag(nofl_block_summary_for_addr(block),
+                              NOFL_BLOCK_ZERO | NOFL_BLOCK_UNAVAILABLE);
   madvise((void*)block, NOFL_BLOCK_SIZE, MADV_DONTNEED);
   nofl_push_block(&space->unavailable, block);
 }
@@ -317,14 +319,17 @@ nofl_push_unavailable_block(struct nofl_space *space, uintptr_t block) {
 static uintptr_t
 nofl_pop_unavailable_block(struct nofl_space *space) {
   uintptr_t block = nofl_pop_block(&space->unavailable);
-  if (!block)
-    return 0;
-  struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
-  GC_ASSERT(nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE));
-  nofl_block_summary_clear_flag(summary, NOFL_BLOCK_UNAVAILABLE);
+  if (block)
+    nofl_block_summary_clear_flag(nofl_block_summary_for_addr(block),
+                                  NOFL_BLOCK_UNAVAILABLE);
   return block;
 }
 
+static void
+nofl_push_empty_block(struct nofl_space *space, uintptr_t block) {
+  nofl_push_block(&space->empty, block);
+}
+
 static uintptr_t
 nofl_pop_empty_block(struct nofl_space *space) {
   return nofl_pop_block(&space->empty);
@@ -333,8 +338,6 @@ nofl_pop_empty_block(struct nofl_space *space) {
 static int
 nofl_maybe_push_evacuation_target(struct nofl_space *space,
                                   uintptr_t block, double reserve) {
-  GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block),
-                                         NOFL_BLOCK_NEEDS_SWEEP));
   size_t targets = nofl_block_count(&space->evacuation_targets);
   size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
   size_t unavailable = nofl_block_count(&space->unavailable);
@@ -359,13 +362,6 @@ nofl_push_evacuation_target_if_possible(struct nofl_space *space,
                                            space->evacuation_reserve);
 }
 
-static void
-nofl_push_empty_block(struct nofl_space *space, uintptr_t block) {
-  GC_ASSERT(!nofl_block_summary_has_flag(nofl_block_summary_for_addr(block),
-                                         NOFL_BLOCK_NEEDS_SWEEP));
-  nofl_push_block(&space->empty, block);
-}
-
 static inline void
 nofl_clear_memory(uintptr_t addr, size_t size) {
   memset((char*)addr, 0, size);
@@ -376,17 +372,6 @@ nofl_space_live_object_granules(uint8_t *metadata) {
   return scan_for_byte(metadata, -1, broadcast_byte(NOFL_METADATA_BYTE_END)) + 1;
 }
 
-static void
-nofl_clear_remaining_metadata_bytes_in_block(uintptr_t block,
-                                             uintptr_t allocated) {
-  GC_ASSERT((allocated & (NOFL_GRANULE_SIZE - 1)) == 0);
-  uintptr_t base = block + allocated;
-  uintptr_t limit = block + NOFL_BLOCK_SIZE;
-  uintptr_t granules = (limit - base) >> NOFL_GRANULE_SIZE_LOG_2;
-  GC_ASSERT(granules <= NOFL_GRANULES_PER_BLOCK);
-  memset(nofl_metadata_byte_for_addr(base), 0, granules);
-}
-
 static void
 nofl_allocator_reset(struct nofl_allocator *alloc) {
   alloc->alloc = alloc->sweep = alloc->block = 0;
@@ -394,12 +379,10 @@ static void
 nofl_allocator_release_full_block(struct nofl_allocator *alloc,
-                                  struct nofl_space *space,
-                                  struct nofl_block_summary *summary) {
+                                  struct nofl_space *space) {
   GC_ASSERT(alloc->block);
   GC_ASSERT(alloc->alloc == alloc->sweep);
-  GC_ASSERT(!nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE));
-
+  struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block);
   atomic_fetch_add(&space->granules_freed_by_last_collection,
                    summary->free_granules);
   atomic_fetch_add(&space->fragmentation_granules_since_last_collection,
@@ -409,24 +392,51 @@ nofl_allocator_release_full_block(struct nofl_allocator *alloc,
   // trying to allocate into it for a minor GC.  Sweep it next time to
   // clear any garbage allocated in this cycle and mark it as
   // "venerable" (i.e., old).
-  if (!nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP) &&
-      summary->free_granules < NOFL_GRANULES_PER_BLOCK * space->venerable_threshold)
-    nofl_block_summary_set_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP);
+  if (GC_GENERATIONAL &&
+      summary->free_granules < NOFL_GRANULES_PER_BLOCK * space->promotion_threshold)
+    nofl_push_block(&space->promoted, alloc->block);
+  else
+    nofl_push_block(&space->full, alloc->block);
   nofl_allocator_reset(alloc);
 }
 
+static void
+nofl_allocator_release_full_evacuation_target(struct nofl_allocator *alloc,
+                                              struct nofl_space *space) {
+  GC_ASSERT(alloc->alloc > alloc->block);
+  GC_ASSERT(alloc->sweep == alloc->block + NOFL_BLOCK_SIZE);
+  size_t hole_size = alloc->sweep - alloc->alloc;
+  struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block);
+  // FIXME: Check how this affects statistics.
+  GC_ASSERT_EQ(summary->hole_count, 1);
+  GC_ASSERT_EQ(summary->free_granules, NOFL_GRANULES_PER_BLOCK);
+  atomic_fetch_add(&space->granules_freed_by_last_collection,
+                   NOFL_GRANULES_PER_BLOCK);
+  if (hole_size) {
+    hole_size >>= NOFL_GRANULE_SIZE_LOG_2;
+    summary->holes_with_fragmentation = 1;
+    summary->fragmentation_granules = hole_size;
+    atomic_fetch_add(&space->fragmentation_granules_since_last_collection,
+                     summary->fragmentation_granules);
+  } else {
+    GC_ASSERT_EQ(summary->fragmentation_granules, 0);
+    GC_ASSERT_EQ(summary->holes_with_fragmentation, 0);
+  }
+  nofl_push_block(&space->old, alloc->block);
+  nofl_allocator_reset(alloc);
+}
+
 static void
 nofl_allocator_release_partly_full_block(struct nofl_allocator *alloc,
-                                         struct nofl_space *space,
-                                         struct nofl_block_summary *summary) {
+                                         struct nofl_space *space) {
   // A block can go on the partly full list if it has exactly one
   // hole, located at the end of the block.
   GC_ASSERT(alloc->alloc > alloc->block);
   GC_ASSERT(alloc->sweep == alloc->block + NOFL_BLOCK_SIZE);
-  GC_ASSERT(nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP));
   size_t hole_size = alloc->sweep - alloc->alloc;
   GC_ASSERT(hole_size);
+  struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block);
   summary->fragmentation_granules = hole_size >> NOFL_GRANULE_SIZE_LOG_2;
   nofl_push_block(&space->partly_full, alloc->block);
   nofl_allocator_reset(alloc);
@@ -457,13 +467,24 @@ nofl_allocator_acquire_empty_block(struct nofl_allocator *alloc,
   summary->free_granules = NOFL_GRANULES_PER_BLOCK;
   summary->holes_with_fragmentation = 0;
   summary->fragmentation_granules = 0;
-  nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP);
   alloc->block = alloc->alloc = block;
   alloc->sweep = block + NOFL_BLOCK_SIZE;
-  nofl_clear_memory(block, NOFL_BLOCK_SIZE);
+  if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_ZERO))
+    nofl_block_summary_clear_flag(summary, NOFL_BLOCK_ZERO);
+  else
+    nofl_clear_memory(block, NOFL_BLOCK_SIZE);
   return NOFL_GRANULES_PER_BLOCK;
 }
 
+static size_t
+nofl_allocator_acquire_evacuation_target(struct nofl_allocator* alloc,
+                                         struct nofl_space *space) {
+  size_t granules = nofl_allocator_acquire_partly_full_block(alloc, space);
+  if (granules)
+    return granules;
+  return nofl_allocator_acquire_empty_block(alloc, space);
+}
+
 static void
 nofl_allocator_finish_hole(struct nofl_allocator *alloc) {
   size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE;
@@ -471,8 +492,6 @@
     struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block);
     summary->holes_with_fragmentation++;
     summary->fragmentation_granules += granules;
-    uint8_t *metadata = nofl_metadata_byte_for_addr(alloc->alloc);
-    memset(metadata, 0, granules);
     alloc->alloc = alloc->sweep;
   }
 }
@@ -513,15 +532,18 @@ nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc,
   }
 
   size_t free_granules = scan_for_byte(metadata, limit_granules, sweep_mask);
+  size_t free_bytes = free_granules * NOFL_GRANULE_SIZE;
   GC_ASSERT(free_granules);
   GC_ASSERT(free_granules <= limit_granules);
+
+  memset(metadata, 0, free_granules);
+  memset((char*)sweep, 0, free_bytes);
 
   struct nofl_block_summary *summary = nofl_block_summary_for_addr(sweep);
   summary->hole_count++;
   GC_ASSERT(free_granules <= NOFL_GRANULES_PER_BLOCK - summary->free_granules);
   summary->free_granules += free_granules;
 
-  size_t free_bytes = free_granules * NOFL_GRANULE_SIZE;
   alloc->alloc = sweep;
   alloc->sweep = sweep + free_bytes;
   return free_granules;
@@ -539,14 +561,15 @@ static void
 nofl_allocator_release_block(struct nofl_allocator *alloc,
                              struct nofl_space *space) {
   GC_ASSERT(alloc->block);
-  struct nofl_block_summary *summary = nofl_block_summary_for_addr(alloc->block);
   if (alloc->alloc < alloc->sweep &&
       alloc->sweep == alloc->block + NOFL_BLOCK_SIZE &&
-      summary->holes_with_fragmentation == 0) {
-    nofl_allocator_release_partly_full_block(alloc, space, summary);
+      nofl_block_summary_for_addr(alloc->block)->holes_with_fragmentation == 0) {
+    nofl_allocator_release_partly_full_block(alloc, space);
+  } else if (space->evacuating) {
+    nofl_allocator_release_full_evacuation_target(alloc, space);
   } else {
     nofl_allocator_finish_sweeping_in_block(alloc, space->sweep_mask);
-    nofl_allocator_release_full_block(alloc, space, summary);
+    nofl_allocator_release_full_block(alloc, space);
   }
 }
 
@@ -556,28 +579,6 @@ nofl_allocator_finish(struct nofl_allocator *alloc, struct nofl_space *space) {
   nofl_allocator_release_block(alloc, space);
 }
 
-static uintptr_t
-nofl_space_next_block_to_sweep(struct nofl_space *space) {
-  uintptr_t block = atomic_load_explicit(&space->next_block,
-                                         memory_order_acquire);
-  uintptr_t next_block;
-  do {
-    if (block == 0)
-      return 0;
-
-    next_block = block + NOFL_BLOCK_SIZE;
-    if (next_block % NOFL_SLAB_SIZE == 0) {
-      uintptr_t hi_addr = space->low_addr + space->extent;
-      if (next_block == hi_addr)
-        next_block = 0;
-      else
-        next_block += NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE;
-    }
-  } while (!atomic_compare_exchange_weak(&space->next_block, &block,
-                                         next_block));
-  return block;
-}
-
 static int
 nofl_maybe_release_swept_empty_block(struct nofl_allocator *alloc,
                                      struct nofl_space *space) {
@@ -593,6 +594,17 @@ nofl_maybe_release_swept_empty_block(struct nofl_allocator *alloc,
   return 1;
 }
 
+static int
+nofl_allocator_acquire_block_to_sweep(struct nofl_allocator *alloc,
+                                      struct nofl_space *space) {
+  uintptr_t block = nofl_pop_block(&space->to_sweep);
+  if (block) {
+    alloc->block = alloc->alloc = alloc->sweep = block;
+    return 1;
+  }
+  return 0;
+}
+
 static size_t
 nofl_allocator_next_hole(struct nofl_allocator *alloc,
                          struct nofl_space *space) {
@@ -604,8 +616,6 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
   while (1) {
     // Sweep current block for a hole.
     if (alloc->block) {
-      struct nofl_block_summary *summary =
-        nofl_block_summary_for_addr(alloc->block);
       size_t granules =
         nofl_allocator_next_hole_in_block(alloc, space->sweep_mask);
       if (granules) {
@@ -613,10 +623,8 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
         // to use it.
         if (granules < NOFL_GRANULES_PER_BLOCK)
          return granules;
-        // Otherwise we have an empty block.
-        nofl_clear_remaining_metadata_bytes_in_block(alloc->block, 0);
-        nofl_block_summary_clear_flag(summary, NOFL_BLOCK_NEEDS_SWEEP);
-        // If we need an evacuation reserve block, take it.
+        // Otherwise we have an empty block.  If we need an evacuation reserve
+        // block, take it.
        if (nofl_push_evacuation_target_if_needed(space, alloc->block)) {
          nofl_allocator_reset(alloc);
          continue;
@@ -627,17 +635,14 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
          continue;
        // Otherwise if we've already returned lots of empty blocks to the
        // freelist, let the allocator keep this block.
-        if (!empties_countdown) {
-          // After this block is allocated into, it will need to be swept.
-          nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP);
+        if (!empties_countdown)
          return granules;
-        }
        // Otherwise we push to the empty blocks list.
        nofl_push_empty_block(space, alloc->block);
        nofl_allocator_reset(alloc);
        empties_countdown--;
      } else {
-        nofl_allocator_release_full_block(alloc, space, summary);
+        nofl_allocator_release_full_block(alloc, space);
      }
    }
 
@@ -649,72 +654,30 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
       return granules;
     }
 
-    while (1) {
-      uintptr_t block = nofl_space_next_block_to_sweep(space);
-      if (block) {
-        // Sweeping found a block.  We might take it for allocation, or
-        // we might send it back.
-        struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
-        // If it's marked unavailable, it's already on a list of
-        // unavailable blocks, so skip and get the next block.
-        if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE))
-          continue;
-        if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE)) {
-          // Skip venerable blocks after a minor GC -- we don't need to
-          // sweep as they weren't allocated into last cycle, and the
-          // mark bytes didn't rotate, so we have no cleanup to do; and
-          // we shouldn't try to allocate into them as it's not worth
-          // it.  Any wasted space is measured as fragmentation.
-          if (space->last_collection_was_minor)
-            continue;
-          else
-            nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE);
-        }
-        if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) {
-          // Prepare to sweep the block for holes.
-          alloc->alloc = alloc->sweep = alloc->block = block;
-          if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP)) {
-            // In the last cycle we noted that this block consists of
-            // mostly old data.  Sweep any garbage, commit the mark as
-            // venerable, and avoid allocating into it.
-            nofl_block_summary_clear_flag(summary, NOFL_BLOCK_VENERABLE_AFTER_SWEEP);
-            if (space->last_collection_was_minor) {
-              nofl_allocator_finish_sweeping_in_block(alloc, space->sweep_mask);
-              nofl_allocator_release_full_block(alloc, space, summary);
-              nofl_block_summary_set_flag(summary, NOFL_BLOCK_VENERABLE);
-              continue;
-            }
-          }
-          // This block was marked in the last GC and needs sweeping.
-          // As we sweep we'll want to record how many bytes were live
-          // at the last collection.  As we allocate we'll record how
-          // many granules were wasted because of fragmentation.
-          summary->hole_count = 0;
-          summary->free_granules = 0;
-          summary->holes_with_fragmentation = 0;
-          summary->fragmentation_granules = 0;
-          break;
-        } else {
-          // Otherwise this block is completely empty and is on the
-          // empties list.  We take from the empties list only after all
-          // the NEEDS_SWEEP blocks are processed.
-          continue;
-        }
-      } else {
-        // We are done sweeping for blocks.  Now take from the empties
-        // list.
-        block = nofl_pop_empty_block(space);
-        // No empty block?  Return 0 to cause collection.
-        if (!block)
-          return 0;
+    if (nofl_allocator_acquire_block_to_sweep(alloc, space)) {
+      struct nofl_block_summary *summary =
+        nofl_block_summary_for_addr(alloc->block);
+      // This block was marked in the last GC and needs sweeping.
+      // As we sweep we'll want to record how many bytes were live
+      // at the last collection.  As we allocate we'll record how
+      // many granules were wasted because of fragmentation.
+      summary->hole_count = 0;
+      summary->free_granules = 0;
+      summary->holes_with_fragmentation = 0;
+      summary->fragmentation_granules = 0;
+      continue;
+    }
+    // We are done sweeping for blocks.  Now take from the empties list.
+    {
+      uintptr_t block;
+      while ((block = nofl_pop_empty_block(space))) {
        // Maybe we should use this empty as a target for evacuation.
        if (nofl_push_evacuation_target_if_possible(space, block))
          continue;
        // Otherwise give the block to the allocator.
        struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
-        nofl_block_summary_set_flag(summary, NOFL_BLOCK_NEEDS_SWEEP);
        summary->hole_count = 1;
        summary->free_granules = NOFL_GRANULES_PER_BLOCK;
        summary->holes_with_fragmentation = 0;
@@ -725,6 +688,9 @@ nofl_allocator_next_hole(struct nofl_allocator *alloc,
        return NOFL_GRANULES_PER_BLOCK;
      }
    }
+
+    // Couldn't acquire another block; return 0 to cause collection.
+    return 0;
   }
 }
 
@@ -740,7 +706,6 @@ nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space,
   while (1) {
     size_t hole = nofl_allocator_next_hole(alloc, space);
     if (hole >= granules) {
-      nofl_clear_memory(alloc->alloc, hole * NOFL_GRANULE_SIZE);
       break;
     }
     if (!hole)
@@ -754,33 +719,22 @@ nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space,
   return ret;
 }
 
-static size_t
-nofl_allocator_acquire_evacuation_block(struct nofl_allocator* alloc,
-                                        struct nofl_space *space) {
-  size_t granules = nofl_allocator_acquire_partly_full_block(alloc, space);
-  if (granules)
-    return granules;
-  return nofl_allocator_acquire_empty_block(alloc, space);
-}
-
 static struct gc_ref
 nofl_evacuation_allocate(struct nofl_allocator* alloc, struct nofl_space *space,
                          size_t granules) {
   size_t avail = (alloc->sweep - alloc->alloc) >> NOFL_GRANULE_SIZE_LOG_2;
   while (avail < granules) {
-    if (alloc->block) {
-      nofl_allocator_finish_hole(alloc);
-      nofl_allocator_release_full_block(alloc, space,
-                                        nofl_block_summary_for_addr(alloc->block));
-    }
-    avail = nofl_allocator_acquire_evacuation_block(alloc, space);
+    if (alloc->block)
+      // No need to finish the hole, these mark bytes are zero.
+      nofl_allocator_release_full_evacuation_target(alloc, space);
+    avail = nofl_allocator_acquire_evacuation_target(alloc, space);
     if (!avail)
       return gc_ref_null();
   }
   struct gc_ref ret = gc_ref(alloc->alloc);
   alloc->alloc += granules * NOFL_GRANULE_SIZE;
-  gc_update_alloc_table(ret, granules * NOFL_GRANULE_SIZE);
+  // Caller is responsible for updating alloc table.
   return ret;
 }
 
@@ -860,27 +814,12 @@ nofl_space_trace_remembered_set(struct nofl_space *space,
 static void
 nofl_space_clear_remembered_set(struct nofl_space *space) {
   if (!GC_GENERATIONAL) return;
+  // FIXME: Don't assume slabs are contiguous.
   for (size_t slab = 0; slab < space->nslabs; slab++) {
     memset(space->slabs[slab].remembered_set, 0, NOFL_REMSET_BYTES_PER_SLAB);
   }
 }
 
-static void
-nofl_space_reset_sweeper(struct nofl_space *space) {
-  space->next_block = (uintptr_t) &space->slabs[0].blocks;
-}
-
-static void
-nofl_space_update_mark_patterns(struct nofl_space *space,
-                                int advance_mark_mask) {
-  uint8_t survivor_mask = space->marked_mask;
-  uint8_t next_marked_mask = nofl_rotate_dead_survivor_marked(survivor_mask);
-  if (advance_mark_mask)
-    space->marked_mask = next_marked_mask;
-  space->live_mask = survivor_mask | next_marked_mask;
-  space->sweep_mask = broadcast_byte(space->live_mask);
-}
-
 static void
 nofl_space_reset_statistics(struct nofl_space *space) {
   space->granules_freed_by_last_collection = 0;
@@ -911,6 +850,12 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
     while ((block = nofl_pop_block(&space->evacuation_targets)))
       nofl_push_empty_block(space, block);
   }
+  // Blocks are either to_sweep, empty, or unavailable.
+  GC_ASSERT_EQ(nofl_block_count(&space->partly_full), 0);
+  GC_ASSERT_EQ(nofl_block_count(&space->full), 0);
+  GC_ASSERT_EQ(nofl_block_count(&space->promoted), 0);
+  GC_ASSERT_EQ(nofl_block_count(&space->old), 0);
+  GC_ASSERT_EQ(nofl_block_count(&space->evacuation_targets), 0);
 
   size_t target_blocks = nofl_block_count(&space->empty);
   DEBUG("evacuation target block count: %zu\n", target_blocks);
@@ -933,28 +878,17 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
   const size_t bucket_count = 33;
   size_t histogram[33] = {0,};
   size_t bucket_size = NOFL_GRANULES_PER_BLOCK / 32;
-  size_t empties = 0;
-  for (size_t slab = 0; slab < space->nslabs; slab++) {
-    for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) {
-      struct nofl_block_summary *summary = &space->slabs[slab].summaries[block];
-      if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE))
-        continue;
-      if (!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP)) {
-        empties++;
-        continue;
-      }
+  {
+    uintptr_t block = space->to_sweep.blocks;
+    while (block) {
+      struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
       size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - summary->free_granules;
       size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size;
       histogram[bucket]++;
+      block = nofl_block_summary_next(summary);
     }
   }
 
-  // Blocks which lack the NEEDS_SWEEP flag are empty, either because
-  // they have been removed from the pool and have the UNAVAILABLE flag
-  // set, or because they are on the empties or evacuation target
-  // lists.  When evacuation starts, the empties list should be empty.
-  GC_ASSERT(empties == target_blocks);
-
   // Now select a number of blocks that is likely to fill the space in
   // the target blocks.  Prefer candidate blocks with fewer survivors
   // from the last GC, to increase expected free block yield.
@@ -969,14 +903,11 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
   }
 
   // Having selected the number of blocks, now we set the evacuation
-  // candidate flag on all blocks.
-  for (size_t slab = 0; slab < space->nslabs; slab++) {
-    for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) {
-      struct nofl_block_summary *summary = &space->slabs[slab].summaries[block];
-      if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE))
-        continue;
-      if (!nofl_block_summary_has_flag(summary, NOFL_BLOCK_NEEDS_SWEEP))
-        continue;
+  // candidate flag on all blocks that have live objects.
+  {
+    uintptr_t block = space->to_sweep.blocks;
+    while (block) {
+      struct nofl_block_summary *summary = nofl_block_summary_for_addr(block);
       size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - summary->free_granules;
       size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size;
       if (histogram[bucket]) {
@@ -985,10 +916,50 @@ nofl_space_prepare_evacuation(struct nofl_space *space) {
       } else {
         nofl_block_summary_clear_flag(summary, NOFL_BLOCK_EVACUATE);
       }
+      block = nofl_block_summary_next(summary);
     }
   }
 }
 
+static void
+nofl_space_update_mark_patterns(struct nofl_space *space,
+                                int advance_mark_mask) {
+  uint8_t survivor_mask = space->marked_mask;
+  uint8_t next_marked_mask = nofl_rotate_dead_survivor_marked(survivor_mask);
+  if (advance_mark_mask)
+    space->marked_mask = next_marked_mask;
+  space->live_mask = survivor_mask | next_marked_mask;
+  space->sweep_mask = broadcast_byte(space->live_mask);
+}
+
+static void
+nofl_space_prepare_gc(struct nofl_space *space, enum gc_collection_kind kind) {
+  nofl_space_update_mark_patterns(space, !(kind == GC_COLLECTION_MINOR));
+}
+
+static void
+nofl_space_start_gc(struct nofl_space *space, enum gc_collection_kind gc_kind) {
+  GC_ASSERT_EQ(nofl_block_count(&space->partly_full), 0);
+  GC_ASSERT_EQ(nofl_block_count(&space->to_sweep), 0);
+
+  // Any block that was the target of allocation in the last cycle will need to
+  // be swept next cycle.
+  uintptr_t block;
+  while ((block = nofl_pop_block(&space->full)))
+    nofl_push_block(&space->to_sweep, block);
+
+  if (gc_kind != GC_COLLECTION_MINOR) {
+    uintptr_t block;
+    while ((block = nofl_pop_block(&space->promoted)))
+      nofl_push_block(&space->to_sweep, block);
+    while ((block = nofl_pop_block(&space->old)))
+      nofl_push_block(&space->to_sweep, block);
+  }
+
+  if (gc_kind == GC_COLLECTION_COMPACTING)
+    nofl_space_prepare_evacuation(space);
+}
+
 static void
 nofl_space_finish_evacuation(struct nofl_space *space) {
   // When evacuation began, the evacuation reserve was moved to the
@@ -996,7 +967,6 @@ nofl_space_finish_evacuation(struct nofl_space *space) {
   // repopulate the reserve.
   GC_ASSERT(space->evacuating);
   space->evacuating = 0;
-  space->evacuation_reserve = space->evacuation_minimum_reserve;
   size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
   size_t unavailable = nofl_block_count(&space->unavailable);
   size_t reserve = space->evacuation_minimum_reserve * (total - unavailable);
@@ -1006,13 +976,15 @@ nofl_space_finish_evacuation(struct nofl_space *space) {
     if (!block) break;
     nofl_push_block(&space->evacuation_targets, block);
   }
-  {
-    // FIXME: We should avoid sweeping partly full blocks, but it's too annoying
-    // to do at the moment given the way sweeping works.
-    uintptr_t block;
-    do {
-      block = nofl_pop_block(&space->partly_full);
-    } while (block);
+}
+
+static void
+nofl_space_promote_blocks(struct nofl_space *space) {
+  uintptr_t block;
+  while ((block = nofl_pop_block(&space->promoted))) {
+    struct nofl_allocator alloc = { block, block, block };
+    nofl_allocator_finish_sweeping_in_block(&alloc, space->sweep_mask);
+    nofl_push_block(&space->old, block);
   }
 }
 
@@ -1022,50 +994,135 @@ nofl_size_to_granules(size_t size) {
 }
 
 static void
-nofl_space_verify_before_restart(struct nofl_space *space) {
-  GC_ASSERT_EQ(nofl_block_count(&space->partly_full), 0);
-  // Iterate objects in each block, verifying that the END bytes correspond to
-  // the measured object size.
-  for (size_t slab = 0; slab < space->nslabs; slab++) {
-    for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) {
-      struct nofl_block_summary *summary = &space->slabs[slab].summaries[block];
-      if (nofl_block_summary_has_flag(summary, NOFL_BLOCK_UNAVAILABLE))
-        continue;
-
-      uintptr_t addr = (uintptr_t)space->slabs[slab].blocks[block].data;
-      uintptr_t limit = addr + NOFL_BLOCK_SIZE;
-      uint8_t *meta = nofl_metadata_byte_for_addr(addr);
-      while (addr < limit) {
-        if (meta[0] & space->live_mask) {
-          struct gc_ref obj = gc_ref(addr);
-          size_t obj_bytes;
-          gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes);
-          size_t granules = nofl_size_to_granules(obj_bytes);
-          GC_ASSERT(granules);
-          for (size_t granule = 0; granule < granules - 1; granule++)
-            GC_ASSERT(!(meta[granule] & NOFL_METADATA_BYTE_END));
-          GC_ASSERT(meta[granules - 1] & NOFL_METADATA_BYTE_END);
-          meta += granules;
-          addr += granules * NOFL_GRANULE_SIZE;
-        } else {
-          meta++;
-          addr += NOFL_GRANULE_SIZE;
-        }
+nofl_space_verify_sweepable_blocks(struct nofl_space *space,
+                                   struct nofl_block_list *list)
+{
+  uintptr_t addr = list->blocks;
+  while (addr) {
+    struct nofl_block_summary *summary = nofl_block_summary_for_addr(addr);
+    // Iterate objects in the block, verifying that the END bytes correspond to
+    // the measured object size.
+    uintptr_t limit = addr + NOFL_BLOCK_SIZE;
+    uint8_t *meta = nofl_metadata_byte_for_addr(addr);
+    while (addr < limit) {
+      if (meta[0] & space->live_mask) {
+        struct gc_ref obj = gc_ref(addr);
+        size_t obj_bytes;
+        gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes);
+        size_t granules = nofl_size_to_granules(obj_bytes);
+        GC_ASSERT(granules);
+        for (size_t granule = 0; granule < granules - 1; granule++)
+          GC_ASSERT(!(meta[granule] & NOFL_METADATA_BYTE_END));
+        GC_ASSERT(meta[granules - 1] & NOFL_METADATA_BYTE_END);
+        meta += granules;
+        addr += granules * NOFL_GRANULE_SIZE;
+      } else {
+        meta++;
+        addr += NOFL_GRANULE_SIZE;
       }
-      GC_ASSERT(addr == limit);
     }
+    GC_ASSERT(addr == limit);
+    addr = nofl_block_summary_next(summary);
   }
 }
 
+static void
+nofl_space_verify_swept_blocks(struct nofl_space *space,
+                               struct nofl_block_list *list) {
+  uintptr_t addr = list->blocks;
+  while (addr) {
+    struct nofl_block_summary *summary = nofl_block_summary_for_addr(addr);
+    // Iterate objects in the block, verifying that the END bytes correspond to
+    // the measured object size.
+    uintptr_t limit = addr + NOFL_BLOCK_SIZE;
+    uint8_t *meta = nofl_metadata_byte_for_addr(addr);
+    while (addr < limit) {
+      if (meta[0]) {
+        GC_ASSERT(meta[0] & space->marked_mask);
+        GC_ASSERT_EQ(meta[0] & ~(space->marked_mask | NOFL_METADATA_BYTE_END), 0);
+        struct gc_ref obj = gc_ref(addr);
+        size_t obj_bytes;
+        gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes);
+        size_t granules = nofl_size_to_granules(obj_bytes);
+        GC_ASSERT(granules);
+        for (size_t granule = 0; granule < granules - 1; granule++)
+          GC_ASSERT(!(meta[granule] & NOFL_METADATA_BYTE_END));
+        GC_ASSERT(meta[granules - 1] & NOFL_METADATA_BYTE_END);
+        meta += granules;
+        addr += granules * NOFL_GRANULE_SIZE;
+      } else {
+        meta++;
+        addr += NOFL_GRANULE_SIZE;
+      }
+    }
+    GC_ASSERT(addr == limit);
+    addr = nofl_block_summary_next(summary);
+  }
+}
+
+static void
+nofl_space_verify_empty_blocks(struct nofl_space *space,
+                               struct nofl_block_list *list,
+                               int paged_in) {
+  uintptr_t addr = list->blocks;
+  while (addr) {
+    struct nofl_block_summary *summary = nofl_block_summary_for_addr(addr);
+    // Check that the mark bytes are clear and, for paged-in blocks, that the
+    // object data is zero.
+    uintptr_t limit = addr + NOFL_BLOCK_SIZE;
+    uint8_t *meta = nofl_metadata_byte_for_addr(addr);
+    while (addr < limit) {
+      GC_ASSERT_EQ(*meta, 0);
+      if (paged_in) {
+        char zeroes[NOFL_GRANULE_SIZE] = { 0, };
+        GC_ASSERT_EQ(memcmp((char*)addr, zeroes, NOFL_GRANULE_SIZE), 0);
+      }
+      meta++;
+      addr += NOFL_GRANULE_SIZE;
+    }
+    GC_ASSERT(addr == limit);
+    addr = nofl_block_summary_next(summary);
+  }
+}
+
+static void
+nofl_space_verify_before_restart(struct nofl_space *space) {
+  nofl_space_verify_sweepable_blocks(space, &space->to_sweep);
+  nofl_space_verify_sweepable_blocks(space, &space->promoted);
+  // If there are full or partly full blocks, they were filled during
+  // evacuation.
+  nofl_space_verify_swept_blocks(space, &space->partly_full);
+  nofl_space_verify_swept_blocks(space, &space->full);
+  nofl_space_verify_swept_blocks(space, &space->old);
+  nofl_space_verify_empty_blocks(space, &space->empty, 1);
+  nofl_space_verify_empty_blocks(space, &space->unavailable, 0);
+  // GC_ASSERT(space->last_collection_was_minor || !nofl_block_count(&space->old));
+}
+
 static void
 nofl_space_finish_gc(struct nofl_space *space,
                      enum gc_collection_kind gc_kind) {
   space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR);
   if (space->evacuating)
     nofl_space_finish_evacuation(space);
-  nofl_space_reset_sweeper(space);
-  nofl_space_update_mark_patterns(space, 0);
+  else {
+    space->evacuation_reserve = space->evacuation_minimum_reserve;
+    // If we were evacuating and preferentially allocated empty blocks
+    // to the evacuation reserve, return those blocks to the empty set
+    // for allocation by the mutator.
+    size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB;
+    size_t unavailable = nofl_block_count(&space->unavailable);
+    size_t target = space->evacuation_minimum_reserve * (total - unavailable);
+    size_t reserve = nofl_block_count(&space->evacuation_targets);
+    while (reserve-- > target)
+      nofl_push_block(&space->empty,
+                      nofl_pop_block(&space->evacuation_targets));
+  }
+
+  // FIXME: Promote concurrently instead of during the pause.
+  nofl_space_promote_blocks(space);
   nofl_space_reset_statistics(space);
+  nofl_space_update_mark_patterns(space, 0);
   if (GC_DEBUG)
     nofl_space_verify_before_restart(space);
 }
@@ -1361,7 +1418,7 @@ nofl_allocate_slabs(size_t nslabs) {
 
 static int
 nofl_space_init(struct nofl_space *space, size_t size, int atomic,
-                double venerable_threshold) {
+                double promotion_threshold) {
   size = align_up(size, NOFL_BLOCK_SIZE);
   size_t reserved = align_up(size, NOFL_SLAB_SIZE);
   size_t nslabs = reserved / NOFL_SLAB_SIZE;
@@ -1375,10 +1432,9 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic,
   space->nslabs = nslabs;
   space->low_addr = (uintptr_t) slabs;
   space->extent = reserved;
-  space->next_block = 0;
   space->evacuation_minimum_reserve = 0.02;
   space->evacuation_reserve = space->evacuation_minimum_reserve;
-  space->venerable_threshold = venerable_threshold;
+  space->promotion_threshold = promotion_threshold;
   for (size_t slab = 0; slab < nslabs; slab++) {
     for (size_t block = 0; block < NOFL_NONMETA_BLOCKS_PER_SLAB; block++) {
       uintptr_t addr = (uintptr_t)slabs[slab].blocks[block].data;
@@ -1386,6 +1442,8 @@ nofl_space_init(struct nofl_space *space, size_t size, int atomic,
         nofl_push_unavailable_block(space, addr);
         reserved -= NOFL_BLOCK_SIZE;
       } else {
+        nofl_block_summary_set_flag(nofl_block_summary_for_addr(addr),
+                                    NOFL_BLOCK_ZERO);
         if (!nofl_push_evacuation_target_if_needed(space, addr))
           nofl_push_empty_block(space, addr);
       }
diff --git a/src/whippet.c b/src/whippet.c
index 6e942d7da..1a00939b1 100644
--- a/src/whippet.c
+++ b/src/whippet.c
@@ -1024,7 +1024,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
     determine_collection_kind(heap, requested_kind);
   int is_minor = gc_kind == GC_COLLECTION_MINOR;
   HEAP_EVENT(heap, prepare_gc, gc_kind);
-  nofl_space_update_mark_patterns(nofl_space, !is_minor);
+  nofl_space_prepare_gc(nofl_space, gc_kind);
   large_object_space_start_gc(lospace, is_minor);
   gc_extern_space_start_gc(exspace, is_minor);
   resolve_ephemerons_lazily(heap);
@@ -1042,8 +1042,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) {
   DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation);
   detect_out_of_memory(heap);
   trace_pinned_roots_after_stop(heap);
-  if (gc_kind == GC_COLLECTION_COMPACTING)
-    nofl_space_prepare_evacuation(nofl_space);
+  nofl_space_start_gc(nofl_space, gc_kind);
   trace_roots_after_stop(heap);
   HEAP_EVENT(heap, roots_traced);
   gc_tracer_trace(&heap->tracer);