diff --git a/src/copy-space.h b/src/copy-space.h
new file mode 100644
index 000000000..6f10a3f7b
--- /dev/null
+++ b/src/copy-space.h
@@ -0,0 +1,566 @@
+#ifndef COPY_SPACE_H
+#define COPY_SPACE_H
+
+#include <stdatomic.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "gc-api.h"
+
+#define GC_IMPL 1
+#include "gc-internal.h"
+
+#include "assert.h"
+#include "debug.h"
+#include "gc-align.h"
+#include "gc-attrs.h"
+#include "gc-inline.h"
+#include "spin.h"
+
+// A copy space: a block-structured space that traces via evacuation.
+
+#define COPY_SPACE_SLAB_SIZE (64 * 1024 * 1024)
+#define COPY_SPACE_REGION_SIZE (64 * 1024)
+#define COPY_SPACE_BLOCK_SIZE (2 * COPY_SPACE_REGION_SIZE)
+#define COPY_SPACE_BLOCKS_PER_SLAB \
+  (COPY_SPACE_SLAB_SIZE / COPY_SPACE_BLOCK_SIZE)
+#define COPY_SPACE_HEADER_BYTES_PER_BLOCK \
+  (COPY_SPACE_BLOCK_SIZE / COPY_SPACE_BLOCKS_PER_SLAB)
+#define COPY_SPACE_HEADER_BLOCKS_PER_SLAB 1
+#define COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB \
+  (COPY_SPACE_BLOCKS_PER_SLAB - COPY_SPACE_HEADER_BLOCKS_PER_SLAB)
+#define COPY_SPACE_HEADER_BYTES_PER_SLAB \
+  (COPY_SPACE_HEADER_BYTES_PER_BLOCK * COPY_SPACE_HEADER_BLOCKS_PER_SLAB)
+
+struct copy_space_slab;
+
+struct copy_space_slab_header {
+  union {
+    struct {
+      struct copy_space_slab *next;
+      struct copy_space_slab *prev;
+      unsigned incore_block_count;
+    };
+    uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_SLAB];
+  };
+};
+STATIC_ASSERT_EQ(sizeof(struct copy_space_slab_header),
+                 COPY_SPACE_HEADER_BYTES_PER_SLAB);
+
+// Really just the block header.
+struct copy_space_block {
+  union {
+    struct {
+      struct copy_space_block *next;
+      uint8_t in_core;
+      size_t allocated; // For partly-empty blocks.
+    };
+    uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_BLOCK];
+  };
+};
+STATIC_ASSERT_EQ(sizeof(struct copy_space_block),
+                 COPY_SPACE_HEADER_BYTES_PER_BLOCK);
+
+struct copy_space_region {
+  char data[COPY_SPACE_REGION_SIZE];
+};
+
+struct copy_space_block_payload {
+  struct copy_space_region regions[2];
+};
+
+struct copy_space_slab {
+  struct copy_space_slab_header header;
+  struct copy_space_block headers[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB];
+  struct copy_space_block_payload blocks[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB];
+};
+STATIC_ASSERT_EQ(sizeof(struct copy_space_slab), COPY_SPACE_SLAB_SIZE);
+
+static inline struct copy_space_block*
+copy_space_block_header(struct copy_space_block_payload *payload) {
+  uintptr_t addr = (uintptr_t) payload;
+  uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
+  struct copy_space_slab *slab = (struct copy_space_slab*) base;
+  uintptr_t block_idx =
+    (addr / COPY_SPACE_BLOCK_SIZE) % COPY_SPACE_BLOCKS_PER_SLAB;
+  return &slab->headers[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB];
+}
+
+static inline struct copy_space_block_payload*
+copy_space_block_payload(struct copy_space_block *block) {
+  uintptr_t addr = (uintptr_t) block;
+  uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
+  struct copy_space_slab *slab = (struct copy_space_slab*) base;
+  uintptr_t block_idx =
+    (addr / COPY_SPACE_HEADER_BYTES_PER_BLOCK) % COPY_SPACE_BLOCKS_PER_SLAB;
+  return &slab->blocks[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB];
+}
+
+static uint8_t
+copy_space_object_region(struct gc_ref obj) {
+  return (gc_ref_value(obj) / COPY_SPACE_REGION_SIZE) & 1;
+}
+
+struct copy_space_extent {
+  uintptr_t low_addr;
+  uintptr_t high_addr;
+};
+
+struct copy_space {
+  struct copy_space_block *empty;
+  struct copy_space_block *partly_full;
+  struct copy_space_block *full ALIGNED_TO_AVOID_FALSE_SHARING;
+  size_t allocated_bytes;
+  size_t fragmentation;
+  struct
copy_space_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING; + ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING; + // The rest of these members are only changed rarely and with the heap + // lock. + uint8_t active_region ALIGNED_TO_AVOID_FALSE_SHARING; + size_t allocated_bytes_at_last_gc; + size_t fragmentation_at_last_gc; + struct copy_space_extent *extents; + size_t nextents; + struct copy_space_slab *slabs; + size_t nslabs; +}; + +struct copy_space_allocator { + uintptr_t hp; + uintptr_t limit; + struct copy_space_block *block; +}; + +static void +copy_space_push_block(struct copy_space_block **list, + struct copy_space_block *block) { + struct copy_space_block *next = + atomic_load_explicit(list, memory_order_acquire); + do { + block->next = next; + } while (!atomic_compare_exchange_weak(list, &next, block)); +} + +static struct copy_space_block* +copy_space_pop_block(struct copy_space_block **list) { + struct copy_space_block *head = + atomic_load_explicit(list, memory_order_acquire); + struct copy_space_block *next; + do { + if (!head) + return NULL; + } while (!atomic_compare_exchange_weak(list, &head, head->next)); + head->next = NULL; + return head; +} + +static struct copy_space_block* +copy_space_pop_empty_block(struct copy_space *space) { + struct copy_space_block *ret = copy_space_pop_block(&space->empty); + if (ret) + ret->allocated = 0; + return ret; +} + +static void +copy_space_push_empty_block(struct copy_space *space, + struct copy_space_block *block) { + copy_space_push_block(&space->empty, block); +} + +static struct copy_space_block* +copy_space_pop_full_block(struct copy_space *space) { + return copy_space_pop_block(&space->full); +} + +static void +copy_space_push_full_block(struct copy_space *space, + struct copy_space_block *block) { + copy_space_push_block(&space->full, block); +} + +static struct copy_space_block* +copy_space_pop_partly_full_block(struct copy_space *space) { + return copy_space_pop_block(&space->partly_full); +} + +static void +copy_space_push_partly_full_block(struct copy_space *space, + struct copy_space_block *block) { + copy_space_push_block(&space->partly_full, block); +} + +static struct copy_space_block* +copy_space_pop_paged_out_block(struct copy_space *space) { + return copy_space_pop_block(&space->paged_out); +} + +static void +copy_space_push_paged_out_block(struct copy_space *space, + struct copy_space_block *block) { + copy_space_push_block(&space->paged_out, block); +} + +static void +copy_space_page_out_block(struct copy_space *space, + struct copy_space_block *block) { + block->in_core = 0; + madvise(copy_space_block_payload(block), COPY_SPACE_BLOCK_SIZE, MADV_DONTNEED); + copy_space_push_paged_out_block(space, block); +} + +static struct copy_space_block* +copy_space_page_in_block(struct copy_space *space) { + struct copy_space_block* block = copy_space_pop_paged_out_block(space); + if (block) block->in_core = 1; + return block; +} + +static ssize_t +copy_space_request_release_memory(struct copy_space *space, size_t bytes) { + return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes; +} + +static int +copy_space_page_out_blocks_until_memory_released(struct copy_space *space) { + ssize_t pending = atomic_load(&space->bytes_to_page_out); + while (pending > 0) { + struct copy_space_block *block = copy_space_pop_empty_block(space); + if (!block) return 0; + copy_space_page_out_block(space, block); + pending = (atomic_fetch_sub(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE) + - COPY_SPACE_BLOCK_SIZE); + } + return 
1; +} + +static void +copy_space_reacquire_memory(struct copy_space *space, size_t bytes) { + ssize_t pending = + atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes; + while (pending + COPY_SPACE_BLOCK_SIZE <= 0) { + struct copy_space_block *block = copy_space_page_in_block(space); + GC_ASSERT(block); + copy_space_push_empty_block(space, block); + pending = (atomic_fetch_add(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE) + + COPY_SPACE_BLOCK_SIZE); + } +} + +static inline void +copy_space_allocator_set_block(struct copy_space_allocator *alloc, + struct copy_space_block *block, + int active_region) { + struct copy_space_block_payload *payload = copy_space_block_payload(block); + struct copy_space_region *region = &payload->regions[active_region]; + alloc->block = block; + alloc->hp = (uintptr_t)®ion[0]; + alloc->limit = (uintptr_t)®ion[1]; +} + +static inline int +copy_space_allocator_acquire_block(struct copy_space_allocator *alloc, + struct copy_space_block *block, + int active_region) { + if (block) { + copy_space_allocator_set_block(alloc, block, active_region); + return 1; + } + return 0; +} + +static int +copy_space_allocator_acquire_empty_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + return copy_space_allocator_acquire_block(alloc, + copy_space_pop_empty_block(space), + space->active_region); +} + +static int +copy_space_allocator_acquire_partly_full_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + if (copy_space_allocator_acquire_block(alloc, + copy_space_pop_partly_full_block(space), + space->active_region)) { + alloc->hp += alloc->block->allocated; + return 1; + } + return 0; +} + +static void +copy_space_allocator_release_full_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + size_t fragmentation = alloc->limit - alloc->hp; + size_t allocated = COPY_SPACE_REGION_SIZE - alloc->block->allocated; + atomic_fetch_add_explicit(&space->allocated_bytes, allocated, + memory_order_relaxed); + if (fragmentation) + atomic_fetch_add_explicit(&space->fragmentation, fragmentation, + memory_order_relaxed); + copy_space_push_full_block(space, alloc->block); + alloc->hp = alloc->limit = 0; + alloc->block = NULL; +} + +static void +copy_space_allocator_release_partly_full_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + size_t allocated = alloc->hp & (COPY_SPACE_REGION_SIZE - 1); + if (allocated) { + atomic_fetch_add_explicit(&space->allocated_bytes, + allocated - alloc->block->allocated, + memory_order_relaxed); + alloc->block->allocated = allocated; + copy_space_push_partly_full_block(space, alloc->block); + } else { + // In this case, hp was bumped all the way to the limit, in which + // case allocated wraps to 0; the block is full. 
+ atomic_fetch_add_explicit(&space->allocated_bytes, + COPY_SPACE_REGION_SIZE - alloc->block->allocated, + memory_order_relaxed); + copy_space_push_full_block(space, alloc->block); + } + alloc->hp = alloc->limit = 0; + alloc->block = NULL; +} + +static inline struct gc_ref +copy_space_allocate(struct copy_space_allocator *alloc, + struct copy_space *space, + size_t size, + void (*get_more_empty_blocks)(void *data), + void *data) { + GC_ASSERT(size > 0); + GC_ASSERT(size <= gc_allocator_large_threshold()); + size = align_up(size, gc_allocator_small_granule_size()); + + if (alloc->hp + size <= alloc->limit) + goto done; + + if (alloc->block) + copy_space_allocator_release_full_block(alloc, space); + while (copy_space_allocator_acquire_partly_full_block(alloc, space)) { + if (alloc->hp + size <= alloc->limit) + goto done; + copy_space_allocator_release_full_block(alloc, space); + } + while (!copy_space_allocator_acquire_empty_block(alloc, space)) + get_more_empty_blocks(data); + // The newly acquired block is empty and is therefore large enough for + // a small allocation. + +done: + struct gc_ref ret = gc_ref(alloc->hp); + alloc->hp += size; + return ret; +} + +static struct copy_space_block* +copy_space_append_block_lists(struct copy_space_block *head, + struct copy_space_block *tail) { + if (!head) return tail; + if (tail) { + struct copy_space_block *walk = head; + while (walk->next) + walk = walk->next; + walk->next = tail; + } + return head; +} + +static void +copy_space_flip(struct copy_space *space) { + // Mutators stopped, can access nonatomically. + struct copy_space_block *flip = space->full; + flip = copy_space_append_block_lists(space->partly_full, flip); + flip = copy_space_append_block_lists(space->empty, flip); + space->empty = flip; + space->partly_full = NULL; + space->full = NULL; + space->allocated_bytes = 0; + space->fragmentation = 0; + space->active_region ^= 1; +} + +static void +copy_space_finish_gc(struct copy_space *space) { + // Mutators stopped, can access nonatomically. + space->allocated_bytes_at_last_gc = space->allocated_bytes; + space->fragmentation_at_last_gc = space->fragmentation; +} + +static void +copy_space_gc_during_evacuation(void *data) { + // If space is really tight and reordering of objects during + // evacuation resulted in more end-of-block fragmentation and thus + // block use than before collection started, we can actually run out + // of memory while collecting. We should probably attempt to expand + // the heap here, at least by a single block; it's better than the + // alternatives. + fprintf(stderr, "Out of memory\n"); + GC_CRASH(); +} + +static inline int +copy_space_forward(struct copy_space *space, struct gc_edge edge, + struct gc_ref old_ref, struct copy_space_allocator *alloc) { + GC_ASSERT(copy_space_object_region(old_ref) != space->active_region); + struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); + + if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) + gc_atomic_forward_acquire(&fwd); + + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + case GC_FORWARDING_STATE_ABORTED: + default: + // Impossible. + GC_CRASH(); + case GC_FORWARDING_STATE_ACQUIRED: { + // We claimed the object successfully; evacuating is up to us. 
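+    // Allocate a to-space copy, copy the object's bytes, then publish
+    // the forwarding address.  If no empty block can be found, the
+    // get_more_empty_blocks callback (copy_space_gc_during_evacuation)
+    // reports out-of-memory and crashes.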
+ size_t bytes = gc_atomic_forward_object_size(&fwd); + struct gc_ref new_ref = + copy_space_allocate(alloc, space, bytes, + copy_space_gc_during_evacuation, NULL); + // Copy object contents before committing, as we don't know what + // part of the object (if any) will be overwritten by the + // commit. + memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), bytes); + gc_atomic_forward_commit(&fwd, new_ref); + gc_edge_update(edge, new_ref); + return 1; + } + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known, or evacuation aborts. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + break; + yield_for_spin(spin_count); + } + GC_ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); + // Fall through. + case GC_FORWARDING_STATE_FORWARDED: + // The object has been evacuated already. Update the edge; + // whoever forwarded the object will make sure it's eventually + // traced. + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 0; + } +} + +static int +copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge, + struct gc_ref old_ref) { + GC_ASSERT(copy_space_object_region(old_ref) != space->active_region); + struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + return 0; + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + break; + yield_for_spin(spin_count); + } + GC_ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); + // Fall through. + case GC_FORWARDING_STATE_FORWARDED: + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 1; + default: + GC_CRASH(); + } +} + +static inline int +copy_space_contains(struct copy_space *space, struct gc_ref ref) { + for (size_t i = 0; i < space->nextents; i++) + if (space->extents[i].low_addr <= gc_ref_value(ref) && + gc_ref_value(ref) < space->extents[i].high_addr) + return 1; + return 0; +} + +static inline void +copy_space_allocator_init(struct copy_space_allocator *alloc, + struct copy_space *space) { + memset(alloc, 0, sizeof(*alloc)); +} + +static inline void +copy_space_allocator_finish(struct copy_space_allocator *alloc, + struct copy_space *space) { + if (alloc->block) + copy_space_allocator_release_partly_full_block(alloc, space); +} + +static struct copy_space_slab* +copy_space_allocate_slabs(size_t nslabs) { + size_t size = nslabs * COPY_SPACE_SLAB_SIZE; + size_t extent = size + COPY_SPACE_SLAB_SIZE; + + char *mem = mmap(NULL, extent, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + perror("mmap failed"); + return NULL; + } + + uintptr_t base = (uintptr_t) mem; + uintptr_t end = base + extent; + uintptr_t aligned_base = align_up(base, COPY_SPACE_SLAB_SIZE); + uintptr_t aligned_end = aligned_base + size; + + if (aligned_base - base) + munmap((void*)base, aligned_base - base); + if (end - aligned_end) + munmap((void*)aligned_end, end - aligned_end); + + return (struct copy_space_slab*) aligned_base; +} + +static int +copy_space_init(struct copy_space *space, size_t size) { + size = align_up(size, COPY_SPACE_BLOCK_SIZE); + size_t reserved = align_up(size, COPY_SPACE_SLAB_SIZE); + size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE; + struct copy_space_slab *slabs = copy_space_allocate_slabs(nslabs); + if (!slabs) + return 0; + 
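+  // The slab-aligned reservation may be larger than the requested size;
+  // the excess blocks start out paged out rather than on the empty list.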
+ space->empty = NULL; + space->partly_full = NULL; + space->full = NULL; + space->paged_out = NULL; + space->allocated_bytes = 0; + space->fragmentation = 0; + space->bytes_to_page_out = 0; + space->active_region = 0; + space->allocated_bytes_at_last_gc = 0; + space->fragmentation_at_last_gc = 0; + space->extents = calloc(1, sizeof(struct copy_space_extent)); + space->extents[0].low_addr = (uintptr_t) slabs; + space->extents[0].high_addr = space->extents[0].low_addr + reserved; + space->nextents = 1; + space->slabs = slabs; + space->nslabs = nslabs; + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = 0; idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; idx++) { + struct copy_space_block *block = &slabs[slab].headers[idx]; + if (reserved > size) { + block->in_core = 0; + copy_space_push_paged_out_block(space, block); + reserved -= COPY_SPACE_BLOCK_SIZE; + } else { + block->in_core = 1; + copy_space_push_empty_block(space, block); + } + } + } + return 1; +} + +#endif // COPY_SPACE_H diff --git a/src/pcc.c b/src/pcc.c index 1638abf85..b0aeddda0 100644 --- a/src/pcc.c +++ b/src/pcc.c @@ -12,6 +12,7 @@ #define GC_IMPL 1 #include "gc-internal.h" +#include "copy-space.h" #include "debug.h" #include "gc-align.h" #include "gc-inline.h" @@ -21,106 +22,8 @@ #include "spin.h" #include "pcc-attrs.h" -#define SLAB_SIZE (64 * 1024 * 1024) -#define REGION_SIZE (64 * 1024) -#define BLOCK_SIZE (2 * REGION_SIZE) -#define BLOCKS_PER_SLAB (SLAB_SIZE / BLOCK_SIZE) -#define HEADER_BYTES_PER_BLOCK (BLOCK_SIZE / BLOCKS_PER_SLAB) -#define HEADER_BLOCKS_PER_SLAB 1 -#define NONHEADER_BLOCKS_PER_SLAB (BLOCKS_PER_SLAB - HEADER_BLOCKS_PER_SLAB) -#define HEADER_BYTES_PER_SLAB (HEADER_BYTES_PER_BLOCK * HEADER_BLOCKS_PER_SLAB) - -struct pcc_slab; -struct pcc_block; - -struct pcc_slab_header { - union { - struct { - struct pcc_slab *next; - struct pcc_slab *prev; - unsigned incore_block_count; - }; - uint8_t padding[HEADER_BYTES_PER_SLAB]; - }; -}; -STATIC_ASSERT_EQ(sizeof(struct pcc_slab_header), - HEADER_BYTES_PER_SLAB); - -// Really just the block header. -struct pcc_block { - union { - struct { - struct pcc_block *next; - uint8_t in_core; - size_t allocated; // For partly-empty blocks. 
- }; - uint8_t padding[HEADER_BYTES_PER_BLOCK]; - }; -}; -STATIC_ASSERT_EQ(sizeof(struct pcc_block), - HEADER_BYTES_PER_BLOCK); - -struct pcc_region { - char data[REGION_SIZE]; -}; - -struct pcc_block_payload { - struct pcc_region regions[2]; -}; - -struct pcc_slab { - struct pcc_slab_header header; - struct pcc_block headers[NONHEADER_BLOCKS_PER_SLAB]; - struct pcc_block_payload blocks[NONHEADER_BLOCKS_PER_SLAB]; -}; -STATIC_ASSERT_EQ(sizeof(struct pcc_slab), SLAB_SIZE); - -static struct pcc_block *block_header(struct pcc_block_payload *payload) { - uintptr_t addr = (uintptr_t) payload; - uintptr_t base = align_down(addr, SLAB_SIZE); - struct pcc_slab *slab = (struct pcc_slab*) base; - uintptr_t block_idx = (addr / BLOCK_SIZE) % BLOCKS_PER_SLAB; - return &slab->headers[block_idx - HEADER_BLOCKS_PER_SLAB]; -} - -static struct pcc_block_payload *block_payload(struct pcc_block *block) { - uintptr_t addr = (uintptr_t) block; - uintptr_t base = align_down(addr, SLAB_SIZE); - struct pcc_slab *slab = (struct pcc_slab*) base; - uintptr_t block_idx = (addr / HEADER_BYTES_PER_BLOCK) % BLOCKS_PER_SLAB; - return &slab->blocks[block_idx - HEADER_BLOCKS_PER_SLAB]; -} - -static uint8_t pcc_object_region(struct gc_ref obj) { - return (gc_ref_value(obj) / REGION_SIZE) & 1; -} - -struct pcc_extent { - uintptr_t low_addr; - uintptr_t high_addr; -}; - -struct pcc_space { - struct pcc_block *empty; - struct pcc_block *partly_full; - struct pcc_block *full ALIGNED_TO_AVOID_FALSE_SHARING; - size_t full_block_count; - struct pcc_block *paged_out ALIGNED_TO_AVOID_FALSE_SHARING; - size_t fragmentation ALIGNED_TO_AVOID_FALSE_SHARING; - ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING; - // The rest of these members are only changed rarely and with the heap - // lock. - uint8_t active_region ALIGNED_TO_AVOID_FALSE_SHARING; - size_t live_bytes_at_last_gc; - size_t fragmentation_at_last_gc; - struct pcc_extent *extents; - size_t nextents; - struct pcc_slab *slabs; - size_t nslabs; -}; - struct gc_heap { - struct pcc_space pcc_space; + struct copy_space copy_space; struct large_object_space large_object_space; struct gc_extern_space *extern_space; size_t large_object_pages; @@ -150,14 +53,8 @@ struct gc_heap { #define MUTATOR_EVENT(mut, event, ...) 
\ (mut)->heap->event_listener.event((mut)->event_listener_data, ##__VA_ARGS__) -struct gc_allocator { - uintptr_t hp; - uintptr_t limit; - struct pcc_block *block; -}; - struct gc_mutator { - struct gc_allocator allocator; + struct copy_space_allocator allocator; struct gc_heap *heap; struct gc_mutator_roots *roots; void *event_listener_data; @@ -166,11 +63,11 @@ struct gc_mutator { }; struct gc_trace_worker_data { - struct gc_allocator allocator; + struct copy_space_allocator allocator; }; -static inline struct pcc_space* heap_pcc_space(struct gc_heap *heap) { - return &heap->pcc_space; +static inline struct copy_space* heap_copy_space(struct gc_heap *heap) { + return &heap->copy_space; } static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) { return &heap->large_object_space; @@ -182,202 +79,6 @@ static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) { return mutator->heap; } -static void push_block(struct pcc_block **list, - struct pcc_block *block) { - struct pcc_block *next = atomic_load_explicit(list, memory_order_acquire); - do { - block->next = next; - } while (!atomic_compare_exchange_weak(list, &next, block)); -} - -static struct pcc_block* pop_block(struct pcc_block **list) { - struct pcc_block *head = atomic_load_explicit(list, memory_order_acquire); - struct pcc_block *next; - do { - if (!head) - return NULL; - } while (!atomic_compare_exchange_weak(list, &head, head->next)); - head->next = NULL; - return head; -} - -static struct pcc_block* pop_empty_block(struct pcc_space *space) { - return pop_block(&space->empty); -} -static void push_empty_block(struct pcc_space *space, - struct pcc_block *block) { - push_block(&space->empty, block); -} - -static struct pcc_block* pop_full_block(struct pcc_space *space) { - return pop_block(&space->full); -} -static void push_full_block(struct pcc_space *space, - struct pcc_block *block) { - push_block(&space->full, block); - atomic_fetch_add_explicit(&space->full_block_count, 1, - memory_order_relaxed); -} - -static struct pcc_block* pop_partly_full_block(struct pcc_space *space) { - return pop_block(&space->partly_full); -} -static void push_partly_full_block(struct pcc_space *space, - struct pcc_block *block, - size_t allocated_bytes) { - GC_ASSERT(allocated_bytes); - block->allocated = allocated_bytes; - push_block(&space->partly_full, block); -} - -static struct pcc_block* pop_paged_out_block(struct pcc_space *space) { - return pop_block(&space->paged_out); -} -static void push_paged_out_block(struct pcc_space *space, - struct pcc_block *block) { - push_block(&space->paged_out, block); -} - -static void page_out_block(struct pcc_space *space, - struct pcc_block *block) { - block->in_core = 0; - madvise(block_payload(block), BLOCK_SIZE, MADV_DONTNEED); - push_paged_out_block(space, block); -} - -static struct pcc_block* page_in_block(struct pcc_space *space) { - struct pcc_block* block = pop_paged_out_block(space); - if (block) block->in_core = 1; - return block; -} - -static void record_fragmentation(struct pcc_space *space, - size_t bytes) { - atomic_fetch_add_explicit(&space->fragmentation, bytes, - memory_order_relaxed); -} - -static ssize_t pcc_space_request_release_memory(struct pcc_space *space, - size_t bytes) { - return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes; -} - -static int -pcc_space_page_out_blocks_until_memory_released(struct pcc_space *space) { - ssize_t pending = atomic_load(&space->bytes_to_page_out); - while (pending > 0) { - struct pcc_block *block 
= pop_empty_block(space);
-    if (!block) return 0;
-    page_out_block(space, block);
-    pending =
-      atomic_fetch_sub(&space->bytes_to_page_out, BLOCK_SIZE) - BLOCK_SIZE;
-  }
-  return 1;
-}
-
-static void pcc_space_reacquire_memory(struct pcc_space *space,
-                                       size_t bytes) {
-  ssize_t pending =
-    atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes;
-  while (pending + BLOCK_SIZE <= 0) {
-    struct pcc_block *block = page_in_block(space);
-    GC_ASSERT(block);
-    push_empty_block(space, block);
-    pending =
-      atomic_fetch_add(&space->bytes_to_page_out, BLOCK_SIZE) + BLOCK_SIZE;
-  }
-}
-
-static inline void allocator_set_block(struct gc_allocator *alloc,
-                                       struct pcc_block *block,
-                                       int active_region) {
-  struct pcc_block_payload *payload = block_payload(block);
-  struct pcc_region *region = &payload->regions[active_region];
-  alloc->block = block;
-  alloc->hp = (uintptr_t)&region[0];
-  alloc->limit = (uintptr_t)&region[1];
-}
-
-static inline int allocator_acquire_block(struct gc_allocator *alloc,
-                                          struct pcc_block *block,
-                                          int active_region) {
-  if (block) {
-    allocator_set_block(alloc, block, active_region);
-    return 1;
-  }
-  return 0;
-}
-
-static int
-allocator_acquire_empty_block(struct gc_allocator *alloc,
-                              struct pcc_space *space) {
-  return allocator_acquire_block(alloc, pop_empty_block(space),
-                                 space->active_region);
-}
-
-static int
-allocator_acquire_partly_full_block(struct gc_allocator *alloc,
-                                    struct pcc_space *space) {
-  if (allocator_acquire_block(alloc, pop_partly_full_block(space),
-                              space->active_region)) {
-    alloc->hp += alloc->block->allocated;
-    return 1;
-  }
-  return 0;
-}
-
-static void allocator_release_full_block(struct gc_allocator *alloc,
-                                         struct pcc_space *space) {
-  record_fragmentation(space, alloc->limit - alloc->hp);
-  push_full_block(space, alloc->block);
-  alloc->hp = alloc->limit = 0;
-  alloc->block = NULL;
-}
-
-static void allocator_release_partly_full_block(struct gc_allocator *alloc,
-                                                struct pcc_space *space) {
-  size_t allocated = alloc->hp & (REGION_SIZE - 1);
-  if (allocated) {
-    push_partly_full_block(space, alloc->block, allocated);
-  } else {
-    // Could be hp was bumped all the way to the limit, in which case
-    // allocated wraps to 0; in any case the block is full.
-    push_full_block(space, alloc->block);
-  }
-  alloc->hp = alloc->limit = 0;
-  alloc->block = NULL;
-}
-
-static inline struct gc_ref allocate(struct gc_allocator *alloc,
-                                     struct pcc_space *space,
-                                     size_t size,
-                                     void (*get_more_empty_blocks)(void *data),
-                                     void *data) {
-  GC_ASSERT(size > 0);
-  GC_ASSERT(size <= gc_allocator_large_threshold());
-  size = align_up(size, GC_ALIGNMENT);
-
-  if (alloc->hp + size <= alloc->limit)
-    goto done;
-
-  if (alloc->block)
-    allocator_release_full_block(alloc, space);
-  while (allocator_acquire_partly_full_block(alloc, space)) {
-    if (alloc->hp + size <= alloc->limit)
-      goto done;
-    allocator_release_full_block(alloc, space);
-  }
-  while (!allocator_acquire_empty_block(alloc, space))
-    get_more_empty_blocks(data);
-  // The newly acquired block is empty and is therefore large enough for
-  // a small allocation.
- -done: - struct gc_ref ret = gc_ref(alloc->hp); - alloc->hp += size; - return ret; -} - static void gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, struct gc_heap *heap, @@ -386,110 +87,10 @@ gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, struct gc_tracer *tracer, struct gc_heap *heap, struct gc_trace_worker *worker) { - struct gc_trace_worker_data data = {{0,0,NULL},}; + struct gc_trace_worker_data data; + copy_space_allocator_init(&data.allocator, heap_copy_space(heap)); f(tracer, heap, worker, &data); - if (data.allocator.block) - allocator_release_partly_full_block(&data.allocator, heap_pcc_space(heap)); -} - -static struct pcc_block* -append_block_lists(struct pcc_block *head, struct pcc_block *tail) { - if (!head) return tail; - if (tail) { - struct pcc_block *walk = head; - while (walk->next) - walk = walk->next; - walk->next = tail; - } - return head; -} - -static void pcc_space_flip(struct pcc_space *space) { - // Mutators stopped, can access nonatomically. - space->empty = - append_block_lists(space->empty, - append_block_lists(space->partly_full, space->full)); - space->partly_full = NULL; - space->full = NULL; - space->full_block_count = 0; - space->fragmentation = 0; - space->active_region ^= 1; -} - -static void pcc_space_finish_gc(struct pcc_space *space) { - // Mutators stopped, can access nonatomically. - space->live_bytes_at_last_gc = space->full_block_count * REGION_SIZE; - space->fragmentation_at_last_gc = space->fragmentation; -} - -static void get_more_empty_blocks_during_evacuation(void *data) { - // If space is really tight and reordering of objects during - // evacuation resulted in more end-of-block fragmentation and thus - // block use than before collection started, we can actually run out - // of memory while collecting. We should probably attempt to expand - // the heap here, at least by a single block; it's better than the - // alternatives. - fprintf(stderr, "Out of memory\n"); - GC_CRASH(); -} - -static inline int pcc_space_forward(struct pcc_space *space, - struct gc_edge edge, - struct gc_ref old_ref, - struct gc_trace_worker_data *data) { - GC_ASSERT(pcc_object_region(old_ref) != space->active_region); - struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); - - if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) - gc_atomic_forward_acquire(&fwd); - - switch (fwd.state) { - case GC_FORWARDING_STATE_NOT_FORWARDED: - case GC_FORWARDING_STATE_ABORTED: - default: - // Impossible. - GC_CRASH(); - case GC_FORWARDING_STATE_ACQUIRED: { - // We claimed the object successfully; evacuating is up to us. - size_t bytes = gc_atomic_forward_object_size(&fwd); - struct gc_ref new_ref = allocate(&data->allocator, space, bytes, - get_more_empty_blocks_during_evacuation, - NULL); - // Copy object contents before committing, as we don't know what - // part of the object (if any) will be overwritten by the - // commit. - memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), bytes); - gc_atomic_forward_commit(&fwd, new_ref); - gc_edge_update(edge, new_ref); - return 1; - } - case GC_FORWARDING_STATE_BUSY: - // Someone else claimed this object first. Spin until new address - // known, or evacuation aborts. - for (size_t spin_count = 0;; spin_count++) { - if (gc_atomic_forward_retry_busy(&fwd)) - break; - yield_for_spin(spin_count); - } - GC_ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); - // Fall through. - case GC_FORWARDING_STATE_FORWARDED: - // The object has been evacuated already. 
Update the edge; - // whoever forwarded the object will make sure it's eventually - // traced. - gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); - return 0; - } -} - -static inline int pcc_space_contains(struct pcc_space *space, - struct gc_ref ref) { - - for (size_t i = 0; i < space->nextents; i++) - if (space->extents[i].low_addr <= gc_ref_value(ref) && - gc_ref_value(ref) < space->extents[i].high_addr) - return 1; - return 0; + copy_space_allocator_finish(&data.allocator, heap_copy_space(heap)); } static inline int do_trace(struct gc_heap *heap, struct gc_edge edge, @@ -497,8 +98,9 @@ static inline int do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_trace_worker_data *data) { if (!gc_ref_is_heap_object(ref)) return 0; - if (GC_LIKELY(pcc_space_contains(heap_pcc_space(heap), ref))) - return pcc_space_forward(heap_pcc_space(heap), edge, ref, data); + if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref))) + return copy_space_forward(heap_copy_space(heap), edge, ref, + &data->allocator); else if (large_object_space_contains(heap_large_object_space(heap), ref)) return large_object_space_mark_object(heap_large_object_space(heap), ref); else @@ -523,30 +125,10 @@ int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { struct gc_ref ref = gc_edge_ref(edge); if (!gc_ref_is_heap_object(ref)) return 0; - if (GC_LIKELY(pcc_space_contains(heap_pcc_space(heap), ref))) { - struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref); - switch (fwd.state) { - case GC_FORWARDING_STATE_NOT_FORWARDED: - return 0; - case GC_FORWARDING_STATE_BUSY: - // Someone else claimed this object first. Spin until new address - // known. - for (size_t spin_count = 0;; spin_count++) { - if (gc_atomic_forward_retry_busy(&fwd)) - break; - yield_for_spin(spin_count); - } - GC_ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED); - // Fall through. - case GC_FORWARDING_STATE_FORWARDED: - gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); - return 1; - default: - GC_CRASH(); - } - } else if (large_object_space_contains(heap_large_object_space(heap), ref)) { + if (GC_LIKELY(copy_space_contains(heap_copy_space(heap), ref))) + return copy_space_forward_if_traced(heap_copy_space(heap), edge, ref); + if (large_object_space_contains(heap_large_object_space(heap), ref)) return large_object_space_is_copied(heap_large_object_space(heap), ref); - } GC_CRASH(); } @@ -571,6 +153,7 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { mut->heap = heap; mut->event_listener_data = heap->event_listener.mutator_added(heap->event_listener_data); + copy_space_allocator_init(&mut->allocator, heap_copy_space(heap)); heap_lock(heap); // We have no roots. If there is a GC currently in progress, we have // nothing to add. Just wait until it's done. 
@@ -590,8 +173,7 @@ static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { MUTATOR_EVENT(mut, mutator_removed); mut->heap = NULL; - if (mut->allocator.block) - allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap)); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); heap_lock(heap); heap->mutator_count--; if (mut->next) @@ -627,7 +209,7 @@ static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { GC_ASSERT(npages <= previous); size_t bytes = (previous - npages) << heap_large_object_space(heap)->page_size_log2; - pcc_space_reacquire_memory(heap_pcc_space(heap), bytes); + copy_space_reacquire_memory(heap_copy_space(heap), bytes); } void gc_mutator_set_roots(struct gc_mutator *mut, @@ -654,8 +236,8 @@ tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, struct gc_trace_worker *worker) { #ifdef DEBUG - if (pcc_space_contains(heap_pcc_space(heap), ref)) - GC_ASSERT(pcc_object_region(ref) == heap_pcc_space(heap)->active_region); + if (copy_space_contains(heap_copy_space(heap), ref)) + GC_ASSERT(copy_space_object_region(ref) == heap_copy_space(heap)->active_region); #endif gc_trace_object(ref, tracer_visit, heap, worker, NULL); } @@ -726,8 +308,7 @@ static void pause_mutator_for_collection_without_lock(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); GC_ASSERT(mutators_are_stopping(heap)); MUTATOR_EVENT(mut, mutator_stopping); - if (mut->allocator.block) - allocator_release_full_block(&mut->allocator, heap_pcc_space(heap)); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); heap_lock(heap); pause_mutator_for_collection(heap, mut); heap_unlock(heap); @@ -794,7 +375,7 @@ static void sweep_ephemerons(struct gc_heap *heap) { static void collect(struct gc_mutator *mut) GC_NEVER_INLINE; static void collect(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); - struct pcc_space *cspace = heap_pcc_space(heap); + struct copy_space *copy_space = heap_copy_space(heap); struct large_object_space *lospace = heap_large_object_space(heap); struct gc_extern_space *exspace = heap_extern_space(heap); MUTATOR_EVENT(mut, mutator_cause_gc); @@ -808,7 +389,7 @@ static void collect(struct gc_mutator *mut) { HEAP_EVENT(heap, waiting_for_stop); wait_for_mutators_to_stop(heap); HEAP_EVENT(heap, mutators_stopped); - pcc_space_flip(cspace); + copy_space_flip(copy_space); gc_tracer_prepare(&heap->tracer); add_roots(heap); HEAP_EVENT(heap, roots_traced); @@ -821,18 +402,18 @@ static void collect(struct gc_mutator *mut) { HEAP_EVENT(heap, finalizers_traced); sweep_ephemerons(heap); gc_tracer_release(&heap->tracer); - pcc_space_finish_gc(cspace); + copy_space_finish_gc(copy_space); large_object_space_finish_gc(lospace, 0); gc_extern_space_finish_gc(exspace, 0); heap->count++; heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection); - size_t live_size = (cspace->live_bytes_at_last_gc + + size_t live_size = (copy_space->allocated_bytes_at_last_gc + large_object_space_size_at_last_collection(lospace)); HEAP_EVENT(heap, live_data_size, live_size); maybe_grow_heap(heap); - if (!pcc_space_page_out_blocks_until_memory_released(cspace)) { + if (!copy_space_page_out_blocks_until_memory_released(copy_space)) { fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n", - heap->size, cspace->nslabs); + heap->size, 
copy_space->nslabs); GC_CRASH(); } HEAP_EVENT(heap, restarting_mutators); @@ -841,8 +422,7 @@ static void collect(struct gc_mutator *mut) { static void trigger_collection(struct gc_mutator *mut) { struct gc_heap *heap = mutator_heap(mut); - if (mut->allocator.block) - allocator_release_full_block(&mut->allocator, heap_pcc_space(heap)); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); heap_lock(heap); long epoch = heap->count; while (mutators_are_stopping(heap)) @@ -862,9 +442,9 @@ static void* allocate_large(struct gc_mutator *mut, size_t size) { size_t npages = large_object_space_npages(space, size); - pcc_space_request_release_memory(heap_pcc_space(heap), + copy_space_request_release_memory(heap_copy_space(heap), npages << space->page_size_log2); - while (!pcc_space_page_out_blocks_until_memory_released(heap_pcc_space(heap))) + while (!copy_space_page_out_blocks_until_memory_released(heap_copy_space(heap))) trigger_collection(mut); atomic_fetch_add(&heap->large_object_pages, npages); @@ -890,10 +470,13 @@ void* gc_allocate_slow(struct gc_mutator *mut, size_t size) { if (size > gc_allocator_large_threshold()) return allocate_large(mut, size); - return gc_ref_heap_object(allocate(&mut->allocator, - heap_pcc_space(mutator_heap(mut)), - size, get_more_empty_blocks_for_mutator, - mut)); + struct gc_ref ret = copy_space_allocate(&mut->allocator, + heap_copy_space(mutator_heap(mut)), + size, + get_more_empty_blocks_for_mutator, + mut); + gc_clear_fresh_allocation(ret, size); + return gc_ref_heap_object(ret); } void* gc_allocate_pointerless(struct gc_mutator *mut, size_t size) { @@ -939,30 +522,6 @@ void gc_set_finalizer_callback(struct gc_heap *heap, gc_finalizer_state_set_callback(heap->finalizer_state, callback); } -static struct pcc_slab* allocate_slabs(size_t nslabs) { - size_t size = nslabs * SLAB_SIZE; - size_t extent = size + SLAB_SIZE; - - char *mem = mmap(NULL, extent, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - perror("mmap failed"); - return NULL; - } - - uintptr_t base = (uintptr_t) mem; - uintptr_t end = base + extent; - uintptr_t aligned_base = align_up(base, SLAB_SIZE); - uintptr_t aligned_end = aligned_base + size; - - if (aligned_base - base) - munmap((void*)base, aligned_base - base); - if (end - aligned_end) - munmap((void*)aligned_end, end - aligned_end); - - return (struct pcc_slab*) aligned_base; -} - static int heap_prepare_pending_ephemerons(struct gc_heap *heap) { struct gc_pending_ephemerons *cur = heap->pending_ephemerons; size_t target = heap->size * heap->pending_ephemerons_size_factor; @@ -1024,45 +583,6 @@ static int heap_init(struct gc_heap *heap, const struct gc_options *options) { return 1; } -static int pcc_space_init(struct pcc_space *space, struct gc_heap *heap) { - size_t size = align_up(heap->size, SLAB_SIZE); - size_t nslabs = size / SLAB_SIZE; - struct pcc_slab *slabs = allocate_slabs(nslabs); - if (!slabs) - return 0; - - space->empty = NULL; - space->partly_full = NULL; - space->full = NULL; - space->full_block_count = 0; - space->paged_out = NULL; - space->fragmentation = 0; - space->bytes_to_page_out = 0; - space->active_region = 0; - space->live_bytes_at_last_gc = 0; - space->fragmentation_at_last_gc = 0; - space->extents = calloc(1, sizeof(struct pcc_extent)); - space->extents[0].low_addr = (uintptr_t) slabs; - space->extents[0].high_addr = space->extents[0].low_addr + size; - space->nextents = 1; - space->slabs = slabs; - space->nslabs = nslabs; - for (size_t slab = 0; slab < 
nslabs; slab++) { - for (size_t idx = 0; idx < NONHEADER_BLOCKS_PER_SLAB; idx++) { - struct pcc_block *block = &slabs[slab].headers[idx]; - if (size > heap->size) { - block->in_core = 0; - push_paged_out_block(space, block); - size -= BLOCK_SIZE; - } else { - block->in_core = 1; - push_empty_block(space, block); - } - } - } - return 1; -} - int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, struct gc_heap **heap, struct gc_mutator **mut, struct gc_event_listener event_listener, @@ -1071,9 +591,9 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD); GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator)); GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), - offsetof(struct gc_allocator, hp)); + offsetof(struct copy_space_allocator, hp)); GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), - offsetof(struct gc_allocator, limit)); + offsetof(struct copy_space_allocator, limit)); if (options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { fprintf(stderr, "fixed heap size is currently required\n"); @@ -1090,8 +610,8 @@ int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, (*heap)->event_listener_data = event_listener_data; HEAP_EVENT(*heap, init, (*heap)->size); - struct pcc_space *space = heap_pcc_space(*heap); - if (!pcc_space_init(space, *heap)) { + struct copy_space *space = heap_copy_space(*heap); + if (!copy_space_init(space, (*heap)->size)) { free(*heap); *heap = NULL; return 0; @@ -1122,8 +642,7 @@ void gc_finish_for_thread(struct gc_mutator *mut) { static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { GC_ASSERT(mut->next == NULL); - if (mut->allocator.block) - allocator_release_partly_full_block(&mut->allocator, heap_pcc_space(heap)); + copy_space_allocator_finish(&mut->allocator, heap_copy_space(heap)); heap_lock(heap); heap->inactive_mutator_count++; if (all_mutators_stopped(heap))