// guile/whippet.h
//
// Commit 52166fe286 (Andy Wingo, 2022-07-20): Add gc_edge data
// structure.  Less casting in user programs, and it's a step on the way
// to evacuation in whippet.

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include "assert.h"
#include "debug.h"
#include "inline.h"
#include "large-object-space.h"
#include "precise-roots.h"
#ifdef GC_PARALLEL_TRACE
#include "parallel-tracer.h"
#else
#include "serial-tracer.h"
#endif
#define GRANULE_SIZE 16
#define GRANULE_SIZE_LOG_2 4
#define MEDIUM_OBJECT_THRESHOLD 256
#define MEDIUM_OBJECT_GRANULE_THRESHOLD 16
#define LARGE_OBJECT_THRESHOLD 8192
#define LARGE_OBJECT_GRANULE_THRESHOLD 512
STATIC_ASSERT_EQ(GRANULE_SIZE, 1 << GRANULE_SIZE_LOG_2);
STATIC_ASSERT_EQ(MEDIUM_OBJECT_THRESHOLD,
MEDIUM_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);
STATIC_ASSERT_EQ(LARGE_OBJECT_THRESHOLD,
LARGE_OBJECT_GRANULE_THRESHOLD * GRANULE_SIZE);
// Each granule has one metadata byte stored in a side table, used for
// mark bits but also for other per-object metadata. Already we were
// using a byte instead of a bit to facilitate parallel marking.
// (Parallel markers are allowed to race.) Turns out we can put a
// pinned bit there too, for objects that can't be moved. Actually
// there are two pinned bits: one that's managed by the collector, which
// pins referents of conservative roots, and one for pins managed
// externally (maybe because the mutator requested a pin). Then there's
// a "remembered" bit, indicating that the object should be scanned for
// references to the nursery. If the remembered bit is set, the
// corresponding remset byte should also be set in the slab (see below).
//
// Getting back to mark bits -- because we want to allow for
// conservative roots, we need to know whether an address indicates an
// object or not. That means that when an object is allocated, it has
// to set a bit, somewhere. In our case we use the metadata byte, and
// set the "young" bit. In future we could use this for generational
// GC, with the sticky mark bit strategy.
//
// When an object becomes dead after a GC, it will still have a bit set
// -- maybe the young bit, or maybe a survivor bit. The sweeper has to
// clear these bits before the next collection. But, for concurrent
// marking, we will also be marking "live" objects, updating their mark
// bits. So there are four object states concurrently observable:
// young, dead, survivor, and marked. (If we didn't have concurrent
// marking we would still need the "marked" state, because marking
// mutator roots before stopping is also a form of concurrent marking.)
// Even though these states are mutually exclusive, we use separate bits
// for them because we have the space. After each collection, the dead,
// survivor, and marked states rotate by one bit.
enum metadata_byte {
METADATA_BYTE_NONE = 0,
METADATA_BYTE_YOUNG = 1,
METADATA_BYTE_MARK_0 = 2,
METADATA_BYTE_MARK_1 = 4,
METADATA_BYTE_MARK_2 = 8,
METADATA_BYTE_END = 16,
METADATA_BYTE_PINNED = 32,
METADATA_BYTE_PERMAPINNED = 64,
METADATA_BYTE_REMEMBERED = 128
};
static uint8_t rotate_dead_survivor_marked(uint8_t mask) {
uint8_t all =
METADATA_BYTE_MARK_0 | METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2;
return ((mask << 1) | (mask >> 2)) & all;
}
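// Concretely, the rotation above maps the three mark bits cyclically:
//
//   rotate_dead_survivor_marked(METADATA_BYTE_MARK_0) == METADATA_BYTE_MARK_1
//   rotate_dead_survivor_marked(METADATA_BYTE_MARK_1) == METADATA_BYTE_MARK_2
//   rotate_dead_survivor_marked(METADATA_BYTE_MARK_2) == METADATA_BYTE_MARK_0
//
// so after each collection the dead, survivor, and marked roles each
// advance to the next mark bit.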
#define SLAB_SIZE (4 * 1024 * 1024)
#define BLOCK_SIZE (64 * 1024)
#define METADATA_BYTES_PER_BLOCK (BLOCK_SIZE / GRANULE_SIZE)
#define BLOCKS_PER_SLAB (SLAB_SIZE / BLOCK_SIZE)
#define META_BLOCKS_PER_SLAB (METADATA_BYTES_PER_BLOCK * BLOCKS_PER_SLAB / BLOCK_SIZE)
#define NONMETA_BLOCKS_PER_SLAB (BLOCKS_PER_SLAB - META_BLOCKS_PER_SLAB)
#define METADATA_BYTES_PER_SLAB (NONMETA_BLOCKS_PER_SLAB * METADATA_BYTES_PER_BLOCK)
#define SLACK_METADATA_BYTES_PER_SLAB (META_BLOCKS_PER_SLAB * METADATA_BYTES_PER_BLOCK)
#define REMSET_BYTES_PER_BLOCK (SLACK_METADATA_BYTES_PER_SLAB / BLOCKS_PER_SLAB)
#define REMSET_BYTES_PER_SLAB (REMSET_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB)
#define SLACK_REMSET_BYTES_PER_SLAB (REMSET_BYTES_PER_BLOCK * META_BLOCKS_PER_SLAB)
#define SUMMARY_BYTES_PER_BLOCK (SLACK_REMSET_BYTES_PER_SLAB / BLOCKS_PER_SLAB)
#define SUMMARY_BYTES_PER_SLAB (SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB)
#define SLACK_SUMMARY_BYTES_PER_SLAB (SUMMARY_BYTES_PER_BLOCK * META_BLOCKS_PER_SLAB)
#define HEADER_BYTES_PER_SLAB SLACK_SUMMARY_BYTES_PER_SLAB
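// For concreteness, with the 4 MB slab and 64 kB block sizes above these
// macros work out to 64 blocks per slab, of which 4 hold side metadata
// and 60 hold objects; 4096 metadata bytes, 256 remset bytes, and 16
// summary bytes per block; and a 64-byte slab header.  The illustrative
// asserts below just restate that arithmetic.
STATIC_ASSERT_EQ(BLOCKS_PER_SLAB, 64);
STATIC_ASSERT_EQ(META_BLOCKS_PER_SLAB, 4);
STATIC_ASSERT_EQ(NONMETA_BLOCKS_PER_SLAB, 60);
STATIC_ASSERT_EQ(METADATA_BYTES_PER_BLOCK, 4096);
STATIC_ASSERT_EQ(REMSET_BYTES_PER_BLOCK, 256);
STATIC_ASSERT_EQ(SUMMARY_BYTES_PER_BLOCK, 16);
STATIC_ASSERT_EQ(HEADER_BYTES_PER_SLAB, 64);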
struct slab;
struct slab_header {
union {
struct {
struct slab *next;
struct slab *prev;
};
uint8_t padding[HEADER_BYTES_PER_SLAB];
};
};
STATIC_ASSERT_EQ(sizeof(struct slab_header), HEADER_BYTES_PER_SLAB);
// Sometimes we want to put a block on a singly-linked list. For that
// there's a pointer reserved in the block summary. But because the
// pointer is aligned (32kB on 32-bit, 64kB on 64-bit), we can portably
// hide up to 15 flags in the low bits. These flags can be accessed
// non-atomically by the mutator when it owns a block; otherwise they
// need to be accessed atomically.
enum block_summary_flag {
BLOCK_OUT_FOR_THREAD = 0x1,
BLOCK_HAS_PIN = 0x2,
BLOCK_PAGED_OUT = 0x4,
BLOCK_NEEDS_SWEEP = 0x8,
BLOCK_UNAVAILABLE = 0x10,
BLOCK_FLAG_UNUSED_5 = 0x20,
BLOCK_FLAG_UNUSED_6 = 0x40,
BLOCK_FLAG_UNUSED_7 = 0x80,
BLOCK_FLAG_UNUSED_8 = 0x100,
BLOCK_FLAG_UNUSED_9 = 0x200,
BLOCK_FLAG_UNUSED_10 = 0x400,
BLOCK_FLAG_UNUSED_11 = 0x800,
BLOCK_FLAG_UNUSED_12 = 0x1000,
BLOCK_FLAG_UNUSED_13 = 0x2000,
BLOCK_FLAG_UNUSED_14 = 0x4000,
};
struct block_summary {
union {
struct {
//struct block *next;
// Counters related to previous collection: how many holes there
// were, and how much space they had.
uint16_t hole_count;
uint16_t free_granules;
// Counters related to allocation since previous collection:
// wasted space due to fragmentation.
uint16_t holes_with_fragmentation;
uint16_t fragmentation_granules;
// After a block is swept, if it's empty it goes on the empties
// list. Otherwise if it's not immediately used by a mutator (as
// is usually the case), it goes on the swept list. Both of these
// lists use this field. But because the next pointer stored in this
// field is block-aligned, we stash flags in its low bits.
uintptr_t next_and_flags;
};
uint8_t padding[SUMMARY_BYTES_PER_BLOCK];
};
};
STATIC_ASSERT_EQ(sizeof(struct block_summary), SUMMARY_BYTES_PER_BLOCK);
struct block {
char data[BLOCK_SIZE];
};
struct slab {
struct slab_header header;
struct block_summary summaries[NONMETA_BLOCKS_PER_SLAB];
uint8_t remsets[REMSET_BYTES_PER_SLAB];
uint8_t metadata[METADATA_BYTES_PER_SLAB];
struct block blocks[NONMETA_BLOCKS_PER_SLAB];
};
STATIC_ASSERT_EQ(sizeof(struct slab), SLAB_SIZE);
static struct slab *object_slab(void *obj) {
uintptr_t addr = (uintptr_t) obj;
uintptr_t base = addr & ~(SLAB_SIZE - 1);
return (struct slab*) base;
}
static uint8_t *object_metadata_byte(void *obj) {
uintptr_t addr = (uintptr_t) obj;
uintptr_t base = addr & ~(SLAB_SIZE - 1);
uintptr_t granule = (addr & (SLAB_SIZE - 1)) >> GRANULE_SIZE_LOG_2;
return (uint8_t*) (base + granule);
}
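// For example, the object blocks of a slab begin after the four 64 kB
// blocks that hold the header, summaries, remsets, and metadata, i.e. at
// slab offset 0x40000.  An object there is granule 0x4000, so its
// metadata byte lives at slab offset 0x4000 -- which is exactly where
// the metadata array starts (64 + 960 + 15360 bytes into the slab).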
#define GRANULES_PER_BLOCK (BLOCK_SIZE / GRANULE_SIZE)
#define GRANULES_PER_REMSET_BYTE (GRANULES_PER_BLOCK / REMSET_BYTES_PER_BLOCK)
static uint8_t *object_remset_byte(void *obj) {
uintptr_t addr = (uintptr_t) obj;
uintptr_t base = addr & ~(SLAB_SIZE - 1);
uintptr_t granule = (addr & (SLAB_SIZE - 1)) >> GRANULE_SIZE_LOG_2;
uintptr_t remset_byte = granule / GRANULES_PER_REMSET_BYTE;
return (uint8_t*) (base + remset_byte);
}
static struct block_summary* block_summary_for_addr(uintptr_t addr) {
uintptr_t base = addr & ~(SLAB_SIZE - 1);
uintptr_t block = (addr & (SLAB_SIZE - 1)) / BLOCK_SIZE;
return (struct block_summary*) (base + block * sizeof(struct block_summary));
}
static uintptr_t block_summary_has_flag(struct block_summary *summary,
enum block_summary_flag flag) {
return summary->next_and_flags & flag;
}
static void block_summary_set_flag(struct block_summary *summary,
enum block_summary_flag flag) {
summary->next_and_flags |= flag;
}
static void block_summary_clear_flag(struct block_summary *summary,
enum block_summary_flag flag) {
summary->next_and_flags &= ~(uintptr_t)flag;
}
static uintptr_t block_summary_next(struct block_summary *summary) {
return summary->next_and_flags & ~(BLOCK_SIZE - 1);
}
static void block_summary_set_next(struct block_summary *summary,
uintptr_t next) {
ASSERT((next & (BLOCK_SIZE - 1)) == 0);
summary->next_and_flags =
(summary->next_and_flags & (BLOCK_SIZE - 1)) | next;
}
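// Blocks are kept on lock-free singly-linked lists (the empty and
// unavailable lists below) threaded through the next_and_flags word of
// their summaries; pushing and popping compare-and-swap the list head,
// and the associated count is maintained with separate atomic
// increments and decrements.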
static void push_block(uintptr_t *loc, size_t *count, uintptr_t block) {
struct block_summary *summary = block_summary_for_addr(block);
uintptr_t next = atomic_load_explicit(loc, memory_order_acquire);
do {
block_summary_set_next(summary, next);
} while (!atomic_compare_exchange_weak(loc, &next, block));
atomic_fetch_add_explicit(count, 1, memory_order_acq_rel);
}
static uintptr_t pop_block(uintptr_t *loc, size_t *count) {
uintptr_t head = atomic_load_explicit(loc, memory_order_acquire);
struct block_summary *summary;
uintptr_t next;
do {
if (!head)
return 0;
summary = block_summary_for_addr(head);
next = block_summary_next(summary);
} while (!atomic_compare_exchange_weak(loc, &head, next));
block_summary_set_next(summary, 0);
atomic_fetch_sub_explicit(count, 1, memory_order_acq_rel);
return head;
}
static uintptr_t align_up(uintptr_t addr, size_t align) {
return (addr + align - 1) & ~(align-1);
}
static inline size_t size_to_granules(size_t size) {
return (size + GRANULE_SIZE - 1) >> GRANULE_SIZE_LOG_2;
}
// Alloc kind is in bits 0-7, for live objects.
static const uintptr_t gcobj_alloc_kind_mask = 0xff;
static const uintptr_t gcobj_alloc_kind_shift = 0;
static inline uint8_t tag_live_alloc_kind(uintptr_t tag) {
return (tag >> gcobj_alloc_kind_shift) & gcobj_alloc_kind_mask;
}
static inline uintptr_t tag_live(uint8_t alloc_kind) {
return ((uintptr_t)alloc_kind << gcobj_alloc_kind_shift);
}
struct gcobj {
union {
uintptr_t tag;
uintptr_t words[0];
void *pointers[0];
};
};
struct mark_space {
uint64_t sweep_mask;
uint8_t live_mask;
uint8_t marked_mask;
uintptr_t low_addr;
size_t extent;
size_t heap_size;
uintptr_t next_block; // atomically
uintptr_t empty_blocks; // atomically
size_t empty_blocks_count; // atomically
uintptr_t unavailable_blocks; // atomically
size_t unavailable_blocks_count; // atomically
ssize_t pending_unavailable_bytes; // atomically
struct slab *slabs;
size_t nslabs;
uintptr_t granules_freed_by_last_collection; // atomically
uintptr_t fragmentation_granules_since_last_collection; // atomically
};
struct heap {
struct mark_space mark_space;
struct large_object_space large_object_space;
size_t large_object_pages;
pthread_mutex_t lock;
pthread_cond_t collector_cond;
pthread_cond_t mutator_cond;
size_t size;
int collecting;
int multithreaded;
size_t active_mutator_count;
size_t mutator_count;
struct handle *global_roots;
struct mutator_mark_buf *mutator_roots;
long count;
struct mutator *deactivated_mutators;
struct tracer tracer;
};
struct mutator_mark_buf {
struct mutator_mark_buf *next;
size_t size;
size_t capacity;
struct gcobj **objects;
};
struct mutator {
// Bump-pointer allocation into holes.
uintptr_t alloc;
uintptr_t sweep;
uintptr_t block;
struct heap *heap;
struct handle *roots;
struct mutator_mark_buf mark_buf;
struct mutator *next;
};
static inline struct tracer* heap_tracer(struct heap *heap) {
return &heap->tracer;
}
static inline struct mark_space* heap_mark_space(struct heap *heap) {
return &heap->mark_space;
}
static inline struct large_object_space* heap_large_object_space(struct heap *heap) {
return &heap->large_object_space;
}
static inline struct heap* mutator_heap(struct mutator *mutator) {
return mutator->heap;
}
#define GC_HEADER uintptr_t _gc_header
static inline void clear_memory(uintptr_t addr, size_t size) {
memset((char*)addr, 0, size);
}
static void collect(struct mutator *mut) NEVER_INLINE;
static inline uint8_t* mark_byte(struct mark_space *space, struct gcobj *obj) {
return object_metadata_byte(obj);
}
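// Mark the object referenced by an edge, returning nonzero if it was
// not already marked.  The unsynchronized read-modify-write of the
// metadata byte below is intentional: as noted at the top of this file,
// parallel markers are allowed to race, which at worst means an object
// may be enqueued for tracing more than once.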
static inline int mark_space_mark_object(struct mark_space *space,
struct gc_edge edge) {
struct gcobj *obj = dereference_edge(edge);
uint8_t *loc = object_metadata_byte(obj);
uint8_t byte = *loc;
if (byte & space->marked_mask)
return 0;
uint8_t mask = METADATA_BYTE_YOUNG | METADATA_BYTE_MARK_0
| METADATA_BYTE_MARK_1 | METADATA_BYTE_MARK_2;
*loc = (byte & ~mask) | space->marked_mask;
return 1;
}
static inline int mark_space_contains(struct mark_space *space,
struct gcobj *obj) {
uintptr_t addr = (uintptr_t)obj;
return addr - space->low_addr < space->extent;
}
static inline int large_object_space_mark_object(struct large_object_space *space,
struct gcobj *obj) {
return large_object_space_copy(space, (uintptr_t)obj);
}
static inline int trace_edge(struct heap *heap, struct gc_edge edge) {
struct gcobj *obj = dereference_edge(edge);
if (!obj)
return 0;
else if (LIKELY(mark_space_contains(heap_mark_space(heap), obj)))
return mark_space_mark_object(heap_mark_space(heap), edge);
else if (large_object_space_contains(heap_large_object_space(heap), obj))
return large_object_space_mark_object(heap_large_object_space(heap),
obj);
else
abort();
}
static inline void trace_one(struct gcobj *obj, void *mark_data) {
switch (tag_live_alloc_kind(obj->tag)) {
#define SCAN_OBJECT(name, Name, NAME) \
case ALLOC_KIND_##NAME: \
visit_##name##_fields((Name*)obj, tracer_visit, mark_data); \
break;
FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT)
#undef SCAN_OBJECT
default:
abort ();
}
}
static int heap_has_multiple_mutators(struct heap *heap) {
return atomic_load_explicit(&heap->multithreaded, memory_order_relaxed);
}
static int mutators_are_stopping(struct heap *heap) {
return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
}
static inline void heap_lock(struct heap *heap) {
pthread_mutex_lock(&heap->lock);
}
static inline void heap_unlock(struct heap *heap) {
pthread_mutex_unlock(&heap->lock);
}
static void add_mutator(struct heap *heap, struct mutator *mut) {
mut->heap = heap;
heap_lock(heap);
// We have no roots. If there is a GC currently in progress, we have
// nothing to add. Just wait until it's done.
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
if (heap->mutator_count == 1)
heap->multithreaded = 1;
heap->active_mutator_count++;
heap->mutator_count++;
heap_unlock(heap);
}
static void remove_mutator(struct heap *heap, struct mutator *mut) {
mut->heap = NULL;
heap_lock(heap);
heap->active_mutator_count--;
heap->mutator_count--;
// We have no roots. If there is a GC stop currently in progress,
// maybe tell the controller it can continue.
if (mutators_are_stopping(heap) && heap->active_mutator_count == 0)
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void request_mutators_to_stop(struct heap *heap) {
ASSERT(!mutators_are_stopping(heap));
atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed);
}
static void allow_mutators_to_continue(struct heap *heap) {
ASSERT(mutators_are_stopping(heap));
ASSERT(heap->active_mutator_count == 0);
heap->active_mutator_count++;
atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed);
ASSERT(!mutators_are_stopping(heap));
pthread_cond_broadcast(&heap->mutator_cond);
}
static void push_unavailable_block(struct mark_space *space, uintptr_t block) {
struct block_summary *summary = block_summary_for_addr(block);
ASSERT(!block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP));
ASSERT(!block_summary_has_flag(summary, BLOCK_UNAVAILABLE));
block_summary_set_flag(summary, BLOCK_UNAVAILABLE);
madvise((void*)block, BLOCK_SIZE, MADV_DONTNEED);
push_block(&space->unavailable_blocks, &space->unavailable_blocks_count,
block);
}
static uintptr_t pop_unavailable_block(struct mark_space *space) {
uintptr_t block = pop_block(&space->unavailable_blocks,
&space->unavailable_blocks_count);
if (!block)
return 0;
struct block_summary *summary = block_summary_for_addr(block);
ASSERT(block_summary_has_flag(summary, BLOCK_UNAVAILABLE));
block_summary_clear_flag(summary, BLOCK_UNAVAILABLE);
return block;
}
static uintptr_t pop_empty_block(struct mark_space *space) {
return pop_block(&space->empty_blocks, &space->empty_blocks_count);
}
static void push_empty_block(struct mark_space *space, uintptr_t block) {
ASSERT(!block_summary_has_flag(block_summary_for_addr(block),
BLOCK_NEEDS_SWEEP));
push_block(&space->empty_blocks, &space->empty_blocks_count, block);
}
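// pending_unavailable_bytes tracks how much memory the mark space owes
// back to the OS, for example because the large object space grew.  A
// positive balance is paid down by turning empty blocks into
// unavailable (madvised-away) blocks; when the balance goes negative
// again, as when large objects are freed, unavailable blocks can be
// brought back as empty blocks.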
static ssize_t mark_space_request_release_memory(struct mark_space *space,
size_t bytes) {
return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes;
}
static void mark_space_reacquire_memory(struct mark_space *space,
size_t bytes) {
ssize_t pending =
atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes;
while (pending + BLOCK_SIZE <= 0) {
uintptr_t block = pop_unavailable_block(space);
ASSERT(block);
push_empty_block(space, block);
pending = atomic_fetch_add(&space->pending_unavailable_bytes, BLOCK_SIZE)
+ BLOCK_SIZE;
}
}
static size_t next_hole(struct mutator *mut);
static int sweep_until_memory_released(struct mutator *mut) {
struct mark_space *space = heap_mark_space(mutator_heap(mut));
ssize_t pending = atomic_load_explicit(&space->pending_unavailable_bytes,
memory_order_acquire);
// First try to unmap previously-identified empty blocks. If pending
// > 0 and other mutators happen to identify empty blocks, they will
// be unmapped directly and moved to the unavailable list.
while (pending > 0) {
uintptr_t block = pop_empty_block(space);
if (!block)
break;
push_unavailable_block(space, block);
pending = atomic_fetch_sub(&space->pending_unavailable_bytes, BLOCK_SIZE);
pending -= BLOCK_SIZE;
}
// Otherwise, sweep, transitioning any empty blocks to unavailable and
// throwing away any non-empty block. A bit wasteful but hastening
// the next collection is a reasonable thing to do here.
while (pending > 0) {
if (!next_hole(mut))
return 0;
pending = atomic_load_explicit(&space->pending_unavailable_bytes,
memory_order_acquire);
}
return pending <= 0;
}
static void heap_reset_large_object_pages(struct heap *heap, size_t npages) {
size_t previous = heap->large_object_pages;
heap->large_object_pages = npages;
ASSERT(npages <= previous);
size_t bytes = (previous - npages) <<
heap_large_object_space(heap)->page_size_log2;
mark_space_reacquire_memory(heap_mark_space(heap), bytes);
}
static void mutator_mark_buf_grow(struct mutator_mark_buf *buf) {
size_t old_capacity = buf->capacity;
size_t old_bytes = old_capacity * sizeof(struct gcobj*);
size_t new_bytes = old_bytes ? old_bytes * 2 : getpagesize();
size_t new_capacity = new_bytes / sizeof(struct gcobj*);
void *mem = mmap(NULL, new_bytes, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
perror("allocating mutator mark buffer failed");
abort();
}
if (old_bytes) {
memcpy(mem, buf->objects, old_bytes);
munmap(buf->objects, old_bytes);
}
buf->objects = mem;
buf->capacity = new_capacity;
}
static void mutator_mark_buf_push(struct mutator_mark_buf *buf,
struct gcobj *val) {
if (UNLIKELY(buf->size == buf->capacity))
mutator_mark_buf_grow(buf);
buf->objects[buf->size++] = val;
}
static void mutator_mark_buf_release(struct mutator_mark_buf *buf) {
size_t bytes = buf->size * sizeof(struct gcobj*);
if (bytes >= getpagesize())
madvise(buf->objects, align_up(bytes, getpagesize()), MADV_DONTNEED);
buf->size = 0;
}
static void mutator_mark_buf_destroy(struct mutator_mark_buf *buf) {
size_t bytes = buf->capacity * sizeof(struct gcobj*);
if (bytes)
munmap(buf->objects, bytes);
}
// Mark the roots of a mutator that is stopping for GC. We can't
// enqueue them directly, so we send them to the controller in a buffer.
static void mark_stopping_mutator_roots(struct mutator *mut) {
struct heap *heap = mutator_heap(mut);
struct mutator_mark_buf *local_roots = &mut->mark_buf;
for (struct handle *h = mut->roots; h; h = h->next) {
struct gc_edge root = gc_edge(&h->v);
if (trace_edge(heap, root))
mutator_mark_buf_push(local_roots, dereference_edge(root));
}
// Post to global linked-list of thread roots.
struct mutator_mark_buf *next =
atomic_load_explicit(&heap->mutator_roots, memory_order_acquire);
do {
local_roots->next = next;
} while (!atomic_compare_exchange_weak(&heap->mutator_roots,
&next, local_roots));
}
// Mark the roots of the mutator that causes GC.
static void mark_controlling_mutator_roots(struct mutator *mut) {
struct heap *heap = mutator_heap(mut);
for (struct handle *h = mut->roots; h; h = h->next) {
struct gc_edge root = gc_edge(&h->v);
if (trace_edge(heap, root))
tracer_enqueue_root(&heap->tracer, dereference_edge(root));
}
}
static void release_stopping_mutator_roots(struct mutator *mut) {
mutator_mark_buf_release(&mut->mark_buf);
}
static void wait_for_mutators_to_stop(struct heap *heap) {
heap->active_mutator_count--;
while (heap->active_mutator_count)
pthread_cond_wait(&heap->collector_cond, &heap->lock);
}
static void finish_sweeping(struct mutator *mut);
static void finish_sweeping_in_block(struct mutator *mut);
static void mark_inactive_mutators(struct heap *heap) {
for (struct mutator *mut = heap->deactivated_mutators; mut; mut = mut->next) {
finish_sweeping_in_block(mut);
mark_controlling_mutator_roots(mut);
}
}
static void mark_global_roots(struct heap *heap) {
for (struct handle *h = heap->global_roots; h; h = h->next) {
struct gc_edge edge = gc_edge(&h->v);
if (trace_edge(heap, edge))
tracer_enqueue_root(&heap->tracer, dereference_edge(edge));
}
struct mutator_mark_buf *roots = atomic_load(&heap->mutator_roots);
for (; roots; roots = roots->next)
tracer_enqueue_roots(&heap->tracer, roots->objects, roots->size);
atomic_store(&heap->mutator_roots, NULL);
}
static void pause_mutator_for_collection(struct heap *heap) NEVER_INLINE;
static void pause_mutator_for_collection(struct heap *heap) {
ASSERT(mutators_are_stopping(heap));
ASSERT(heap->active_mutator_count);
heap->active_mutator_count--;
if (heap->active_mutator_count == 0)
pthread_cond_signal(&heap->collector_cond);
// Go to sleep and wake up when the collector is done. Note,
// however, that it may be that some other mutator manages to
// trigger collection before we wake up. In that case we need to
// mark roots, not just sleep again. To detect a wakeup on this
// collection vs a future collection, we use the global GC count.
// This is safe because the count is protected by the heap lock,
// which we hold.
long epoch = heap->count;
do
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
while (mutators_are_stopping(heap) && heap->count == epoch);
heap->active_mutator_count++;
}
static void pause_mutator_for_collection_with_lock(struct mutator *mut) NEVER_INLINE;
static void pause_mutator_for_collection_with_lock(struct mutator *mut) {
struct heap *heap = mutator_heap(mut);
ASSERT(mutators_are_stopping(heap));
finish_sweeping_in_block(mut);
mark_controlling_mutator_roots(mut);
pause_mutator_for_collection(heap);
}
static void pause_mutator_for_collection_without_lock(struct mutator *mut) NEVER_INLINE;
static void pause_mutator_for_collection_without_lock(struct mutator *mut) {
struct heap *heap = mutator_heap(mut);
ASSERT(mutators_are_stopping(heap));
finish_sweeping(mut);
mark_stopping_mutator_roots(mut);
heap_lock(heap);
pause_mutator_for_collection(heap);
heap_unlock(heap);
release_stopping_mutator_roots(mut);
}
static inline void maybe_pause_mutator_for_collection(struct mutator *mut) {
while (mutators_are_stopping(mutator_heap(mut)))
pause_mutator_for_collection_without_lock(mut);
}
static void reset_sweeper(struct mark_space *space) {
space->next_block = (uintptr_t) &space->slabs[0].blocks;
}
static uint64_t broadcast_byte(uint8_t byte) {
uint64_t result = byte;
return result * 0x0101010101010101ULL;
}
static void rotate_mark_bytes(struct mark_space *space) {
space->live_mask = rotate_dead_survivor_marked(space->live_mask);
space->marked_mask = rotate_dead_survivor_marked(space->marked_mask);
space->sweep_mask = broadcast_byte(space->live_mask);
}
static void reset_statistics(struct mark_space *space) {
space->granules_freed_by_last_collection = 0;
space->fragmentation_granules_since_last_collection = 0;
}
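// Run a stop-the-world collection, driven by the mutator that triggered
// it: request the other mutators to stop, mark our own roots and finish
// our sweep, wait for the others, trace from mutator and global roots,
// then reset the sweep pointer and rotate the mark-byte states so that
// lazy sweeping can reclaim objects that did not survive.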
static void collect(struct mutator *mut) {
struct heap *heap = mutator_heap(mut);
struct mark_space *space = heap_mark_space(heap);
struct large_object_space *lospace = heap_large_object_space(heap);
DEBUG("start collect #%ld:\n", heap->count);
large_object_space_start_gc(lospace);
tracer_prepare(heap);
request_mutators_to_stop(heap);
mark_controlling_mutator_roots(mut);
finish_sweeping(mut);
wait_for_mutators_to_stop(heap);
double yield = space->granules_freed_by_last_collection * GRANULE_SIZE;
double fragmentation = space->fragmentation_granules_since_last_collection * GRANULE_SIZE;
yield /= SLAB_SIZE * space->nslabs;
fragmentation /= SLAB_SIZE * space->nslabs;
fprintf(stderr, "last gc yield: %f; fragmentation: %f\n", yield, fragmentation);
mark_inactive_mutators(heap);
mark_global_roots(heap);
tracer_trace(heap);
tracer_release(heap);
reset_sweeper(space);
rotate_mark_bytes(space);
heap->count++;
reset_statistics(space);
large_object_space_finish_gc(lospace);
heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection);
allow_mutators_to_continue(heap);
DEBUG("collect done\n");
}
static size_t mark_space_live_object_granules(uint8_t *metadata) {
size_t n = 0;
while ((metadata[n] & METADATA_BYTE_END) == 0)
n++;
return n + 1;
}
static int sweep_byte(uint8_t *loc, uintptr_t sweep_mask) {
uint8_t metadata = atomic_load_explicit(loc, memory_order_relaxed);
// If the metadata byte is nonzero, that means either a young, dead,
// survived, or marked object. If it's live (young, survived, or
// marked), we found the next mark. Otherwise it's dead and we clear
// the byte. If we see an END, that marks the end of a dead object;
// clear it.
if (metadata) {
if (metadata & sweep_mask)
return 1;
atomic_store_explicit(loc, 0, memory_order_relaxed);
}
return 0;
}
static int sweep_word(uintptr_t *loc, uintptr_t sweep_mask) {
uintptr_t metadata = atomic_load_explicit(loc, memory_order_relaxed);
if (metadata) {
if (metadata & sweep_mask)
return 1;
atomic_store_explicit(loc, 0, memory_order_relaxed);
}
return 0;
}
static inline uint64_t load_mark_bytes(uint8_t *mark) {
ASSERT(((uintptr_t)mark & 7) == 0);
uint8_t * __attribute__((aligned(8))) aligned_mark = mark;
uint64_t word;
memcpy(&word, aligned_mark, 8);
#ifdef WORDS_BIGENDIAN
word = __builtin_bswap64(word);
#endif
return word;
}
static inline size_t count_zero_bytes(uint64_t bytes) {
return bytes ? (__builtin_ctzll(bytes) / 8) : sizeof(bytes);
}
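// Scan up to `limit` metadata bytes starting at `mark`, returning the
// number of granules before the next byte with a live (sweep_mask) bit
// set, or `limit` if no such byte is found.  For example, if after
// masking the first four metadata bytes are zero and the fifth is
// nonzero, the result is 4: a four-granule hole precedes the next live
// object.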
static size_t next_mark(uint8_t *mark, size_t limit, uint64_t sweep_mask) {
size_t n = 0;
// If we have a hole, it is likely to be more than 8 granules long.
// Assuming that it's better to make aligned loads, first we align the
// sweep pointer, then we load aligned mark words.
size_t unaligned = ((uintptr_t) mark) & 7;
if (unaligned) {
uint64_t bytes = load_mark_bytes(mark - unaligned) >> (unaligned * 8);
bytes &= sweep_mask;
if (bytes)
return count_zero_bytes(bytes);
n += 8 - unaligned;
}
for(; n < limit; n += 8) {
uint64_t bytes = load_mark_bytes(mark + n);
bytes &= sweep_mask;
if (bytes)
return n + count_zero_bytes(bytes);
}
return limit;
}
static uintptr_t mark_space_next_block_to_sweep(struct mark_space *space) {
uintptr_t block = atomic_load_explicit(&space->next_block,
memory_order_acquire);
uintptr_t next_block;
do {
if (block == 0)
return 0;
next_block = block + BLOCK_SIZE;
if (next_block % SLAB_SIZE == 0) {
uintptr_t hi_addr = space->low_addr + space->extent;
if (next_block == hi_addr)
next_block = 0;
else
next_block += META_BLOCKS_PER_SLAB * BLOCK_SIZE;
}
} while (!atomic_compare_exchange_weak(&space->next_block, &block,
next_block));
return block;
}
static void finish_block(struct mutator *mut) {
ASSERT(mut->block);
struct block_summary *block = block_summary_for_addr(mut->block);
struct mark_space *space = heap_mark_space(mutator_heap(mut));
atomic_fetch_add(&space->granules_freed_by_last_collection,
block->free_granules);
atomic_fetch_add(&space->fragmentation_granules_since_last_collection,
block->fragmentation_granules);
mut->block = mut->alloc = mut->sweep = 0;
}
// Sweep some heap to reclaim free space, resetting mut->alloc and
// mut->sweep. Return the size of the hole in granules.
static size_t next_hole_in_block(struct mutator *mut) {
uintptr_t sweep = mut->sweep;
if (sweep == 0)
return 0;
uintptr_t limit = mut->block + BLOCK_SIZE;
uintptr_t sweep_mask = heap_mark_space(mutator_heap(mut))->sweep_mask;
while (sweep != limit) {
ASSERT((sweep & (GRANULE_SIZE - 1)) == 0);
uint8_t* metadata = object_metadata_byte((struct gcobj*)sweep);
size_t limit_granules = (limit - sweep) >> GRANULE_SIZE_LOG_2;
// Except when we have just acquired a block, mut->sweep is positioned
// right after a hole, so it points either to the end of the block or
// to a live object. Assume that a live object is more common.
{
size_t live_granules = 0;
while (limit_granules && (metadata[0] & sweep_mask)) {
// Object survived collection; skip over it and continue sweeping.
size_t object_granules = mark_space_live_object_granules(metadata);
live_granules += object_granules;
limit_granules -= object_granules;
metadata += object_granules;
}
if (!limit_granules)
break;
sweep += live_granules * GRANULE_SIZE;
}
size_t free_granules = next_mark(metadata, limit_granules, sweep_mask);
ASSERT(free_granules);
ASSERT(free_granules <= limit_granules);
struct block_summary *summary = block_summary_for_addr(sweep);
summary->hole_count++;
summary->free_granules += free_granules;
size_t free_bytes = free_granules * GRANULE_SIZE;
mut->alloc = sweep;
mut->sweep = sweep + free_bytes;
return free_granules;
}
finish_block(mut);
return 0;
}
static void finish_hole(struct mutator *mut) {
size_t granules = (mut->sweep - mut->alloc) / GRANULE_SIZE;
if (granules) {
struct block_summary *summary = block_summary_for_addr(mut->block);
summary->holes_with_fragmentation++;
summary->fragmentation_granules += granules;
uint8_t *metadata = object_metadata_byte((void*)mut->alloc);
memset(metadata, 0, granules);
mut->alloc = mut->sweep;
}
// FIXME: add to fragmentation
}
static int maybe_release_swept_empty_block(struct mutator *mut) {
ASSERT(mut->block);
struct mark_space *space = heap_mark_space(mutator_heap(mut));
uintptr_t block = mut->block;
if (atomic_load_explicit(&space->pending_unavailable_bytes,
memory_order_acquire) <= 0)
return 0;
block_summary_clear_flag(block_summary_for_addr(block), BLOCK_NEEDS_SWEEP);
push_unavailable_block(space, block);
atomic_fetch_sub(&space->pending_unavailable_bytes, BLOCK_SIZE);
mut->alloc = mut->sweep = mut->block = 0;
return 1;
}
static size_t next_hole(struct mutator *mut) {
finish_hole(mut);
// As we sweep, if we find that a block is empty, we return it to the
// empties list. Empties are precious. But if we return 10 blocks in
// a row, and still find an 11th empty, go ahead and use it.
size_t empties_countdown = 10;
struct mark_space *space = heap_mark_space(mutator_heap(mut));
while (1) {
// Sweep current block for a hole.
size_t granules = next_hole_in_block(mut);
if (granules) {
// If the hole spans only part of a block, give it to the mutator.
if (granules < GRANULES_PER_BLOCK)
return granules;
// Sweeping found a completely empty block. If we have pending
// pages to release to the OS, we should unmap this block.
if (maybe_release_swept_empty_block(mut))
continue;
// Otherwise if we've already returned lots of empty blocks to the
// freelist, give this block to the mutator.
if (!empties_countdown)
return granules;
// Otherwise we push to the empty blocks list.
struct block_summary *summary = block_summary_for_addr(mut->block);
block_summary_clear_flag(summary, BLOCK_NEEDS_SWEEP);
push_empty_block(space, mut->block);
mut->alloc = mut->sweep = mut->block = 0;
empties_countdown--;
}
ASSERT(mut->block == 0);
while (1) {
uintptr_t block = mark_space_next_block_to_sweep(space);
if (block) {
// Sweeping found a block. We might take it for allocation, or
// we might send it back.
struct block_summary *summary = block_summary_for_addr(block);
// If it's marked unavailable, it's already on a list of
// unavailable blocks, so skip and get the next block.
if (block_summary_has_flag(summary, BLOCK_UNAVAILABLE))
continue;
if (block_summary_has_flag(summary, BLOCK_NEEDS_SWEEP)) {
// This block was marked in the last GC and needs sweeping.
// As we sweep we'll want to record how many bytes were live
// at the last collection. As we allocate we'll record how
// many granules were wasted because of fragmentation.
summary->hole_count = 0;
summary->free_granules = 0;
summary->holes_with_fragmentation = 0;
summary->fragmentation_granules = 0;
// Prepare to sweep the block for holes.
mut->alloc = mut->sweep = mut->block = block;
break;
} else {
// Otherwise this block is completely empty and is on the
// empties list. We take from the empties list only after all
// the NEEDS_SWEEP blocks are processed.
continue;
}
} else {
// We are done sweeping for blocks. Now take from the empties
// list.
block = pop_empty_block(space);
// No empty block? Return 0 to cause collection.
if (!block)
return 0;
// Otherwise return the block to the mutator.
struct block_summary *summary = block_summary_for_addr(block);
block_summary_set_flag(summary, BLOCK_NEEDS_SWEEP);
summary->hole_count = 1;
summary->free_granules = GRANULES_PER_BLOCK;
summary->holes_with_fragmentation = 0;
summary->fragmentation_granules = 0;
mut->block = block;
mut->alloc = block;
mut->sweep = block + BLOCK_SIZE;
return GRANULES_PER_BLOCK;
}
}
}
}
static void finish_sweeping_in_block(struct mutator *mut) {
while (next_hole_in_block(mut))
finish_hole(mut);
}
// Another thread is triggering GC. Before we stop, finish clearing the
// dead mark bytes for the mutator's block, and release the block.
static void finish_sweeping(struct mutator *mut) {
while (next_hole(mut))
finish_hole(mut);
}
static void out_of_memory(struct mutator *mut) {
struct heap *heap = mutator_heap(mut);
fprintf(stderr, "ran out of space, heap size %zu (%zu slabs)\n",
heap->size, heap_mark_space(heap)->nslabs);
abort();
}
static void* allocate_large(struct mutator *mut, enum alloc_kind kind,
size_t granules) {
struct heap *heap = mutator_heap(mut);
struct large_object_space *space = heap_large_object_space(heap);
size_t size = granules * GRANULE_SIZE;
size_t npages = large_object_space_npages(space, size);
mark_space_request_release_memory(heap_mark_space(heap),
npages << space->page_size_log2);
if (!sweep_until_memory_released(mut)) {
heap_lock(heap);
if (mutators_are_stopping(heap))
pause_mutator_for_collection_with_lock(mut);
else
collect(mut);
heap_unlock(heap);
if (!sweep_until_memory_released(mut))
out_of_memory(mut);
}
atomic_fetch_add(&heap->large_object_pages, npages);
void *ret = large_object_space_alloc(space, npages);
if (!ret)
ret = large_object_space_obtain_and_alloc(space, npages);
if (!ret) {
perror("weird: we have the space but mmap didn't work");
abort();
}
*(uintptr_t*)ret = kind;
return ret;
}
static void* allocate_small_slow(struct mutator *mut, enum alloc_kind kind,
size_t granules) NEVER_INLINE;
static void* allocate_small_slow(struct mutator *mut, enum alloc_kind kind,
size_t granules) {
int swept_from_beginning = 0;
while (1) {
size_t hole = next_hole(mut);
if (hole >= granules) {
clear_memory(mut->alloc, hole * GRANULE_SIZE);
break;
}
if (!hole) {
struct heap *heap = mutator_heap(mut);
if (swept_from_beginning) {
out_of_memory(mut);
} else {
heap_lock(heap);
if (mutators_are_stopping(heap))
pause_mutator_for_collection_with_lock(mut);
else
collect(mut);
heap_unlock(heap);
swept_from_beginning = 1;
}
}
}
struct gcobj* ret = (struct gcobj*)mut->alloc;
mut->alloc += granules * GRANULE_SIZE;
return ret;
}
static inline void* allocate_small(struct mutator *mut, enum alloc_kind kind,
size_t granules) {
ASSERT(granules > 0); // allocating 0 granules would be silly
uintptr_t alloc = mut->alloc;
uintptr_t sweep = mut->sweep;
uintptr_t new_alloc = alloc + granules * GRANULE_SIZE;
struct gcobj *obj;
if (new_alloc <= sweep) {
mut->alloc = new_alloc;
obj = (struct gcobj *)alloc;
} else {
obj = allocate_small_slow(mut, kind, granules);
}
obj->tag = tag_live(kind);
uint8_t *metadata = object_metadata_byte(obj);
if (granules == 1) {
metadata[0] = METADATA_BYTE_YOUNG | METADATA_BYTE_END;
} else {
metadata[0] = METADATA_BYTE_YOUNG;
if (granules > 2)
memset(metadata + 1, 0, granules - 2);
metadata[granules - 1] = METADATA_BYTE_END;
}
return obj;
}
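// For example, a 3-granule allocation from allocate_small above leaves
// metadata bytes { YOUNG, 0, END } for its three granules: the YOUNG
// bit both identifies the object start (needed to support conservative
// roots) and serves as the initial live state, while the END bit lets
// mark_space_live_object_granules find the object's extent.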
static inline void* allocate_medium(struct mutator *mut, enum alloc_kind kind,
size_t granules) {
return allocate_small(mut, kind, granules);
}
static inline void* allocate(struct mutator *mut, enum alloc_kind kind,
size_t size) {
size_t granules = size_to_granules(size);
if (granules <= MEDIUM_OBJECT_GRANULE_THRESHOLD)
return allocate_small(mut, kind, granules);
if (granules <= LARGE_OBJECT_GRANULE_THRESHOLD)
return allocate_medium(mut, kind, granules);
return allocate_large(mut, kind, granules);
}
static inline void* allocate_pointerless(struct mutator *mut,
enum alloc_kind kind,
size_t size) {
return allocate(mut, kind, size);
}
static inline void init_field(void **addr, void *val) {
*addr = val;
}
static inline void set_field(void **addr, void *val) {
*addr = val;
}
static inline void* get_field(void **addr) {
return *addr;
}
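// Reserve address space for the slabs.  Because object_slab() and
// object_metadata_byte() recover the slab base by masking the address
// with SLAB_SIZE - 1, slabs must be SLAB_SIZE-aligned; we get that
// portably by over-mapping by one slab and then unmapping the unaligned
// head and tail of the reservation.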
static struct slab* allocate_slabs(size_t nslabs) {
size_t size = nslabs * SLAB_SIZE;
size_t extent = size + SLAB_SIZE;
char *mem = mmap(NULL, extent, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
perror("mmap failed");
return NULL;
}
uintptr_t base = (uintptr_t) mem;
uintptr_t end = base + extent;
uintptr_t aligned_base = align_up(base, SLAB_SIZE);
uintptr_t aligned_end = aligned_base + size;
if (aligned_base - base)
munmap((void*)base, aligned_base - base);
if (end - aligned_end)
munmap((void*)aligned_end, end - aligned_end);
return (struct slab*) aligned_base;
}
static int mark_space_init(struct mark_space *space, struct heap *heap) {
size_t size = align_up(heap->size, SLAB_SIZE);
size_t nslabs = size / SLAB_SIZE;
struct slab *slabs = allocate_slabs(nslabs);
if (!slabs)
return 0;
uint8_t dead = METADATA_BYTE_MARK_0;
uint8_t survived = METADATA_BYTE_MARK_1;
uint8_t marked = METADATA_BYTE_MARK_2;
space->marked_mask = marked;
space->live_mask = METADATA_BYTE_YOUNG | survived | marked;
rotate_mark_bytes(space);
space->slabs = slabs;
space->nslabs = nslabs;
space->low_addr = (uintptr_t) slabs;
space->extent = size;
space->next_block = 0;
for (size_t slab = 0; slab < nslabs; slab++) {
for (size_t block = 0; block < NONMETA_BLOCKS_PER_SLAB; block++) {
uintptr_t addr = (uintptr_t)slabs[slab].blocks[block].data;
if (size > heap->size) {
push_unavailable_block(space, addr);
size -= BLOCK_SIZE;
} else {
push_empty_block(space, addr);
}
}
}
return 1;
}
static int initialize_gc(size_t size, struct heap **heap,
struct mutator **mut) {
*heap = calloc(1, sizeof(struct heap));
if (!*heap) abort();
pthread_mutex_init(&(*heap)->lock, NULL);
pthread_cond_init(&(*heap)->mutator_cond, NULL);
pthread_cond_init(&(*heap)->collector_cond, NULL);
(*heap)->size = size;
if (!tracer_init(*heap))
abort();
struct mark_space *space = heap_mark_space(*heap);
if (!mark_space_init(space, *heap)) {
free(*heap);
*heap = NULL;
return 0;
}
if (!large_object_space_init(heap_large_object_space(*heap), *heap))
abort();
*mut = calloc(1, sizeof(struct mutator));
if (!*mut) abort();
add_mutator(*heap, *mut);
return 1;
}
static struct mutator* initialize_gc_for_thread(uintptr_t *stack_base,
struct heap *heap) {
struct mutator *ret = calloc(1, sizeof(struct mutator));
if (!ret)
abort();
add_mutator(heap, ret);
return ret;
}
static void finish_gc_for_thread(struct mutator *mut) {
remove_mutator(mutator_heap(mut), mut);
mutator_mark_buf_destroy(&mut->mark_buf);
free(mut);
}
static void deactivate_mutator(struct heap *heap, struct mutator *mut) {
ASSERT(mut->next == NULL);
heap_lock(heap);
mut->next = heap->deactivated_mutators;
heap->deactivated_mutators = mut;
heap->active_mutator_count--;
if (!heap->active_mutator_count && mutators_are_stopping(heap))
pthread_cond_signal(&heap->collector_cond);
heap_unlock(heap);
}
static void reactivate_mutator(struct heap *heap, struct mutator *mut) {
heap_lock(heap);
while (mutators_are_stopping(heap))
pthread_cond_wait(&heap->mutator_cond, &heap->lock);
struct mutator **prev = &heap->deactivated_mutators;
while (*prev != mut)
prev = &(*prev)->next;
*prev = mut->next;
mut->next = NULL;
heap->active_mutator_count++;
heap_unlock(heap);
}
static void* call_without_gc(struct mutator *mut, void* (*f)(void*),
void *data) NEVER_INLINE;
static void* call_without_gc(struct mutator *mut,
void* (*f)(void*),
void *data) {
struct heap *heap = mutator_heap(mut);
deactivate_mutator(heap, mut);
void *ret = f(data);
reactivate_mutator(heap, mut);
return ret;
}
static inline void print_start_gc_stats(struct heap *heap) {
}
static inline void print_end_gc_stats(struct heap *heap) {
printf("Completed %ld collections\n", heap->count);
printf("Heap size with overhead is %zd (%zu slabs)\n",
heap->size, heap_mark_space(heap)->nslabs);
}