
Switch mmc to field-logging write barrier

Instead of the card table, use metadata bytes for field-logging. More
precision should lead to less work during the pause.

Andy Wingo, 2024-10-02 21:25:09 +02:00
commit 1ecb45a437, parent 1493bf6398
6 changed files with 385 additions and 182 deletions
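For orientation: a card-table barrier dirties one byte per fixed-size region of the heap, and the pause then re-scans every object on a dirty card, whereas a field-logging barrier sets one bit per field, so the pause visits only the exact edges that were written. The sketch below is illustrative only, with hypothetical constants and a hypothetical metadata layout (16-byte granules, two fields per metadata byte, a byte table at a fixed offset inside an aligned slab); the real layout is the nofl space's, exposed through the gc_write_barrier_field_* attrs changed in the first file below.

// Illustrative sketch only; constants and the table offset are hypothetical,
// not the actual whippet layout or API.
#include <stddef.h>
#include <stdint.h>

#define GRANULE_SIZE    16                  // hypothetical: 2 fields of 8 bytes
#define SLAB_SIZE       (4 * 1024 * 1024)   // hypothetical table alignment
#define METADATA_OFFSET 0x10000             // hypothetical offset of the byte table
#define LOGGED_0        0x08                // hypothetical logged-bit values
#define LOGGED_1        0x10

static inline int field_needs_logging(uintptr_t field_addr) {
  // One metadata byte per granule; each of the granule's two fields gets
  // its own logged bit.
  uintptr_t slab_base = field_addr & ~(uintptr_t)(SLAB_SIZE - 1);
  size_t granule = (field_addr - slab_base) / GRANULE_SIZE;
  uint8_t *byte = (uint8_t *)(slab_base + METADATA_OFFSET + granule);
  size_t field = field_addr / sizeof(uintptr_t);
  uint8_t bit = LOGGED_0 << (field % 2);
  return (*byte & bit) == 0;  // unlogged: take the slow path to log the edge
}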


@@ -56,7 +56,7 @@ static inline uint8_t gc_old_generation_check_alloc_table_bit_pattern(void) {
 static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
   if (GC_GENERATIONAL) {
     if (obj_size <= gc_allocator_large_threshold())
-      return GC_WRITE_BARRIER_CARD;
+      return GC_WRITE_BARRIER_FIELD;
     return GC_WRITE_BARRIER_SLOW;
   }
   return GC_WRITE_BARRIER_NONE;
@@ -71,7 +71,7 @@ static inline size_t gc_write_barrier_card_size(void) {
 }
 static inline size_t gc_write_barrier_field_table_alignment(void) {
   GC_ASSERT(GC_GENERATIONAL);
-  return 4 * 1024 * 1024;
+  return gc_allocator_alloc_table_alignment();
 }
 static inline size_t gc_write_barrier_field_fields_per_byte(void) {
   GC_ASSERT(GC_GENERATIONAL);

src/field-set.h (new file, 205 lines)

@@ -0,0 +1,205 @@
#ifndef FIELD_SET_H
#define FIELD_SET_H

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

#include "assert.h"
#include "gc-edge.h"
#include "gc-lock.h"
#include "tracer.h"
#define GC_EDGE_BUFFER_CAPACITY 510

struct gc_edge_buffer {
  struct gc_edge_buffer *next;
  size_t size;
  struct gc_edge edges[GC_EDGE_BUFFER_CAPACITY];
};

// Lock-free.
struct gc_edge_buffer_list {
  struct gc_edge_buffer *head;
};

// With a lock.
struct gc_edge_buffer_stack {
  struct gc_edge_buffer_list list;
};

struct gc_field_set {
  struct gc_edge_buffer_list full;
  struct gc_edge_buffer_stack partly_full;
  struct gc_edge_buffer_list empty;
  size_t count;
  pthread_mutex_t lock;
};

struct gc_field_set_writer {
  struct gc_edge_buffer *buf;
  struct gc_field_set *set;
};

static void
gc_edge_buffer_list_push(struct gc_edge_buffer_list *list,
                         struct gc_edge_buffer *buf) {
  struct gc_edge_buffer *next =
    atomic_load_explicit(&list->head, memory_order_relaxed);
  do {
    buf->next = next;
  } while (!atomic_compare_exchange_weak_explicit(&list->head, &next, buf,
                                                  memory_order_acq_rel,
                                                  memory_order_acquire));
}

static struct gc_edge_buffer*
gc_edge_buffer_list_pop(struct gc_edge_buffer_list *list) {
  struct gc_edge_buffer *head =
    atomic_load_explicit(&list->head, memory_order_acquire);
  struct gc_edge_buffer *next;
  do {
    if (!head) return NULL;
    next = head->next;
  } while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next,
                                                  memory_order_acq_rel,
                                                  memory_order_acquire));
  head->next = NULL;
  return head;
}

static void
gc_edge_buffer_stack_push(struct gc_edge_buffer_stack *stack,
                          struct gc_edge_buffer *buf,
                          const struct gc_lock *lock) {
  buf->next = stack->list.head;
  stack->list.head = buf;
}

static struct gc_edge_buffer*
gc_edge_buffer_stack_pop(struct gc_edge_buffer_stack *stack,
                         const struct gc_lock *lock) {
  struct gc_edge_buffer *head = stack->list.head;
  if (head) {
    stack->list.head = head->next;
    head->next = NULL;
  }
  return head;
}
static void
gc_field_set_init(struct gc_field_set *set) {
  memset(set, 0, sizeof(*set));
  pthread_mutex_init(&set->lock, NULL);
}

static struct gc_edge_buffer*
gc_field_set_acquire_buffer(struct gc_field_set *set) {
  struct gc_edge_buffer *ret;

  ret = gc_edge_buffer_list_pop(&set->empty);
  if (ret) return ret;

  struct gc_lock lock = gc_lock_acquire(&set->lock);
  ret = gc_edge_buffer_stack_pop(&set->partly_full, &lock);
  gc_lock_release(&lock);
  if (ret) return ret;

  // atomic inc count
  ret = malloc(sizeof(*ret));
  if (!ret) {
    perror("Failed to allocate remembered set");
    GC_CRASH();
  }
  memset(ret, 0, sizeof(*ret));
  return ret;
}

static void
gc_field_set_release_buffer(struct gc_field_set *set,
                            struct gc_edge_buffer *buf) {
  if (buf->size == GC_EDGE_BUFFER_CAPACITY) {
    gc_edge_buffer_list_push(&set->full, buf);
  } else {
    struct gc_lock lock = gc_lock_acquire(&set->lock);
    gc_edge_buffer_stack_push(&set->partly_full, buf, &lock);
    gc_lock_release(&lock);
  }
}
static void
gc_field_set_add_roots(struct gc_field_set *set, struct gc_tracer *tracer) {
  struct gc_edge_buffer *buf;
  for (buf = set->partly_full.list.head; buf; buf = buf->next)
    gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
  for (buf = set->full.head; buf; buf = buf->next)
    gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
}

static void
gc_field_set_clear(struct gc_field_set *set,
                   void (*forget_edge)(struct gc_edge, struct gc_heap*),
                   struct gc_heap *heap) {
  struct gc_edge_buffer *partly_full = set->partly_full.list.head;
  struct gc_edge_buffer *full = set->full.head;
  // Clear the full and partly full sets now so that if a collector
  // wanted to it could re-add an edge to the remembered set.
  set->partly_full.list.head = NULL;
  set->full.head = NULL;
  // Pushing a buffer onto the empty list overwrites its next pointer, so
  // save the successor before releasing each buffer.
  struct gc_edge_buffer *buf, *next;
  for (buf = partly_full; buf; buf = next) {
    next = buf->next;
    for (size_t i = 0; i < buf->size; i++)
      forget_edge(buf->edges[i], heap);
    buf->size = 0;
    gc_edge_buffer_list_push(&set->empty, buf);
  }
  for (buf = full; buf; buf = next) {
    next = buf->next;
    for (size_t i = 0; i < buf->size; i++)
      forget_edge(buf->edges[i], heap);
    buf->size = 0;
    gc_edge_buffer_list_push(&set->empty, buf);
  }
}
static inline void
gc_field_set_trace_edge_buffer(struct gc_field_set *set,
                               struct gc_edge_buffer *buf,
                               void (*tracer_visit)(struct gc_edge,
                                                    struct gc_heap*,
                                                    void *data),
                               struct gc_heap *heap,
                               struct gc_trace_worker *worker) {
  for (size_t i = 0; i < buf->size; i++)
    tracer_visit(buf->edges[i], heap, worker);
}

static void
gc_field_set_writer_release_buffer(struct gc_field_set_writer *writer) {
  if (writer->buf) {
    gc_field_set_release_buffer(writer->set, writer->buf);
    writer->buf = NULL;
  }
}

static void
gc_field_set_writer_init(struct gc_field_set_writer *writer,
                         struct gc_field_set *set) {
  writer->set = set;
  writer->buf = NULL;
}

static void
gc_field_set_writer_add_edge(struct gc_field_set_writer *writer,
                             struct gc_edge edge) {
  struct gc_edge_buffer *buf = writer->buf;
  if (GC_UNLIKELY(!buf))
    writer->buf = buf = gc_field_set_acquire_buffer(writer->set);
  GC_ASSERT(buf->size < GC_EDGE_BUFFER_CAPACITY);
  buf->edges[buf->size++] = edge;
  if (GC_UNLIKELY(buf->size == GC_EDGE_BUFFER_CAPACITY)) {
    gc_edge_buffer_list_push(&writer->set->full, buf);
    writer->buf = NULL;
  }
}

#endif // FIELD_SET_H
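The field-set API splits logging between per-mutator writers, which fill fixed-size edge buffers, and a per-heap set that hands those buffers to the tracer at pause time. The following is a rough usage sketch, not part of the commit: the struct and function names prefixed with example_ are illustrative, and it assumes the surrounding whippet headers (gc_edge() wrapping a field address, struct gc_tracer from tracer.h).

// Sketch of how a collector like mmc.c wires up the field set; only the
// gc_field_set_* and gc_edge() calls are the real API defined above.
#include "field-set.h"

struct example_heap {
  struct gc_field_set remembered_set;   // one per heap
};

struct example_mutator {
  struct gc_field_set_writer logger;    // one per mutator
};

static void example_setup(struct example_heap *heap,
                          struct example_mutator *mut) {
  gc_field_set_init(&heap->remembered_set);
  gc_field_set_writer_init(&mut->logger, &heap->remembered_set);
}

static void example_barrier_hit(struct example_mutator *mut, void **field) {
  // Called once the slow-path barrier decides this edge must be remembered.
  gc_field_set_writer_add_edge(&mut->logger, gc_edge(field));
}

static void example_before_pause(struct example_heap *heap,
                                 struct example_mutator *mut,
                                 struct gc_tracer *tracer) {
  // Flush the partially filled buffer, then enqueue every buffer as a root.
  gc_field_set_writer_release_buffer(&mut->logger);
  gc_field_set_add_roots(&heap->remembered_set, tracer);
}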


@@ -36,7 +36,7 @@ struct large_object_space {
   struct address_set from_space;
   struct address_set to_space;
   struct address_set survivor_space;
-  struct address_set remembered_set;
+  struct address_set remembered_edges;
   struct address_set free_space;
   struct address_map object_pages; // for each object: size in pages.
   struct address_map predecessors; // subsequent addr -> object addr
@@ -50,7 +50,7 @@ static int large_object_space_init(struct large_object_space *space,
   address_set_init(&space->from_space);
   address_set_init(&space->to_space);
   address_set_init(&space->survivor_space);
-  address_set_init(&space->remembered_set);
+  address_set_init(&space->remembered_edges);
   address_set_init(&space->free_space);
   address_map_init(&space->object_pages);
   address_map_init(&space->predecessors);
@@ -77,49 +77,6 @@ static inline int large_object_space_contains(struct large_object_space *space,
   return ret;
 }
 
-struct large_object_space_trace_remembered_data {
-  void (*trace)(struct gc_ref, struct gc_heap*);
-  struct gc_heap *heap;
-};
-
-static void large_object_space_trace_one_remembered(uintptr_t addr,
-                                                    void *data) {
-  struct gc_ref ref = gc_ref(addr);
-  gc_object_clear_remembered_nonatomic(ref);
-  struct large_object_space_trace_remembered_data *vdata = data;
-  vdata->trace(ref, vdata->heap);
-}
-
-static void
-large_object_space_clear_remembered_set(struct large_object_space *space) {
-  address_set_clear(&space->remembered_set);
-}
-
-static void
-large_object_space_trace_remembered_set(struct large_object_space *space,
-                                        void (*trace)(struct gc_ref,
-                                                      struct gc_heap*),
-                                        struct gc_heap *heap) {
-  struct large_object_space_trace_remembered_data vdata = { trace, heap };
-
-  if (!GC_GENERATIONAL)
-    return;
-
-  address_set_for_each(&space->remembered_set,
-                       large_object_space_trace_one_remembered, &vdata);
-  large_object_space_clear_remembered_set(space);
-}
-
-static void
-large_object_space_remember_object(struct large_object_space *space,
-                                   struct gc_ref ref) {
-  GC_ASSERT(GC_GENERATIONAL);
-  uintptr_t addr = gc_ref_value(ref);
-  pthread_mutex_lock(&space->lock);
-  GC_ASSERT(!address_set_contains(&space->remembered_set, addr));
-  address_set_add(&space->remembered_set, addr);
-  pthread_mutex_unlock(&space->lock);
-}
-
 static void large_object_space_flip_survivor(uintptr_t addr,
                                              void *data) {
   struct large_object_space *space = data;
@@ -176,17 +133,41 @@ static int large_object_space_is_copied(struct large_object_space *space,
   return copied;
 }
 
+static int
+large_object_space_is_survivor_with_lock(struct large_object_space *space,
+                                         struct gc_ref ref) {
+  return address_set_contains(&space->survivor_space, gc_ref_value(ref));
+}
+
 static int large_object_space_is_survivor(struct large_object_space *space,
                                           struct gc_ref ref) {
   GC_ASSERT(large_object_space_contains(space, ref));
-  int old = 0;
-  uintptr_t addr = gc_ref_value(ref);
   pthread_mutex_lock(&space->lock);
-  old = address_set_contains(&space->survivor_space, addr);
+  int old = large_object_space_is_survivor_with_lock(space, ref);
   pthread_mutex_unlock(&space->lock);
   return old;
 }
 
+static int large_object_space_remember_edge(struct large_object_space *space,
+                                            struct gc_ref obj,
+                                            struct gc_edge edge) {
+  int remembered = 0;
+  uintptr_t edge_addr = (uintptr_t)gc_edge_loc(edge);
+  pthread_mutex_lock(&space->lock);
+  if (large_object_space_is_survivor_with_lock(space, obj)
+      && !address_set_contains(&space->remembered_edges, edge_addr)) {
+    address_set_add(&space->remembered_edges, edge_addr);
+    remembered = 1;
+  }
+  pthread_mutex_unlock(&space->lock);
+  return remembered;
+}
+
+static void
+large_object_space_clear_remembered_edges(struct large_object_space *space) {
+  address_set_clear(&space->remembered_edges);
+}
+
 static int large_object_space_mark_object(struct large_object_space *space,
                                           struct gc_ref ref) {
   return large_object_space_copy(space, ref);


@@ -11,6 +11,7 @@
 #include "background-thread.h"
 #include "debug.h"
+#include "field-set.h"
 #include "gc-align.h"
 #include "gc-inline.h"
 #include "gc-platform.h"
@@ -33,6 +34,7 @@ struct gc_heap {
   struct nofl_space nofl_space;
   struct large_object_space large_object_space;
   struct gc_extern_space *extern_space;
+  struct gc_field_set remembered_set;
   size_t large_object_pages;
   pthread_mutex_t lock;
   pthread_cond_t collector_cond;
@@ -72,6 +74,7 @@ struct gc_heap {
 struct gc_mutator {
   struct nofl_allocator allocator;
+  struct gc_field_set_writer logger;
   struct gc_heap *heap;
   struct gc_stack stack;
   struct gc_mutator_roots *roots;
@@ -188,6 +191,7 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   mut->event_listener_data =
     heap->event_listener.mutator_added(heap->event_listener_data);
   nofl_allocator_reset(&mut->allocator);
+  gc_field_set_writer_init(&mut->logger, &heap->remembered_set);
   heap_lock(heap);
   // We have no roots. If there is a GC currently in progress, we have
   // nothing to add. Just wait until it's done.
@@ -207,6 +211,8 @@ add_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
 static void
 remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   MUTATOR_EVENT(mut, mutator_removed);
   mut->heap = NULL;
   heap_lock(heap);
@@ -360,12 +366,9 @@ trace_root(struct gc_root root, struct gc_heap *heap,
   case GC_ROOT_KIND_EDGE:
     tracer_visit(root.edge, heap, worker);
     break;
-  case GC_ROOT_KIND_REMEMBERED_OBJECT:
-    trace_one(root.ref, heap, worker);
-    break;
-  case GC_ROOT_KIND_REMEMBERED_SLAB:
-    nofl_space_trace_remembered_slab(heap_nofl_space(heap), root.idx,
-                                     trace_one, heap, worker);
+  case GC_ROOT_KIND_EDGE_BUFFER:
+    gc_field_set_trace_edge_buffer(&heap->remembered_set, root.edge_buffer,
+                                   tracer_visit, heap, worker);
     break;
   default:
     GC_CRASH();
@@ -633,25 +636,27 @@ enqueue_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) {
   gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
 }
 
-static void
-enqueue_remembered_object(struct gc_ref ref, struct gc_heap *heap) {
-  gc_tracer_add_root(&heap->tracer, gc_root_remembered_object(ref));
-}
-
 static void
 enqueue_generational_roots(struct gc_heap *heap,
                            enum gc_collection_kind gc_kind) {
   if (!GC_GENERATIONAL) return;
-  if (gc_kind == GC_COLLECTION_MINOR) {
-    for (size_t i = 0; i < heap_nofl_space(heap)->nslabs; i++)
-      gc_tracer_add_root(&heap->tracer, gc_root_remembered_slab(i));
-    large_object_space_trace_remembered_set(heap_large_object_space(heap),
-                                            enqueue_remembered_object,
-                                            heap);
-  } else {
-    nofl_space_clear_remembered_set(heap_nofl_space(heap));
-    large_object_space_clear_remembered_set(heap_large_object_space(heap));
-  }
+  if (gc_kind == GC_COLLECTION_MINOR)
+    gc_field_set_add_roots(&heap->remembered_set, &heap->tracer);
+}
+
+static inline void
+forget_remembered_edge(struct gc_edge edge, struct gc_heap *heap) {
+  struct nofl_space *space = heap_nofl_space(heap);
+  if (nofl_space_contains_edge(space, edge))
+    nofl_space_forget_edge(space, edge);
+  // Otherwise the edge is in the lospace, whose remembered edges are
+  // cleared in bulk.
+}
+
+static void
+clear_remembered_set(struct gc_heap *heap) {
+  gc_field_set_clear(&heap->remembered_set, forget_remembered_edge, heap);
+  large_object_space_clear_remembered_edges(heap_large_object_space(heap));
 }
 
 static void
@@ -768,6 +773,7 @@ collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind,
   HEAP_EVENT(heap, finalizers_traced);
   sweep_ephemerons(heap);
   gc_tracer_release(&heap->tracer);
+  clear_remembered_set(heap);
   nofl_space_finish_gc(nofl_space, gc_kind);
   large_object_space_finish_gc(lospace, is_minor);
   gc_extern_space_finish_gc(exspace, is_minor);
@@ -792,6 +798,8 @@ trigger_collection(struct gc_mutator *mut,
   int prev_kind = -1;
   gc_stack_capture_hot(&mut->stack);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   heap_lock(heap);
   while (mutators_are_stopping(heap))
     prev_kind = pause_mutator_for_collection(heap, mut);
@@ -815,6 +823,8 @@ gc_safepoint_slow(struct gc_mutator *mut) {
   struct gc_heap *heap = mutator_heap(mut);
   gc_stack_capture_hot(&mut->stack);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   heap_lock(heap);
   while (mutators_are_stopping(mutator_heap(mut)))
     pause_mutator_for_collection(heap, mut);
@@ -900,14 +910,16 @@ void
 gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj,
                       size_t obj_size, struct gc_edge edge,
                       struct gc_ref new_val) {
+  GC_ASSERT(gc_ref_is_heap_object(new_val));
   if (!GC_GENERATIONAL) return;
-  GC_ASSERT(obj_size > gc_allocator_large_threshold());
-  struct gc_heap *heap = mutator_heap(mut);
-  struct large_object_space *space = heap_large_object_space(heap);
-  if (!large_object_space_is_survivor(space, obj))
+  if (gc_object_is_old_generation_slow(mut, new_val))
     return;
-  if (gc_object_set_remembered(obj))
-    large_object_space_remember_object(space, obj);
+  struct gc_heap *heap = mutator_heap(mut);
+  if ((obj_size <= gc_allocator_large_threshold())
+      ? nofl_space_remember_edge(heap_nofl_space(heap), obj, edge)
+      : large_object_space_remember_edge(heap_large_object_space(heap),
+                                         obj, edge))
+    gc_field_set_writer_add_edge(&mut->logger, edge);
 }
 
 struct gc_ephemeron*
@@ -1032,6 +1044,7 @@ static int
 heap_init(struct gc_heap *heap, const struct gc_options *options) {
   // *heap is already initialized to 0.
+  gc_field_set_init(&heap->remembered_set);
   pthread_mutex_init(&heap->lock, NULL);
   pthread_cond_init(&heap->mutator_cond, NULL);
   pthread_cond_init(&heap->collector_cond, NULL);
@@ -1080,9 +1093,11 @@ gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
   GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(), NOFL_METADATA_BYTE_YOUNG);
   GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), NOFL_METADATA_BYTE_END);
   if (GC_GENERATIONAL) {
-    GC_ASSERT_EQ(gc_write_barrier_card_table_alignment(), NOFL_SLAB_SIZE);
-    GC_ASSERT_EQ(gc_write_barrier_card_size(),
-                 NOFL_BLOCK_SIZE / NOFL_REMSET_BYTES_PER_BLOCK);
+    GC_ASSERT_EQ(gc_write_barrier_field_table_alignment(), NOFL_SLAB_SIZE);
+    GC_ASSERT_EQ(gc_write_barrier_field_fields_per_byte(),
+                 NOFL_GRANULE_SIZE / sizeof(uintptr_t));
+    GC_ASSERT_EQ(gc_write_barrier_field_first_bit_pattern(),
+                 NOFL_METADATA_BYTE_LOGGED_0);
   }
 
   *heap = calloc(1, sizeof(struct gc_heap));
@@ -1139,6 +1154,8 @@ static void
 deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) {
   GC_ASSERT(mut->next == NULL);
   nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap));
+  if (GC_GENERATIONAL)
+    gc_field_set_writer_release_buffer(&mut->logger);
   heap_lock(heap);
   heap->inactive_mutator_count++;
   gc_stack_capture_hot(&mut->stack);


@@ -45,10 +45,10 @@ STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD,
 #define NOFL_NONMETA_BLOCKS_PER_SLAB (NOFL_BLOCKS_PER_SLAB - NOFL_META_BLOCKS_PER_SLAB)
 #define NOFL_METADATA_BYTES_PER_SLAB (NOFL_NONMETA_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK)
 #define NOFL_SLACK_METADATA_BYTES_PER_SLAB (NOFL_META_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK)
-#define NOFL_REMSET_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
-#define NOFL_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB)
-#define NOFL_SLACK_REMSET_BYTES_PER_SLAB (NOFL_REMSET_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB)
-#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_REMSET_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
+#define NOFL_VESTIGIAL_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
+#define NOFL_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB)
+#define NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB)
+#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB)
 #define NOFL_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB)
 #define NOFL_SLACK_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB)
 #define NOFL_HEADER_BYTES_PER_SLAB NOFL_SLACK_SUMMARY_BYTES_PER_SLAB
@@ -127,7 +127,7 @@ struct nofl_block_ref {
 struct nofl_slab {
   struct nofl_slab_header header;
   struct nofl_block_summary summaries[NOFL_NONMETA_BLOCKS_PER_SLAB];
-  uint8_t remembered_set[NOFL_REMSET_BYTES_PER_SLAB];
+  uint8_t unused[NOFL_VESTIGIAL_BYTES_PER_SLAB];
   uint8_t metadata[NOFL_METADATA_BYTES_PER_SLAB];
   struct nofl_block blocks[NOFL_NONMETA_BLOCKS_PER_SLAB];
 };
@@ -297,8 +297,6 @@ nofl_block_set_mark(uintptr_t addr) {
 }
 
 #define NOFL_GRANULES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE)
-#define NOFL_GRANULES_PER_REMSET_BYTE \
-  (NOFL_GRANULES_PER_BLOCK / NOFL_REMSET_BYTES_PER_BLOCK)
 
 static struct nofl_block_summary*
 nofl_block_summary_for_addr(uintptr_t addr) {
@@ -909,67 +907,75 @@ nofl_space_set_ephemeron_flag(struct gc_ref ref) {
 struct gc_trace_worker;
 
-// Note that it's quite possible (and even likely) that any given remset
-// byte doesn't hold any roots, if all stores were to nursery objects.
-STATIC_ASSERT_EQ(NOFL_GRANULES_PER_REMSET_BYTE % 8, 0);
-
-static void
-nofl_space_trace_card(struct nofl_space *space, struct nofl_slab *slab,
-                      size_t card,
-                      void (*trace_object)(struct gc_ref,
-                                           struct gc_heap*,
-                                           struct gc_trace_worker*),
-                      struct gc_heap *heap,
-                      struct gc_trace_worker *worker) {
-  uintptr_t first_addr_in_slab = (uintptr_t) &slab->blocks[0];
-  size_t granule_base = card * NOFL_GRANULES_PER_REMSET_BYTE;
-  for (size_t granule_in_remset = 0;
-       granule_in_remset < NOFL_GRANULES_PER_REMSET_BYTE;
-       granule_in_remset += 8, granule_base += 8) {
-    uint64_t mark_bytes = load_eight_aligned_bytes(slab->metadata + granule_base);
-    mark_bytes &= space->sweep_mask;
-    while (mark_bytes) {
-      size_t granule_offset = count_zero_bytes(mark_bytes);
-      mark_bytes &= ~(((uint64_t)0xff) << (granule_offset * 8));
-      size_t granule = granule_base + granule_offset;
-      uintptr_t addr = first_addr_in_slab + granule * NOFL_GRANULE_SIZE;
-      GC_ASSERT(nofl_metadata_byte_for_addr(addr) == &slab->metadata[granule]);
-      trace_object(gc_ref(addr), heap, worker);
-    }
-  }
+static inline int
+nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) {
+  return extents_contain_addr(space->extents, addr);
+}
+
+static inline int
+nofl_space_contains_conservative_ref(struct nofl_space *space,
+                                     struct gc_conservative_ref ref) {
+  return nofl_space_contains_address(space, gc_conservative_ref_value(ref));
+}
+
+static inline int
+nofl_space_contains(struct nofl_space *space, struct gc_ref ref) {
+  return nofl_space_contains_address(space, gc_ref_value(ref));
+}
+
+static inline int
+nofl_space_contains_edge(struct nofl_space *space, struct gc_edge edge) {
+  return nofl_space_contains_address(space, (uintptr_t)gc_edge_loc(edge));
+}
+
+static inline int
+nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) {
+  uint8_t *metadata = nofl_metadata_byte_for_object(ref);
+  uint8_t mask = NOFL_METADATA_BYTE_MARK_0
+    | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2;
+  uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed);
+  return byte & mask;
+}
+
+static uint8_t*
+nofl_field_logged_byte(struct gc_edge edge) {
+  return nofl_metadata_byte_for_addr((uintptr_t)gc_edge_loc(edge));
+}
+
+static uint8_t
+nofl_field_logged_bit(struct gc_edge edge) {
+  GC_ASSERT_EQ(sizeof(uintptr_t) * 2, NOFL_GRANULE_SIZE);
+  size_t field = ((uintptr_t)gc_edge_loc(edge)) / sizeof(uintptr_t);
+  return NOFL_METADATA_BYTE_LOGGED_0 << (field % 2);
+}
+
+static int
+nofl_space_remember_edge(struct nofl_space *space, struct gc_ref obj,
+                         struct gc_edge edge) {
+  GC_ASSERT(nofl_space_contains(space, obj));
+  if (!GC_GENERATIONAL) return 0;
+  if (!nofl_space_is_survivor(space, obj))
+    return 0;
+  uint8_t* loc = nofl_field_logged_byte(edge);
+  uint8_t bit = nofl_field_logged_bit(edge);
+  uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
+  do {
+    if (byte & bit) return 0;
+  } while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit,
+                                                  memory_order_acq_rel,
+                                                  memory_order_acquire));
+  return 1;
 }
 
 static void
-nofl_space_trace_remembered_slab(struct nofl_space *space,
-                                 size_t slab_idx,
-                                 void (*trace_object)(struct gc_ref,
-                                                      struct gc_heap*,
-                                                      struct gc_trace_worker*),
-                                 struct gc_heap *heap,
-                                 struct gc_trace_worker *worker) {
-  GC_ASSERT(slab_idx < space->nslabs);
-  struct nofl_slab *slab = space->slabs[slab_idx];
-  uint8_t *remset = slab->remembered_set;
-  for (size_t card_base = 0;
-       card_base < NOFL_REMSET_BYTES_PER_SLAB;
-       card_base += 8) {
-    uint64_t remset_bytes = load_eight_aligned_bytes(remset + card_base);
-    if (!remset_bytes) continue;
-    memset(remset + card_base, 0, 8);
-    while (remset_bytes) {
-      size_t card_offset = count_zero_bytes(remset_bytes);
-      remset_bytes &= ~(((uint64_t)0xff) << (card_offset * 8));
-      nofl_space_trace_card(space, slab, card_base + card_offset,
-                            trace_object, heap, worker);
-    }
-  }
-}
-
-static void
-nofl_space_clear_remembered_set(struct nofl_space *space) {
-  if (!GC_GENERATIONAL) return;
-  for (size_t slab = 0; slab < space->nslabs; slab++) {
-    memset(space->slabs[slab]->remembered_set, 0, NOFL_REMSET_BYTES_PER_SLAB);
-  }
+nofl_space_forget_edge(struct nofl_space *space, struct gc_edge edge) {
+  GC_ASSERT(nofl_space_contains_edge(space, edge));
+  GC_ASSERT(GC_GENERATIONAL);
+  uint8_t* loc = nofl_field_logged_byte(edge);
+  // Clear both logged bits.
+  uint8_t bits = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1;
+  uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
+  atomic_store_explicit(loc, byte & ~bits, memory_order_release);
 }
 
 static void
@@ -1431,13 +1437,29 @@ nofl_space_pin_object(struct nofl_space *space, struct gc_ref ref) {
                                                   memory_order_acquire));
 }
 
-static inline int
-nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) {
-  uint8_t *metadata = nofl_metadata_byte_for_object(ref);
-  uint8_t mask = NOFL_METADATA_BYTE_MARK_0
-    | NOFL_METADATA_BYTE_MARK_1 | NOFL_METADATA_BYTE_MARK_2;
-  uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed);
-  return byte & mask;
+static inline void
+clear_logged_bits_in_evacuated_object(uint8_t *metadata, size_t count) {
+  // On a major collection, it could be that we evacuate an object that
+  // has one or more fields in the old-to-new remembered set. Because
+  // the young generation is empty after a major collection, we know the
+  // old-to-new remembered set will be empty also. To clear the
+  // remembered set, we call gc_field_set_clear, which will end up
+  // visiting all remembered edges and clearing their logged bits. But
+  // that doesn't work for evacuated objects, because their edges move:
+  // gc_field_set_clear will frob the pre-evacuation metadata bytes of
+  // the object. So here we explicitly clear logged bits for evacuated
+  // objects. That the bits for the pre-evacuation location are also
+  // frobbed by gc_field_set_clear doesn't cause a problem, as that
+  // memory will be swept and cleared later.
+  //
+  // This concern doesn't apply to minor collections: there we will
+  // never evacuate an object in the remembered set, because old objects
+  // aren't traced during a minor collection.
+  uint8_t mask = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1;
+  for (size_t i = 0; i < count; i++) {
+    if (metadata[i] & mask)
+      metadata[i] &= ~mask;
+  }
 }
 
 static inline int
@@ -1472,6 +1494,8 @@ nofl_space_evacuate(struct nofl_space *space, uint8_t *metadata, uint8_t byte,
     // the object's fields need to be traced.
     uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref);
     memcpy(new_metadata + 1, metadata + 1, object_granules - 1);
+    if (GC_GENERATIONAL)
+      clear_logged_bits_in_evacuated_object(new_metadata, object_granules);
     gc_edge_update(edge, new_ref);
     return nofl_space_set_nonempty_mark(space, new_metadata, byte,
                                         new_ref);
@@ -1523,22 +1547,6 @@ nofl_space_evacuate_or_mark_object(struct nofl_space *space,
   return nofl_space_set_nonempty_mark(space, metadata, byte, old_ref);
 }
 
-static inline int
-nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) {
-  return extents_contain_addr(space->extents, addr);
-}
-
-static inline int
-nofl_space_contains_conservative_ref(struct nofl_space *space,
-                                     struct gc_conservative_ref ref) {
-  return nofl_space_contains_address(space, gc_conservative_ref_value(ref));
-}
-
-static inline int
-nofl_space_contains(struct nofl_space *space, struct gc_ref ref) {
-  return nofl_space_contains_address(space, gc_ref_value(ref));
-}
-
 static inline int
 nofl_space_forward_if_evacuated(struct nofl_space *space,
                                 struct gc_edge edge,


@@ -7,6 +7,7 @@
 struct gc_ephemeron;
 struct gc_heap;
 struct gc_mutator;
+struct gc_edge_buffer;
 
 enum gc_root_kind {
   GC_ROOT_KIND_NONE,
@@ -16,8 +17,7 @@ enum gc_root_kind {
   GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES,
   GC_ROOT_KIND_RESOLVED_EPHEMERONS,
   GC_ROOT_KIND_EDGE,
-  GC_ROOT_KIND_REMEMBERED_OBJECT,
-  GC_ROOT_KIND_REMEMBERED_SLAB,
+  GC_ROOT_KIND_EDGE_BUFFER,
 };
 
 struct gc_root {
@@ -28,8 +28,7 @@ struct gc_root {
     struct gc_ephemeron *resolved_ephemerons;
     struct extent_range range;
     struct gc_edge edge;
-    struct gc_ref ref;
-    size_t idx;
+    struct gc_edge_buffer *edge_buffer;
   };
 };
 
@@ -73,16 +72,9 @@ gc_root_edge(struct gc_edge edge) {
 }
 
 static inline struct gc_root
-gc_root_remembered_object(struct gc_ref ref) {
-  struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_OBJECT };
-  ret.ref = ref;
-  return ret;
-}
-
-static inline struct gc_root
-gc_root_remembered_slab(size_t idx) {
-  struct gc_root ret = { GC_ROOT_KIND_REMEMBERED_SLAB };
-  ret.idx = idx;
+gc_root_edge_buffer(struct gc_edge_buffer *buf) {
+  struct gc_root ret = { GC_ROOT_KIND_EDGE_BUFFER };
+  ret.edge_buffer = buf;
   return ret;
 }