
Merged Whippet into libguile/whippet

Andy Wingo 2025-04-11 14:10:41 +02:00
commit db181e67ff
112 changed files with 18115 additions and 0 deletions

16
libguile/whippet/.gitignore vendored Normal file
View file

@@ -0,0 +1,16 @@
/*.o
/*.bdw
/*.semi
/*.mmc
/*.generational-mmc
/*.parallel-mmc
/*.parallel-generational-mmc
/*.stack-conservative-mmc
/*.stack-conservative-generational-mmc
/*.stack-conservative-parallel-mmc
/*.stack-conservative-parallel-generational-mmc
/*.heap-conservative-mmc
/*.heap-conservative-generational-mmc
/*.heap-conservative-parallel-mmc
/*.heap-conservative-parallel-generational-mmc
/.deps/

141
libguile/whippet/Makefile Normal file
View file

@@ -0,0 +1,141 @@
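# Build all benchmark binaries with `make`; build a single configuration
# with e.g. `make bin/mt-gcbench.pcc`, and pick a build flavor with
# BUILD=opt|optdebug|debug (default: opt).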
TESTS = quads mt-gcbench ephemerons finalizers
COLLECTORS = \
bdw \
semi \
\
pcc \
generational-pcc \
\
mmc \
stack-conservative-mmc \
heap-conservative-mmc \
\
parallel-mmc \
stack-conservative-parallel-mmc \
heap-conservative-parallel-mmc \
\
generational-mmc \
stack-conservative-generational-mmc \
heap-conservative-generational-mmc \
\
parallel-generational-mmc \
stack-conservative-parallel-generational-mmc \
heap-conservative-parallel-generational-mmc
DEFAULT_BUILD := opt
BUILD_CFLAGS_opt = -O2 -g -DNDEBUG
BUILD_CFLAGS_optdebug = -Og -g -DGC_DEBUG=1
BUILD_CFLAGS_debug = -O0 -g -DGC_DEBUG=1
BUILD_CFLAGS = $(BUILD_CFLAGS_$(or $(BUILD),$(DEFAULT_BUILD)))
USE_LTTNG_0 :=
USE_LTTNG_1 := 1
USE_LTTNG := $(shell pkg-config --exists lttng-ust && echo 1 || echo 0)
LTTNG_CPPFLAGS := $(if $(USE_LTTNG_$(USE_LTTNG)), $(shell pkg-config --cflags lttng-ust),)
LTTNG_LIBS := $(if $(USE_LTTNG_$(USE_LTTNG)), $(shell pkg-config --libs lttng-ust),)
TRACEPOINT_CPPFLAGS = $(if $(USE_LTTNG_$(USE_LTTNG)),$(LTTNG_CPPFLAGS) -DGC_TRACEPOINT_LTTNG=1,)
TRACEPOINT_LIBS = $(LTTNG_LIBS)
CC = gcc
CFLAGS = -Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(BUILD_CFLAGS)
CPPFLAGS = -Iapi $(TRACEPOINT_CPPFLAGS)
LDFLAGS = -lpthread -flto=auto $(TRACEPOINT_LIBS)
DEPFLAGS = -MMD -MP -MF $(@:obj/%.o=.deps/%.d)
COMPILE = $(CC) $(CFLAGS) $(CPPFLAGS) $(DEPFLAGS) -o $@
LINK = $(CC) $(LDFLAGS) -o $@
PLATFORM = gnu-linux
ALL_TESTS = $(foreach COLLECTOR,$(COLLECTORS),$(addsuffix .$(COLLECTOR),$(TESTS)))
all: $(ALL_TESTS:%=bin/%)
.deps obj bin: ; mkdir -p $@
include $(wildcard .deps/*)
obj/gc-platform.o: src/gc-platform-$(PLATFORM).c | .deps obj
	$(COMPILE) -c $<
obj/gc-stack.o: src/gc-stack.c | .deps obj
	$(COMPILE) -c $<
obj/gc-options.o: src/gc-options.c | .deps obj
	$(COMPILE) -c $<
obj/gc-tracepoint.o: src/gc-tracepoint.c | .deps obj
	$(COMPILE) -c $<
obj/%.gc-ephemeron.o: src/gc-ephemeron.c | .deps obj
	$(COMPILE) -include benchmarks/$*-embedder.h -c $<
obj/%.gc-finalizer.o: src/gc-finalizer.c | .deps obj
	$(COMPILE) -include benchmarks/$*-embedder.h -c $<
GC_STEM_bdw = bdw
GC_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
GC_IMPL_CFLAGS_bdw = `pkg-config --cflags bdw-gc`
GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_LIBS_semi = -lm
GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
GC_LIBS_pcc = -lm
GC_STEM_generational_pcc = $(GC_STEM_pcc)
GC_CFLAGS_generational_pcc = $(GC_CFLAGS_pcc) -DGC_GENERATIONAL=1
GC_LIBS_generational_pcc = $(GC_LIBS_pcc)
define mmc_variant
GC_STEM_$(1) = mmc
GC_CFLAGS_$(1) = $(2)
GC_LIBS_$(1) = -lm
endef
define generational_mmc_variants
$(call mmc_variant,$(1)mmc,$(2))
$(call mmc_variant,$(1)generational_mmc,$(2) -DGC_GENERATIONAL=1)
endef
define parallel_mmc_variants
$(call generational_mmc_variants,$(1),$(2))
$(call generational_mmc_variants,$(1)parallel_,$(2) -DGC_PARALLEL=1)
endef
define trace_mmc_variants
$(call parallel_mmc_variants,,-DGC_PRECISE_ROOTS=1)
$(call parallel_mmc_variants,stack_conservative_,-DGC_CONSERVATIVE_ROOTS=1)
$(call parallel_mmc_variants,heap_conservative_,-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1)
endef
$(eval $(call trace_mmc_variants))
# $(1) is the benchmark, $(2) is the collector configuration
make_gc_var = $$($(1)$(subst -,_,$(2)))
gc_impl = $(call make_gc_var,GC_STEM_,$(1)).c
gc_attrs = $(call make_gc_var,GC_STEM_,$(1))-attrs.h
gc_cflags = $(call make_gc_var,GC_CFLAGS_,$(1))
gc_impl_cflags = $(call make_gc_var,GC_IMPL_CFLAGS_,$(1))
gc_libs = $(call make_gc_var,GC_LIBS_,$(1))
define benchmark_template
obj/$(1).$(2).gc.o: src/$(call gc_impl,$(2)) | .deps obj
	$$(COMPILE) $(call gc_cflags,$(2)) $(call gc_impl_cflags,$(2)) -include benchmarks/$(1)-embedder.h -c $$<
obj/$(1).$(2).o: benchmarks/$(1).c | .deps obj
	$$(COMPILE) $(call gc_cflags,$(2)) -include api/$(call gc_attrs,$(2)) -c $$<
bin/$(1).$(2): obj/$(1).$(2).gc.o obj/$(1).$(2).o obj/gc-stack.o obj/gc-options.o obj/gc-platform.o obj/gc-tracepoint.o obj/$(1).gc-ephemeron.o obj/$(1).gc-finalizer.o | bin
	$$(LINK) $$^ $(call gc_libs,$(2))
endef
$(foreach BENCHMARK,$(TESTS),\
$(foreach COLLECTOR,$(COLLECTORS),\
$(eval $(call benchmark_template,$(BENCHMARK),$(COLLECTOR)))))
.PRECIOUS: $(ALL_TESTS) $(OBJS)
clean:
	rm -f $(ALL_TESTS)
	rm -rf .deps obj bin
# Clear some of the default rules.
.SUFFIXES:
.SECONDARY:
%.c:;
Makefile:;

View file

@@ -0,0 +1,91 @@
# Whippet Garbage Collector
This repository is for development of Whippet, a new garbage collector
implementation, eventually for use in [Guile
Scheme](https://gnu.org/s/guile).
Whippet is an embed-only C library, designed to be copied into a
program's source tree. It exposes an abstract C API for managed memory
allocation, and provides a number of implementations of that API.
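For a rough feel for the user-facing side, a minimal embedder sketch might look
like the following. It is illustrative only: it assumes the embedder has
already implemented the hooks in `gc-embedder-api.h`, selected a collector and
its attrs header at build time, and that `gc_init` returns nonzero on success;
see the manual for the real story.
```
#include <stdio.h>
#include "gc-api.h"
#include "gc-basic-stats.h"

/* Sketch only; error handling and embedder hooks omitted. */
static void* run(struct gc_stack_addr *stack_base, void *arg) {
  (void)arg;
  struct gc_options *options = gc_allocate_options();
  gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
  gc_options_set_size(options, GC_OPTION_HEAP_SIZE, 64 * 1024 * 1024);

  struct gc_heap *heap;
  struct gc_mutator *mut;
  struct gc_basic_stats stats;
  if (!gc_init(options, stack_base, &heap, &mut, GC_BASIC_STATS, &stats))
    return NULL;

  /* Allocate a 32-byte object; its fields are traced via the embedder's
     gc_trace_object. */
  void *obj = gc_allocate(mut, 32, GC_ALLOCATION_TAGGED);

  gc_basic_stats_finish(&stats);
  gc_basic_stats_print(&stats, stdout);
  return obj;
}

int main(void) {
  gc_call_with_stack_addr(run, NULL);
  return 0;
}
```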
## Documentation
See the [documentation](./doc/README.md).
## Features
- Per-object pinning (with `mmc` collectors)
- Finalization (supporting resuscitation)
- Ephemerons (except on `bdw`, which has a polyfill)
- Conservative roots (optionally with `mmc` or always with `bdw`)
- Precise roots (optionally with `mmc` or always with `semi` / `pcc`)
- Precise embedder-parameterized heap tracing (except with `bdw`)
- Conservative heap tracing (optionally with `mmc`, always with `bdw`)
- Parallel tracing (except `semi`)
- Parallel mutators (except `semi`)
- Inline allocation / write barrier fast paths (supporting JIT; see the
  sketch after this list)
- One unified API with no-overhead abstraction: switch collectors when
you like
- Three policies for sizing heaps: fixed, proportional to live size, and
[MemBalancer](http://marisa.moe/balancer.html)
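The write-barrier fast path is intended to be inlined at reference stores; a
sketch of its use follows, where the `pair` layout and setter are hypothetical
and not part of the API:
```
#include "gc-api.h"

struct pair { struct gc_ref car, cdr; };  /* hypothetical embedder object */

static void pair_set_car(struct gc_mutator *mut, struct pair *p,
                         struct gc_ref new_val) {
  /* Let the collector note the store of new_val into p->car; the inline
     fast path decides whether the out-of-line barrier must run. */
  gc_write_barrier(mut, gc_ref_from_heap_object(p), sizeof *p,
                   gc_edge(&p->car), new_val);
  p->car = new_val;
}
```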
## Source repository structure
* [api/](./api/): The user-facing API. Also, the "embedder API"; see
the [manual](./doc/manual.md) for more.
* [doc/](./doc/): Documentation, such as it is.
* [src/](./src/): The actual GC implementation, containing a number of
collector implementations. The embedder chooses which collector to
use at compile-time. See the [documentation](./doc/collectors.md)
for more on the different collectors (`semi`, `bdw`, `pcc`, and the
different flavors of `mmc`).
* [benchmarks/](./benchmarks/): Benchmarks. A work in progress.
* [test/](./test/): A dusty attic of minimal testing.
## Status and roadmap
As of January 2025, Whippet is good to go! Of course there will surely
be new features to build as Whippet gets integrated into language
run-times, but the basics are there.
The next phase on the roadmap is support for tracing, and
some performance noodling.
Once that is done, the big task is integrating Whippet into the [Guile
Scheme](https://gnu.org/s/guile) language run-time, replacing BDW-GC.
Fingers crossed!
## About the name
It sounds better than WIP (work-in-progress) garbage collector, doesn't
it? Also apparently a whippet is a kind of dog that is fast for its
size. It would be nice if the Whippet collectors turn out to have this
property.
## License
```
Copyright (c) 2022-2024 Andy Wingo
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
```
Note that some benchmarks have other licenses; see
[`benchmarks/README.md`](./benchmarks/README.md) for more.

View file

@@ -0,0 +1,91 @@
#ifndef BDW_ATTRS_H
#define BDW_ATTRS_H
#include "gc-attrs.h"
#include "gc-assert.h"
static inline enum gc_allocator_kind gc_allocator_kind(void) {
return GC_ALLOCATOR_INLINE_FREELIST;
}
static inline size_t gc_allocator_small_granule_size(void) {
return 2 * sizeof(void *);
}
static inline size_t gc_allocator_large_threshold(void) {
return 256;
}
static inline size_t gc_allocator_allocation_pointer_offset(void) {
GC_CRASH();
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
GC_CRASH();
}
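// Byte offsets into the mutator of the inline freelist heads used by the
// fast path: one bucket per small-granule size class up to the large
// threshold, with the pointerless kinds in a second run of buckets after
// the pointer-bearing ones.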
static inline size_t gc_allocator_freelist_offset(size_t size,
enum gc_allocation_kind kind) {
GC_ASSERT(size);
size_t base;
switch (kind) {
case GC_ALLOCATION_TAGGED:
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
base = 0;
break;
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
case GC_ALLOCATION_TAGGED_POINTERLESS:
base = (sizeof(void*) * gc_allocator_large_threshold() /
gc_allocator_small_granule_size());
break;
}
size_t bucket = (size - 1) / gc_allocator_small_granule_size();
return base + sizeof(void*) * bucket;
}
static inline size_t gc_allocator_alloc_table_alignment(void) {
return 0;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind) {
GC_CRASH();
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
GC_CRASH();
}
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t) {
return GC_OLD_GENERATION_CHECK_NONE;
}
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
GC_CRASH();
}
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
GC_CRASH();
}
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t) {
return GC_WRITE_BARRIER_NONE;
}
static inline size_t gc_write_barrier_field_table_alignment(void) {
GC_CRASH();
}
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
GC_CRASH();
}
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
GC_CRASH();
}
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
GC_CRASH();
}
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
return GC_SAFEPOINT_MECHANISM_SIGNAL;
}
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
return GC_COOPERATIVE_SAFEPOINT_NONE;
}
static inline int gc_can_pin_objects(void) {
return 1;
}
#endif // BDW_ATTRS_H

View file

@@ -0,0 +1,19 @@
#ifndef GC_ALLOCATION_KIND_H
#define GC_ALLOCATION_KIND_H
enum gc_allocation_kind {
// An object whose type can be inspected at run-time based on its contents,
// and whose fields can be traced via the gc_trace_object procedure.
GC_ALLOCATION_TAGGED,
// Like GC_ALLOCATION_TAGGED, but not containing any fields that reference
// GC-managed objects. The GC may choose to handle these specially.
GC_ALLOCATION_TAGGED_POINTERLESS,
// A raw allocation whose type cannot be inspected at trace-time, and whose
// fields should be traced conservatively.
GC_ALLOCATION_UNTAGGED_CONSERVATIVE,
// A raw allocation whose type cannot be inspected at trace-time, but
// containing no fields that reference GC-managed objects.
GC_ALLOCATION_UNTAGGED_POINTERLESS
};
#endif // GC_ALLOCATION_KIND_H

View file

@@ -0,0 +1,301 @@
#ifndef GC_API_H_
#define GC_API_H_
#include "gc-config.h"
#include "gc-allocation-kind.h"
#include "gc-assert.h"
#include "gc-attrs.h"
#include "gc-collection-kind.h"
#include "gc-edge.h"
#include "gc-event-listener.h"
#include "gc-inline.h"
#include "gc-options.h"
#include "gc-ref.h"
#include "gc-visibility.h"
#include <stdatomic.h>
#include <stdint.h>
#include <string.h>
struct gc_heap;
struct gc_mutator;
struct gc_stack_addr;
GC_API_ void* gc_call_with_stack_addr(void* (*f)(struct gc_stack_addr *,
void *),
void *data) GC_NEVER_INLINE;
GC_API_ int gc_init(const struct gc_options *options,
struct gc_stack_addr *base, struct gc_heap **heap,
struct gc_mutator **mutator,
struct gc_event_listener event_listener,
void *event_listener_data);
GC_API_ uint64_t gc_allocation_counter(struct gc_heap *heap);
GC_API_ struct gc_heap* gc_mutator_heap(struct gc_mutator *mut);
GC_API_ uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap);
GC_API_ uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap);
struct gc_mutator_roots;
GC_API_ void gc_mutator_set_roots(struct gc_mutator *mut,
struct gc_mutator_roots *roots);
struct gc_heap_roots;
GC_API_ void gc_heap_set_roots(struct gc_heap *heap,
struct gc_heap_roots *roots);
struct gc_extern_space;
GC_API_ void gc_heap_set_extern_space(struct gc_heap *heap,
struct gc_extern_space *space);
GC_API_ struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *base,
struct gc_heap *heap);
GC_API_ void gc_finish_for_thread(struct gc_mutator *mut);
GC_API_ void* gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*),
void *data) GC_NEVER_INLINE;
GC_API_ void gc_collect(struct gc_mutator *mut,
enum gc_collection_kind requested_kind);
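// When the collector exposes a side allocation table (nonzero
// gc_allocator_alloc_table_alignment()), mark the granules covered by a
// fresh allocation: the begin pattern on the first granule and, if an end
// pattern is defined, the end pattern on the last. Otherwise this is a
// no-op.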
static inline void gc_update_alloc_table(struct gc_ref obj, size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void gc_update_alloc_table(struct gc_ref obj, size_t size,
enum gc_allocation_kind kind) {
size_t alignment = gc_allocator_alloc_table_alignment();
if (!alignment) return;
uintptr_t addr = gc_ref_value(obj);
uintptr_t base = addr & ~(alignment - 1);
size_t granule_size = gc_allocator_small_granule_size();
uintptr_t granule = (addr & (alignment - 1)) / granule_size;
uint8_t *alloc = (uint8_t*)(base + granule);
uint8_t begin_pattern = gc_allocator_alloc_table_begin_pattern(kind);
uint8_t end_pattern = gc_allocator_alloc_table_end_pattern();
if (end_pattern) {
size_t granules = size / granule_size;
if (granules == 1) {
alloc[0] = begin_pattern | end_pattern;
} else {
alloc[0] = begin_pattern;
if (granules > 2)
memset(alloc + 1, 0, granules - 2);
alloc[granules - 1] = end_pattern;
}
} else {
alloc[0] = begin_pattern;
}
}
GC_API_ void* gc_allocate_slow(struct gc_mutator *mut, size_t bytes,
enum gc_allocation_kind kind) GC_NEVER_INLINE;
static inline void*
gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut,
size_t size,
enum gc_allocation_kind kind) {
GC_ASSERT(size <= gc_allocator_large_threshold());
size_t granule_size = gc_allocator_small_granule_size();
size_t hp_offset = gc_allocator_allocation_pointer_offset();
size_t limit_offset = gc_allocator_allocation_limit_offset();
uintptr_t base_addr = (uintptr_t)mut;
uintptr_t *hp_loc = (uintptr_t*)(base_addr + hp_offset);
uintptr_t *limit_loc = (uintptr_t*)(base_addr + limit_offset);
size = (size + granule_size - 1) & ~(granule_size - 1);
uintptr_t hp = *hp_loc;
uintptr_t limit = *limit_loc;
uintptr_t new_hp = hp + size;
if (GC_UNLIKELY (new_hp > limit))
return NULL;
*hp_loc = new_hp;
gc_update_alloc_table(gc_ref(hp), size, kind);
return (void*)hp;
}
static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut,
size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut,
size_t size,
enum gc_allocation_kind kind) {
GC_ASSERT(size <= gc_allocator_large_threshold());
size_t freelist_offset = gc_allocator_freelist_offset(size, kind);
uintptr_t base_addr = (uintptr_t)mut;
void **freelist_loc = (void**)(base_addr + freelist_offset);
void *head = *freelist_loc;
if (GC_UNLIKELY(!head))
return NULL;
*freelist_loc = *(void**)head;
gc_update_alloc_table(gc_ref_from_heap_object(head), size, kind);
return head;
}
static inline void* gc_allocate_small_fast(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_small_fast(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) {
GC_ASSERT(size != 0);
GC_ASSERT(size <= gc_allocator_large_threshold());
switch (gc_allocator_kind()) {
case GC_ALLOCATOR_INLINE_BUMP_POINTER:
return gc_allocate_small_fast_bump_pointer(mut, size, kind);
case GC_ALLOCATOR_INLINE_FREELIST:
return gc_allocate_small_fast_freelist(mut, size, kind);
case GC_ALLOCATOR_INLINE_NONE:
return NULL;
default:
GC_CRASH();
}
}
static inline void* gc_allocate_fast(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_fast(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) {
GC_ASSERT(size != 0);
if (size > gc_allocator_large_threshold())
return NULL;
return gc_allocate_small_fast(mut, size, kind);
}
static inline void* gc_allocate(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) {
void *ret = gc_allocate_fast(mut, size, kind);
if (GC_LIKELY(ret != NULL))
return ret;
return gc_allocate_slow(mut, size, kind);
}
GC_API_ int gc_object_is_old_generation_slow(struct gc_mutator *mut,
struct gc_ref obj) GC_NEVER_INLINE;
static inline int gc_object_is_old_generation(struct gc_mutator *mut,
struct gc_ref obj,
size_t obj_size) GC_ALWAYS_INLINE;
static inline int gc_object_is_old_generation(struct gc_mutator *mut,
struct gc_ref obj,
size_t obj_size) {
switch (gc_old_generation_check_kind(obj_size)) {
case GC_OLD_GENERATION_CHECK_ALLOC_TABLE: {
size_t alignment = gc_allocator_alloc_table_alignment();
GC_ASSERT(alignment);
uintptr_t addr = gc_ref_value(obj);
uintptr_t base = addr & ~(alignment - 1);
size_t granule_size = gc_allocator_small_granule_size();
uintptr_t granule = (addr & (alignment - 1)) / granule_size;
uint8_t *byte_loc = (uint8_t*)(base + granule);
uint8_t byte = atomic_load_explicit(byte_loc, memory_order_relaxed);
uint8_t mask = gc_old_generation_check_alloc_table_tag_mask();
uint8_t young = gc_old_generation_check_alloc_table_young_tag();
return (byte & mask) != young;
}
case GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY: {
struct gc_heap *heap = gc_mutator_heap(mut);
// Note that these addresses are fixed and that the embedder might
// want to store them somewhere or inline them into the output of
// JIT-generated code. They may also be power-of-two aligned.
uintptr_t low_addr = gc_small_object_nursery_low_address(heap);
uintptr_t high_addr = gc_small_object_nursery_high_address(heap);
uintptr_t size = high_addr - low_addr;
uintptr_t addr = gc_ref_value(obj);
return addr - low_addr >= size;
}
case GC_OLD_GENERATION_CHECK_SLOW:
return gc_object_is_old_generation_slow(mut, obj);
default:
GC_CRASH();
}
}
GC_API_ void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj,
size_t obj_size, struct gc_edge edge,
struct gc_ref new_val) GC_NEVER_INLINE;
static inline int gc_write_barrier_fast(struct gc_mutator *mut, struct gc_ref obj,
size_t obj_size, struct gc_edge edge,
struct gc_ref new_val) GC_ALWAYS_INLINE;
static inline int gc_write_barrier_fast(struct gc_mutator *mut, struct gc_ref obj,
size_t obj_size, struct gc_edge edge,
struct gc_ref new_val) {
switch (gc_write_barrier_kind(obj_size)) {
case GC_WRITE_BARRIER_NONE:
return 0;
case GC_WRITE_BARRIER_FIELD: {
if (!gc_object_is_old_generation(mut, obj, obj_size))
return 0;
size_t field_table_alignment = gc_write_barrier_field_table_alignment();
size_t fields_per_byte = gc_write_barrier_field_fields_per_byte();
uint8_t first_bit_pattern = gc_write_barrier_field_first_bit_pattern();
ssize_t table_offset = gc_write_barrier_field_table_offset();
uintptr_t addr = gc_edge_address(edge);
uintptr_t base = addr & ~(field_table_alignment - 1);
uintptr_t field = (addr & (field_table_alignment - 1)) / sizeof(uintptr_t);
uintptr_t log_byte = field / fields_per_byte;
uint8_t log_bit = first_bit_pattern << (field % fields_per_byte);
uint8_t *byte_loc = (uint8_t*)(base + table_offset + log_byte);
uint8_t byte = atomic_load_explicit(byte_loc, memory_order_relaxed);
return !(byte & log_bit);
}
case GC_WRITE_BARRIER_SLOW:
return 1;
default:
GC_CRASH();
}
}
static inline void gc_write_barrier(struct gc_mutator *mut, struct gc_ref obj,
size_t obj_size, struct gc_edge edge,
struct gc_ref new_val) GC_ALWAYS_INLINE;
static inline void gc_write_barrier(struct gc_mutator *mut, struct gc_ref obj,
size_t obj_size, struct gc_edge edge,
struct gc_ref new_val) {
if (GC_UNLIKELY(gc_write_barrier_fast(mut, obj, obj_size, edge, new_val)))
gc_write_barrier_slow(mut, obj, obj_size, edge, new_val);
}
GC_API_ void gc_pin_object(struct gc_mutator *mut, struct gc_ref obj);
GC_API_ void gc_safepoint_slow(struct gc_mutator *mut) GC_NEVER_INLINE;
GC_API_ int* gc_safepoint_flag_loc(struct gc_mutator *mut);
static inline int gc_should_stop_for_safepoint(struct gc_mutator *mut) {
switch (gc_cooperative_safepoint_kind()) {
case GC_COOPERATIVE_SAFEPOINT_NONE:
return 0;
case GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG:
case GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG: {
return atomic_load_explicit(gc_safepoint_flag_loc(mut),
memory_order_relaxed);
}
default:
GC_CRASH();
}
}
static inline void gc_safepoint(struct gc_mutator *mut) {
if (GC_UNLIKELY(gc_should_stop_for_safepoint(mut)))
gc_safepoint_slow(mut);
}
#endif // GC_API_H_

View file

@@ -0,0 +1,21 @@
#ifndef GC_ASSERT_H
#define GC_ASSERT_H
#include "gc-config.h"
#define GC_UNLIKELY(e) __builtin_expect(e, 0)
#define GC_LIKELY(e) __builtin_expect(e, 1)
#define GC_CRASH() __builtin_trap()
#if GC_DEBUG
#define GC_ASSERT(x) do { if (GC_UNLIKELY(!(x))) GC_CRASH(); } while (0)
#define GC_UNREACHABLE() GC_CRASH()
#else
#define GC_ASSERT(x) do { } while (0)
#define GC_UNREACHABLE() __builtin_unreachable()
#endif
#define GC_ASSERT_EQ(a, b) GC_ASSERT((a) == (b))
#endif // GC_ASSERT_H

View file

@@ -0,0 +1,69 @@
#ifndef GC_ATTRS_H
#define GC_ATTRS_H
#include "gc-inline.h"
#include "gc-allocation-kind.h"
#include <stddef.h>
#include <stdint.h>
enum gc_allocator_kind {
GC_ALLOCATOR_INLINE_BUMP_POINTER,
GC_ALLOCATOR_INLINE_FREELIST,
GC_ALLOCATOR_INLINE_NONE
};
static inline enum gc_allocator_kind gc_allocator_kind(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_large_threshold(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_small_granule_size(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_allocation_pointer_offset(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_allocation_limit_offset(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_freelist_offset(size_t size,
enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_alloc_table_alignment(void) GC_ALWAYS_INLINE;
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) GC_ALWAYS_INLINE;
enum gc_old_generation_check_kind {
GC_OLD_GENERATION_CHECK_NONE,
GC_OLD_GENERATION_CHECK_ALLOC_TABLE,
GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY,
GC_OLD_GENERATION_CHECK_SLOW
};
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) GC_ALWAYS_INLINE;
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) GC_ALWAYS_INLINE;
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) GC_ALWAYS_INLINE;
enum gc_write_barrier_kind {
GC_WRITE_BARRIER_NONE,
GC_WRITE_BARRIER_FIELD,
GC_WRITE_BARRIER_SLOW
};
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) GC_ALWAYS_INLINE;
static inline size_t gc_write_barrier_field_table_alignment(void) GC_ALWAYS_INLINE;
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) GC_ALWAYS_INLINE;
static inline size_t gc_write_barrier_field_fields_per_byte(void) GC_ALWAYS_INLINE;
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) GC_ALWAYS_INLINE;
enum gc_safepoint_mechanism {
GC_SAFEPOINT_MECHANISM_COOPERATIVE,
GC_SAFEPOINT_MECHANISM_SIGNAL,
};
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) GC_ALWAYS_INLINE;
enum gc_cooperative_safepoint_kind {
GC_COOPERATIVE_SAFEPOINT_NONE,
GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG,
GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG,
};
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) GC_ALWAYS_INLINE;
static inline int gc_can_pin_objects(void) GC_ALWAYS_INLINE;
#endif // GC_ATTRS_H

View file

@@ -0,0 +1,177 @@
#ifndef GC_BASIC_STATS_H
#define GC_BASIC_STATS_H
#include "gc-event-listener.h"
#include "gc-histogram.h"
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>
GC_DEFINE_HISTOGRAM(gc_latency, 25, 4);
struct gc_basic_stats {
uint64_t major_collection_count;
uint64_t minor_collection_count;
uint64_t last_time_usec;
uint64_t last_cpu_time_usec;
uint64_t elapsed_mutator_usec;
uint64_t elapsed_collector_usec;
uint64_t cpu_mutator_usec;
uint64_t cpu_collector_usec;
size_t heap_size;
size_t max_heap_size;
size_t max_live_data_size;
struct gc_latency pause_times;
};
static inline uint64_t gc_basic_stats_now(void) {
struct timeval tv;
if (gettimeofday(&tv, NULL) != 0) GC_CRASH();
uint64_t ret = tv.tv_sec;
ret *= 1000 * 1000;
ret += tv.tv_usec;
return ret;
}
static inline uint64_t gc_basic_stats_cpu_time(void) {
struct timespec ts;
clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts);
uint64_t ret = ts.tv_sec;
ret *= 1000 * 1000;
ret += ts.tv_nsec / 1000;
return ret;
}
static inline void gc_basic_stats_init(void *data, size_t heap_size) {
struct gc_basic_stats *stats = data;
memset(stats, 0, sizeof(*stats));
stats->last_time_usec = gc_basic_stats_now();
stats->last_cpu_time_usec = gc_basic_stats_cpu_time();
stats->heap_size = stats->max_heap_size = heap_size;
}
static inline void gc_basic_stats_requesting_stop(void *data) {
struct gc_basic_stats *stats = data;
uint64_t now = gc_basic_stats_now();
uint64_t cpu_time = gc_basic_stats_cpu_time();
stats->elapsed_mutator_usec += now - stats->last_time_usec;
stats->cpu_mutator_usec += cpu_time - stats->last_cpu_time_usec;
stats->last_time_usec = now;
stats->last_cpu_time_usec = cpu_time;
}
static inline void gc_basic_stats_waiting_for_stop(void *data) {}
static inline void gc_basic_stats_mutators_stopped(void *data) {}
static inline void gc_basic_stats_prepare_gc(void *data,
enum gc_collection_kind kind) {
struct gc_basic_stats *stats = data;
if (kind == GC_COLLECTION_MINOR)
stats->minor_collection_count++;
else
stats->major_collection_count++;
}
static inline void gc_basic_stats_roots_traced(void *data) {}
static inline void gc_basic_stats_heap_traced(void *data) {}
static inline void gc_basic_stats_ephemerons_traced(void *data) {}
static inline void gc_basic_stats_finalizers_traced(void *data) {}
static inline void gc_basic_stats_restarting_mutators(void *data) {
struct gc_basic_stats *stats = data;
uint64_t now = gc_basic_stats_now();
uint64_t cpu_time = gc_basic_stats_cpu_time();
uint64_t pause_time = now - stats->last_time_usec;
uint64_t pause_cpu_time = cpu_time - stats->last_cpu_time_usec;
stats->elapsed_collector_usec += pause_time;
stats->cpu_collector_usec += pause_cpu_time;
gc_latency_record(&stats->pause_times, pause_time);
stats->last_time_usec = now;
stats->last_cpu_time_usec = cpu_time;
}
static inline void* gc_basic_stats_mutator_added(void *data) {
return NULL;
}
static inline void gc_basic_stats_mutator_cause_gc(void *mutator_data) {}
static inline void gc_basic_stats_mutator_stopping(void *mutator_data) {}
static inline void gc_basic_stats_mutator_stopped(void *mutator_data) {}
static inline void gc_basic_stats_mutator_restarted(void *mutator_data) {}
static inline void gc_basic_stats_mutator_removed(void *mutator_data) {}
static inline void gc_basic_stats_heap_resized(void *data, size_t size) {
struct gc_basic_stats *stats = data;
stats->heap_size = size;
if (size > stats->max_heap_size)
stats->max_heap_size = size;
}
static inline void gc_basic_stats_live_data_size(void *data, size_t size) {
struct gc_basic_stats *stats = data;
if (size > stats->max_live_data_size)
stats->max_live_data_size = size;
}
#define GC_BASIC_STATS \
((struct gc_event_listener) { \
gc_basic_stats_init, \
gc_basic_stats_requesting_stop, \
gc_basic_stats_waiting_for_stop, \
gc_basic_stats_mutators_stopped, \
gc_basic_stats_prepare_gc, \
gc_basic_stats_roots_traced, \
gc_basic_stats_heap_traced, \
gc_basic_stats_ephemerons_traced, \
gc_basic_stats_finalizers_traced, \
gc_basic_stats_restarting_mutators, \
gc_basic_stats_mutator_added, \
gc_basic_stats_mutator_cause_gc, \
gc_basic_stats_mutator_stopping, \
gc_basic_stats_mutator_stopped, \
gc_basic_stats_mutator_restarted, \
gc_basic_stats_mutator_removed, \
gc_basic_stats_heap_resized, \
gc_basic_stats_live_data_size, \
})
static inline void gc_basic_stats_finish(struct gc_basic_stats *stats) {
uint64_t now = gc_basic_stats_now();
uint64_t cpu_time = gc_basic_stats_cpu_time();
stats->elapsed_mutator_usec += now - stats->last_time_usec;
stats->cpu_mutator_usec += cpu_time - stats->last_cpu_time_usec;
stats->last_time_usec = now;
stats->last_cpu_time_usec = cpu_time;
}
static inline void gc_basic_stats_print(struct gc_basic_stats *stats, FILE *f) {
fprintf(f, "Completed %" PRIu64 " major collections (%" PRIu64 " minor).\n",
stats->major_collection_count, stats->minor_collection_count);
uint64_t stopped = stats->elapsed_collector_usec;
uint64_t elapsed = stats->elapsed_mutator_usec + stopped;
uint64_t cpu_stopped = stats->cpu_collector_usec;
uint64_t cpu_total = stats->cpu_mutator_usec + cpu_stopped;
uint64_t ms = 1000; // usec per ms
fprintf(f, "%" PRIu64 ".%.3" PRIu64 " ms total time "
"(%" PRIu64 ".%.3" PRIu64 " stopped); "
"%" PRIu64 ".%.3" PRIu64 " ms CPU time "
"(%" PRIu64 ".%.3" PRIu64 " stopped).\n",
elapsed / ms, elapsed % ms, stopped / ms, stopped % ms,
cpu_total / ms, cpu_total % ms, cpu_stopped / ms, cpu_stopped % ms);
uint64_t pause_median = gc_latency_median(&stats->pause_times);
uint64_t pause_p95 = gc_latency_percentile(&stats->pause_times, 0.95);
uint64_t pause_max = gc_latency_max(&stats->pause_times);
fprintf(f, "%" PRIu64 ".%.3" PRIu64 " ms median pause time, "
"%" PRIu64 ".%.3" PRIu64 " p95, "
"%" PRIu64 ".%.3" PRIu64 " max.\n",
pause_median / ms, pause_median % ms, pause_p95 / ms, pause_p95 % ms,
pause_max / ms, pause_max % ms);
double MB = 1e6;
fprintf(f, "Heap size is %.3f MB (max %.3f MB); peak live data %.3f MB.\n",
stats->heap_size / MB, stats->max_heap_size / MB,
stats->max_live_data_size / MB);
}
#endif // GC_BASIC_STATS_H

View file

@@ -0,0 +1,11 @@
#ifndef GC_COLLECTION_KIND_H
#define GC_COLLECTION_KIND_H
enum gc_collection_kind {
GC_COLLECTION_ANY,
GC_COLLECTION_MINOR,
GC_COLLECTION_MAJOR,
GC_COLLECTION_COMPACTING,
};
#endif // GC_COLLECTION_KIND_H

View file

@@ -0,0 +1,40 @@
#ifndef GC_CONFIG_H
#define GC_CONFIG_H
#ifndef GC_DEBUG
#define GC_DEBUG 0
#endif
#ifndef GC_HAS_IMMEDIATES
#define GC_HAS_IMMEDIATES 1
#endif
#ifndef GC_PARALLEL
#define GC_PARALLEL 0
#endif
#ifndef GC_GENERATIONAL
#define GC_GENERATIONAL 0
#endif
// Though you normally wouldn't configure things this way, it's possible
// to have both precise and conservative roots. However we have to
// either have precise or conservative tracing; not a mix.
#ifndef GC_PRECISE_ROOTS
#define GC_PRECISE_ROOTS 0
#endif
#ifndef GC_CONSERVATIVE_ROOTS
#define GC_CONSERVATIVE_ROOTS 0
#endif
#ifndef GC_CONSERVATIVE_TRACE
#define GC_CONSERVATIVE_TRACE 0
#endif
#ifndef GC_CONCURRENT_TRACE
#define GC_CONCURRENT_TRACE 0
#endif
#endif // GC_CONFIG_H

View file

@@ -0,0 +1,17 @@
#ifndef GC_CONSERVATIVE_REF_H
#define GC_CONSERVATIVE_REF_H
#include <stdint.h>
struct gc_conservative_ref {
uintptr_t value;
};
static inline struct gc_conservative_ref gc_conservative_ref(uintptr_t value) {
return (struct gc_conservative_ref){value};
}
static inline uintptr_t gc_conservative_ref_value(struct gc_conservative_ref ref) {
return ref.value;
}
#endif // GC_CONSERVATIVE_REF_H

View file

@@ -0,0 +1,26 @@
#ifndef GC_EDGE_H
#define GC_EDGE_H
#include "gc-ref.h"
struct gc_edge {
struct gc_ref *dst;
};
static inline struct gc_edge gc_edge(void* addr) {
return (struct gc_edge){addr};
}
static inline struct gc_ref gc_edge_ref(struct gc_edge edge) {
return *edge.dst;
}
static inline struct gc_ref* gc_edge_loc(struct gc_edge edge) {
return edge.dst;
}
static inline uintptr_t gc_edge_address(struct gc_edge edge) {
return (uintptr_t)gc_edge_loc(edge);
}
static inline void gc_edge_update(struct gc_edge edge, struct gc_ref ref) {
*edge.dst = ref;
}
#endif // GC_EDGE_H

View file

@@ -0,0 +1,67 @@
#ifndef GC_EMBEDDER_API_H
#define GC_EMBEDDER_API_H
#include <stddef.h>
#include "gc-config.h"
#include "gc-edge.h"
#include "gc-inline.h"
#include "gc-forwarding.h"
#ifndef GC_EMBEDDER_API
#define GC_EMBEDDER_API static
#endif
struct gc_mutator_roots;
struct gc_heap_roots;
struct gc_atomic_forward;
struct gc_heap;
struct gc_extern_space;
GC_EMBEDDER_API inline int gc_is_valid_conservative_ref_displacement(uintptr_t displacement);
GC_EMBEDDER_API inline size_t gc_finalizer_priority_count(void);
GC_EMBEDDER_API inline int gc_extern_space_visit(struct gc_extern_space *space,
struct gc_edge edge,
struct gc_ref ref) GC_ALWAYS_INLINE;
GC_EMBEDDER_API inline void gc_extern_space_start_gc(struct gc_extern_space *space,
int is_minor_gc);
GC_EMBEDDER_API inline void gc_extern_space_finish_gc(struct gc_extern_space *space,
int is_minor_gc);
GC_EMBEDDER_API inline void gc_trace_object(struct gc_ref ref,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data,
size_t *size) GC_ALWAYS_INLINE;
GC_EMBEDDER_API inline void gc_trace_mutator_roots(struct gc_mutator_roots *roots,
void (*trace_edge)(struct gc_edge edge,
struct gc_heap *heap,
void *trace_data),
struct gc_heap *heap,
void *trace_data);
GC_EMBEDDER_API inline void gc_trace_heap_roots(struct gc_heap_roots *roots,
void (*trace_edge)(struct gc_edge edge,
struct gc_heap *heap,
void *trace_data),
struct gc_heap *heap,
void *trace_data);
GC_EMBEDDER_API inline uintptr_t gc_object_forwarded_nonatomic(struct gc_ref ref);
GC_EMBEDDER_API inline void gc_object_forward_nonatomic(struct gc_ref ref,
struct gc_ref new_ref);
GC_EMBEDDER_API inline struct gc_atomic_forward gc_atomic_forward_begin(struct gc_ref ref);
GC_EMBEDDER_API inline void gc_atomic_forward_acquire(struct gc_atomic_forward *);
GC_EMBEDDER_API inline int gc_atomic_forward_retry_busy(struct gc_atomic_forward *);
GC_EMBEDDER_API inline void gc_atomic_forward_abort(struct gc_atomic_forward *);
GC_EMBEDDER_API inline size_t gc_atomic_forward_object_size(struct gc_atomic_forward *);
GC_EMBEDDER_API inline void gc_atomic_forward_commit(struct gc_atomic_forward *,
struct gc_ref new_ref);
GC_EMBEDDER_API inline uintptr_t gc_atomic_forward_address(struct gc_atomic_forward *);
#endif // GC_EMBEDDER_API_H

View file

@@ -0,0 +1,42 @@
#ifndef GC_EPHEMERON_H_
#define GC_EPHEMERON_H_
#include "gc-edge.h"
#include "gc-ref.h"
#include "gc-visibility.h"
// Ephemerons establish an association between a "key" object and a
// "value" object. If the ephemeron and the key are live, then the
// value is live, and can be retrieved from the ephemeron. Ephemerons
// can be chained together, which allows them to function as links in a
// buckets-and-chains hash table.
//
// This file defines the user-facing API for ephemerons.
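//
// For illustration only (not part of this header), a weak-key table bucket
// could be maintained along these lines, given embedder-provided `bucket`
// storage and a `keys_equal` comparison:
//
//   struct gc_ephemeron *e = gc_allocate_ephemeron(mut);
//   gc_ephemeron_init(mut, e, key, value);
//   gc_ephemeron_chain_push(&bucket, e);
//   ...
//   for (struct gc_ephemeron *l = gc_ephemeron_chain_head(&bucket);
//        l; l = gc_ephemeron_chain_next(l))
//     if (keys_equal(gc_ephemeron_key(l), key))
//       return gc_ephemeron_value(l);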
struct gc_heap;
struct gc_mutator;
struct gc_ephemeron;
GC_API_ size_t gc_ephemeron_size(void);
GC_API_ struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut);
GC_API_ void gc_ephemeron_init(struct gc_mutator *mut,
struct gc_ephemeron *ephemeron,
struct gc_ref key, struct gc_ref value);
GC_API_ struct gc_ref gc_ephemeron_key(struct gc_ephemeron *ephemeron);
GC_API_ struct gc_ref gc_ephemeron_value(struct gc_ephemeron *ephemeron);
GC_API_ struct gc_ephemeron* gc_ephemeron_chain_head(struct gc_ephemeron **loc);
GC_API_ void gc_ephemeron_chain_push(struct gc_ephemeron **loc,
struct gc_ephemeron *ephemeron);
GC_API_ struct gc_ephemeron* gc_ephemeron_chain_next(struct gc_ephemeron *ephemeron);
GC_API_ void gc_ephemeron_mark_dead(struct gc_ephemeron *ephemeron);
GC_API_ void gc_trace_ephemeron(struct gc_ephemeron *ephemeron,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data);
#endif // GC_EPHEMERON_H_

View file

@@ -0,0 +1,145 @@
#ifndef GC_EVENT_LISTENER_CHAIN_H
#define GC_EVENT_LISTENER_CHAIN_H
#include "gc-event-listener.h"
struct gc_event_listener_chain {
struct gc_event_listener head; void *head_data;
struct gc_event_listener tail; void *tail_data;
};
struct gc_event_listener_chain_mutator {
struct gc_event_listener_chain *chain;
void *head_mutator_data;
void *tail_mutator_data;
};
static inline void gc_event_listener_chain_init(void *data, size_t heap_size) {
struct gc_event_listener_chain *chain = data;
chain->head.init(chain->head_data, heap_size);
chain->tail.init(chain->tail_data, heap_size);
}
static inline void gc_event_listener_chain_requesting_stop(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.requesting_stop(chain->head_data);
chain->tail.requesting_stop(chain->tail_data);
}
static inline void gc_event_listener_chain_waiting_for_stop(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.waiting_for_stop(chain->head_data);
chain->tail.waiting_for_stop(chain->tail_data);
}
static inline void gc_event_listener_chain_mutators_stopped(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.mutators_stopped(chain->head_data);
chain->tail.mutators_stopped(chain->tail_data);
}
static inline void
gc_event_listener_chain_prepare_gc(void *data, enum gc_collection_kind kind) {
struct gc_event_listener_chain *chain = data;
chain->head.prepare_gc(chain->head_data, kind);
chain->tail.prepare_gc(chain->tail_data, kind);
}
static inline void gc_event_listener_chain_roots_traced(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.roots_traced(chain->head_data);
chain->tail.roots_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_heap_traced(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.heap_traced(chain->head_data);
chain->tail.heap_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_ephemerons_traced(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.ephemerons_traced(chain->head_data);
chain->tail.ephemerons_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_finalizers_traced(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.finalizers_traced(chain->head_data);
chain->tail.finalizers_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_restarting_mutators(void *data) {
struct gc_event_listener_chain *chain = data;
chain->head.restarting_mutators(chain->head_data);
chain->tail.restarting_mutators(chain->tail_data);
}
static inline void* gc_event_listener_chain_mutator_added(void *data) {
struct gc_event_listener_chain *chain = data;
struct gc_event_listener_chain_mutator *mutator = malloc(sizeof(*mutator));
if (!mutator) abort();
mutator->chain = chain;
mutator->head_mutator_data = chain->head.mutator_added(chain->head_data);
mutator->tail_mutator_data = chain->tail.mutator_added(chain->tail_data);
return mutator;
}
static inline void gc_event_listener_chain_mutator_cause_gc(void *mutator_data) {
struct gc_event_listener_chain_mutator *mutator = mutator_data;
mutator->chain->head.mutator_cause_gc(mutator->head_mutator_data);
mutator->chain->tail.mutator_cause_gc(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_stopping(void *mutator_data) {
struct gc_event_listener_chain_mutator *mutator = mutator_data;
mutator->chain->head.mutator_stopping(mutator->head_mutator_data);
mutator->chain->tail.mutator_stopping(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_stopped(void *mutator_data) {
struct gc_event_listener_chain_mutator *mutator = mutator_data;
mutator->chain->head.mutator_stopped(mutator->head_mutator_data);
mutator->chain->tail.mutator_stopped(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_restarted(void *mutator_data) {
struct gc_event_listener_chain_mutator *mutator = mutator_data;
mutator->chain->head.mutator_restarted(mutator->head_mutator_data);
mutator->chain->tail.mutator_restarted(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_removed(void *mutator_data) {
struct gc_event_listener_chain_mutator *mutator = mutator_data;
mutator->chain->head.mutator_removed(mutator->head_mutator_data);
mutator->chain->tail.mutator_removed(mutator->tail_mutator_data);
free(mutator);
}
static inline void gc_event_listener_chain_heap_resized(void *data, size_t size) {
struct gc_event_listener_chain *chain = data;
chain->head.heap_resized(chain->head_data, size);
chain->tail.heap_resized(chain->tail_data, size);
}
static inline void gc_event_listener_chain_live_data_size(void *data, size_t size) {
struct gc_event_listener_chain *chain = data;
chain->head.live_data_size(chain->head_data, size);
chain->tail.live_data_size(chain->tail_data, size);
}
#define GC_EVENT_LISTENER_CHAIN \
((struct gc_event_listener) { \
gc_event_listener_chain_init, \
gc_event_listener_chain_requesting_stop, \
gc_event_listener_chain_waiting_for_stop, \
gc_event_listener_chain_mutators_stopped, \
gc_event_listener_chain_prepare_gc, \
gc_event_listener_chain_roots_traced, \
gc_event_listener_chain_heap_traced, \
gc_event_listener_chain_ephemerons_traced, \
gc_event_listener_chain_finalizers_traced, \
gc_event_listener_chain_restarting_mutators, \
gc_event_listener_chain_mutator_added, \
gc_event_listener_chain_mutator_cause_gc, \
gc_event_listener_chain_mutator_stopping, \
gc_event_listener_chain_mutator_stopped, \
gc_event_listener_chain_mutator_restarted, \
gc_event_listener_chain_mutator_removed, \
gc_event_listener_chain_heap_resized, \
gc_event_listener_chain_live_data_size, \
})
#define GC_EVENT_LISTENER_CHAIN_DATA(head, head_data, tail, tail_data) \
((struct gc_event_listener_chain){head, head_data, tail, tail_data})
#endif // GC_EVENT_LISTENER_CHAIN_H

View file

@@ -0,0 +1,29 @@
#ifndef GC_EVENT_LISTENER_H
#define GC_EVENT_LISTENER_H
#include "gc-collection-kind.h"
struct gc_event_listener {
void (*init)(void *data, size_t heap_size);
void (*requesting_stop)(void *data);
void (*waiting_for_stop)(void *data);
void (*mutators_stopped)(void *data);
void (*prepare_gc)(void *data, enum gc_collection_kind kind);
void (*roots_traced)(void *data);
void (*heap_traced)(void *data);
void (*ephemerons_traced)(void *data);
void (*finalizers_traced)(void *data);
void (*restarting_mutators)(void *data);
void* (*mutator_added)(void *data);
void (*mutator_cause_gc)(void *mutator_data);
void (*mutator_stopping)(void *mutator_data);
void (*mutator_stopped)(void *mutator_data);
void (*mutator_restarted)(void *mutator_data);
void (*mutator_removed)(void *mutator_data);
void (*heap_resized)(void *data, size_t size);
void (*live_data_size)(void *data, size_t size);
};
#endif // GC_EVENT_LISTENER_H

View file

@@ -0,0 +1,81 @@
#ifndef GC_FINALIZER_H_
#define GC_FINALIZER_H_
#include "gc-edge.h"
#include "gc-ref.h"
#include "gc-visibility.h"
// A finalizer allows the embedder to be notified when an object becomes
// unreachable.
//
// A finalizer has a priority. When the heap is created, the embedder
// should declare how many priorities there are. Lower-numbered
// priorities take precedence; if an object has a priority-0 finalizer
// outstanding, that will prevent any finalizer at level 1 (or 2, ...)
// from firing until no priority-0 finalizer remains.
//
// Call gc_attach_finalizer to attach a finalizer to an object.
//
// A finalizer also references an associated GC-managed closure object.
// A finalizer's reference to the closure object is strong: if a
// finalizer's closure references its finalizable object, directly or
// indirectly, the finalizer will never fire.
//
// When an object with a finalizer becomes unreachable, it is added to a
// queue. The embedder can call gc_pop_finalizable to get the next
// finalizable object and its associated closure. At that point the
// embedder can do anything with the object, including keeping it alive.
// Ephemeron associations will still be present while the finalizable
// object is live. Note however that any objects referenced by the
// finalizable object may themselves be already finalized; finalizers
// are enqueued for objects when they become unreachable, which can
// concern whole subgraphs of objects at once.
//
// The usual way for an embedder to know when the queue of finalizable
// objects is non-empty is to call gc_set_finalizer_callback to
// provide a function that will be invoked when there are pending
// finalizers.
//
// Arranging to call gc_pop_finalizable and doing something with the
// finalizable object and closure is the responsibility of the embedder.
// The embedder's finalization action can end up invoking arbitrary
// code, so unless the embedder imposes some kind of restriction on what
// finalizers can do, generally speaking finalizers should be run in a
// dedicated thread instead of recursively from within whatever mutator
// thread caused GC. Setting up such a thread is the responsibility of
// the mutator. gc_pop_finalizable is thread-safe, allowing multiple
// finalization threads if that is appropriate.
//
// gc_allocate_finalizer returns a finalizer, which is a fresh
// GC-managed heap object. The mutator should then directly attach it
// to an object using gc_finalizer_attach. When the finalizer is fired,
// it becomes available to the mutator via gc_pop_finalizable.
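//
// For illustration only (not part of this header), attaching and draining
// finalizers might look roughly like this, with `on_finalizers_ready` and
// `run_finalizer` supplied by the embedder:
//
//   struct gc_finalizer *f = gc_allocate_finalizer(mut);
//   gc_finalizer_attach(mut, f, 0, object, closure);
//   gc_set_finalizer_callback(heap, on_finalizers_ready);
//   ...
//   for (struct gc_finalizer *fin = gc_pop_finalizable(mut);
//        fin; fin = gc_pop_finalizable(mut))
//     run_finalizer(gc_finalizer_object(fin), gc_finalizer_closure(fin));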
struct gc_heap;
struct gc_mutator;
struct gc_finalizer;
GC_API_ size_t gc_finalizer_size(void);
GC_API_ struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut);
GC_API_ void gc_finalizer_attach(struct gc_mutator *mut,
struct gc_finalizer *finalizer,
unsigned priority,
struct gc_ref object, struct gc_ref closure);
GC_API_ struct gc_ref gc_finalizer_object(struct gc_finalizer *finalizer);
GC_API_ struct gc_ref gc_finalizer_closure(struct gc_finalizer *finalizer);
GC_API_ struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut);
typedef void (*gc_finalizer_callback)(struct gc_heap *heap, size_t count);
GC_API_ void gc_set_finalizer_callback(struct gc_heap *heap,
gc_finalizer_callback callback);
GC_API_ void gc_trace_finalizer(struct gc_finalizer *finalizer,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data);
#endif // GC_FINALIZER_H_

View file

@@ -0,0 +1,20 @@
#ifndef GC_FORWARDING_H
#define GC_FORWARDING_H
#include <stdint.h>
#include "gc-ref.h"
enum gc_forwarding_state {
GC_FORWARDING_STATE_FORWARDED,
GC_FORWARDING_STATE_BUSY,
GC_FORWARDING_STATE_ACQUIRED,
GC_FORWARDING_STATE_NOT_FORWARDED
};
struct gc_atomic_forward {
struct gc_ref ref;
uintptr_t data;
enum gc_forwarding_state state;
};
#endif // GC_FORWARDING_H

View file

@@ -0,0 +1,82 @@
#ifndef GC_HISTOGRAM_H
#define GC_HISTOGRAM_H
#include "gc-assert.h"
#include <stdint.h>
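// Approximately-logarithmic bucketing: values below 2^precision each get
// their own bucket; above that, each power-of-two range is split into
// 2^precision sub-buckets. Indices saturate in a final overflow bucket at
// max_value_bits << precision.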
static inline size_t gc_histogram_bucket(uint64_t max_value_bits,
uint64_t precision,
uint64_t val) {
uint64_t major = val < (1ULL << precision)
? 0ULL
: 64ULL - __builtin_clzl(val) - precision;
uint64_t minor = val < (1 << precision)
? val
: (val >> (major - 1ULL)) & ((1ULL << precision) - 1ULL);
uint64_t idx = (major << precision) | minor;
if (idx >= (max_value_bits << precision))
idx = max_value_bits << precision;
return idx;
}
static inline uint64_t gc_histogram_bucket_min_val(uint64_t precision,
size_t idx) {
uint64_t major = idx >> precision;
uint64_t minor = idx & ((1ULL << precision) - 1ULL);
uint64_t min_val = major
? ((1ULL << precision) | minor) << (major - 1ULL)
: minor;
return min_val;
}
#define GC_DEFINE_HISTOGRAM(name, max_value_bits, precision) \
struct name { uint32_t buckets[((max_value_bits) << (precision)) + 1]; }; \
static inline size_t name##_size(void) { \
return ((max_value_bits) << (precision)) + 1; \
} \
static inline uint64_t name##_bucket_min_val(size_t idx) { \
GC_ASSERT(idx < name##_size()); \
return gc_histogram_bucket_min_val((precision), idx); \
} \
static inline struct name make_##name(void) { \
return (struct name) { { 0, }}; \
} \
static inline void name##_record(struct name *h, uint64_t val) { \
h->buckets[gc_histogram_bucket((max_value_bits), (precision), val)]++; \
} \
static inline uint64_t name##_ref(struct name *h, size_t idx) { \
GC_ASSERT(idx < name##_size()); \
return h->buckets[idx]; \
} \
static inline uint64_t name##_min(struct name *h) { \
for (size_t bucket = 0; bucket < name##_size(); bucket++) \
if (h->buckets[bucket]) return name##_bucket_min_val(bucket); \
return -1; \
} \
static inline uint64_t name##_max(struct name *h) { \
if (h->buckets[name##_size()-1]) return -1LL; \
for (ssize_t bucket = name##_size() - 1; bucket >= 0; bucket--) \
if (h->buckets[bucket]) return name##_bucket_min_val(bucket+1); \
return 0; \
} \
static inline uint64_t name##_count(struct name *h) { \
uint64_t sum = 0; \
for (size_t bucket = 0; bucket < name##_size(); bucket++) \
sum += h->buckets[bucket]; \
return sum; \
} \
static inline uint64_t name##_percentile(struct name *h, double p) { \
uint64_t n = name##_count(h) * p; \
uint64_t sum = 0; \
for (size_t bucket = 0; bucket + 1 < name##_size(); bucket++) { \
sum += h->buckets[bucket]; \
if (sum >= n) return name##_bucket_min_val(bucket+1); \
} \
return -1ULL; \
} \
static inline uint64_t name##_median(struct name *h) { \
return name##_percentile(h, 0.5); \
}
#endif // GC_HISTOGRAM_H

View file

@@ -0,0 +1,7 @@
#ifndef GC_INLINE_H_
#define GC_INLINE_H_
#define GC_ALWAYS_INLINE __attribute__((always_inline))
#define GC_NEVER_INLINE __attribute__((noinline))
#endif // GC_INLINE_H_

View file

@@ -0,0 +1,100 @@
#define LTTNG_UST_TRACEPOINT_PROVIDER whippet
#undef LTTNG_UST_TRACEPOINT_INCLUDE
#define LTTNG_UST_TRACEPOINT_INCLUDE "gc-lttng.h"
#if !defined(_TP_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
#define _TP_H
#include <lttng/tracepoint.h>
LTTNG_UST_TRACEPOINT_ENUM(
whippet, gc_kind,
LTTNG_UST_TP_ENUM_VALUES
(lttng_ust_field_enum_value("MINOR", 1)
lttng_ust_field_enum_value("MAJOR", 2)
lttng_ust_field_enum_value("COMPACTING", 3)))
LTTNG_UST_TRACEPOINT_EVENT_CLASS(
whippet, tracepoint,
LTTNG_UST_TP_ARGS(),
LTTNG_UST_TP_FIELDS())
LTTNG_UST_TRACEPOINT_EVENT_CLASS(
whippet, size_tracepoint,
LTTNG_UST_TP_ARGS(size_t, size),
LTTNG_UST_TP_FIELDS(lttng_ust_field_integer(size_t, size, size)))
/* The tracepoint instances */
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, size_tracepoint, whippet, init,
LTTNG_UST_TP_ARGS(size_t, size))
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, size_tracepoint, whippet, heap_resized,
LTTNG_UST_TP_ARGS(size_t, size))
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, size_tracepoint, whippet, live_data_size,
LTTNG_UST_TP_ARGS(size_t, size))
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, requesting_stop, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, waiting_for_stop, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutators_stopped, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT(
whippet, prepare_gc,
LTTNG_UST_TP_ARGS(int, gc_kind),
LTTNG_UST_TP_FIELDS(
lttng_ust_field_enum(whippet, gc_kind, int, gc_kind, gc_kind)))
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, roots_traced, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, heap_traced, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, ephemerons_traced, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, finalizers_traced, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, restarting_mutators, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutator_added, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutator_cause_gc, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutator_stopping, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutator_stopped, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutator_restarted, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, mutator_removed, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_unpark_all, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_share, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_check_termination_begin, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_check_termination_end, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_steal, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_roots_begin, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_roots_end, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_objects_begin, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_objects_end, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_worker_begin, LTTNG_UST_TP_ARGS())
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
whippet, tracepoint, whippet, trace_worker_end, LTTNG_UST_TP_ARGS())
#endif /* _TP_H */
#include <lttng/tracepoint-event.h>

View file

@@ -0,0 +1,50 @@
#ifndef GC_NULL_EVENT_LISTENER_H
#define GC_NULL_EVENT_LISTENER_H
#include "gc-event-listener.h"
static inline void gc_null_event_listener_init(void *data, size_t size) {}
static inline void gc_null_event_listener_requesting_stop(void *data) {}
static inline void gc_null_event_listener_waiting_for_stop(void *data) {}
static inline void gc_null_event_listener_mutators_stopped(void *data) {}
static inline void gc_null_event_listener_prepare_gc(void *data,
enum gc_collection_kind) {}
static inline void gc_null_event_listener_roots_traced(void *data) {}
static inline void gc_null_event_listener_heap_traced(void *data) {}
static inline void gc_null_event_listener_ephemerons_traced(void *data) {}
static inline void gc_null_event_listener_finalizers_traced(void *data) {}
static inline void gc_null_event_listener_restarting_mutators(void *data) {}
static inline void* gc_null_event_listener_mutator_added(void *data) { return NULL; }
static inline void gc_null_event_listener_mutator_cause_gc(void *mutator_data) {}
static inline void gc_null_event_listener_mutator_stopping(void *mutator_data) {}
static inline void gc_null_event_listener_mutator_stopped(void *mutator_data) {}
static inline void gc_null_event_listener_mutator_restarted(void *mutator_data) {}
static inline void gc_null_event_listener_mutator_removed(void *mutator_data) {}
static inline void gc_null_event_listener_heap_resized(void *, size_t) {}
static inline void gc_null_event_listener_live_data_size(void *, size_t) {}
#define GC_NULL_EVENT_LISTENER \
((struct gc_event_listener) { \
gc_null_event_listener_init, \
gc_null_event_listener_requesting_stop, \
gc_null_event_listener_waiting_for_stop, \
gc_null_event_listener_mutators_stopped, \
gc_null_event_listener_prepare_gc, \
gc_null_event_listener_roots_traced, \
gc_null_event_listener_heap_traced, \
gc_null_event_listener_ephemerons_traced, \
gc_null_event_listener_finalizers_traced, \
gc_null_event_listener_restarting_mutators, \
gc_null_event_listener_mutator_added, \
gc_null_event_listener_mutator_cause_gc, \
gc_null_event_listener_mutator_stopping, \
gc_null_event_listener_mutator_stopped, \
gc_null_event_listener_mutator_restarted, \
gc_null_event_listener_mutator_removed, \
gc_null_event_listener_heap_resized, \
gc_null_event_listener_live_data_size, \
})
#endif // GC_NULL_EVENT_LISTENER_H

View file

@ -0,0 +1,39 @@
#ifndef GC_OPTIONS_H
#define GC_OPTIONS_H
#include "gc-visibility.h"
enum gc_heap_size_policy {
GC_HEAP_SIZE_FIXED,
GC_HEAP_SIZE_GROWABLE,
GC_HEAP_SIZE_ADAPTIVE,
};
enum {
GC_OPTION_HEAP_SIZE_POLICY,
GC_OPTION_HEAP_SIZE,
GC_OPTION_MAXIMUM_HEAP_SIZE,
GC_OPTION_HEAP_SIZE_MULTIPLIER,
GC_OPTION_HEAP_EXPANSIVENESS,
GC_OPTION_PARALLELISM
};
struct gc_options;
GC_API_ int gc_option_from_string(const char *str);
GC_API_ struct gc_options* gc_allocate_options(void);
GC_API_ int gc_options_set_int(struct gc_options *options, int option,
int value);
GC_API_ int gc_options_set_size(struct gc_options *options, int option,
size_t value);
GC_API_ int gc_options_set_double(struct gc_options *options, int option,
double value);
GC_API_ int gc_options_parse_and_set(struct gc_options *options,
int option, const char *value);
GC_API_ int gc_options_parse_and_set_many(struct gc_options *options,
const char *str);
#endif // GC_OPTIONS_H

View file

@ -0,0 +1,50 @@
#ifndef GC_REF_H
#define GC_REF_H
#include "gc-assert.h"
#include "gc-config.h"
#include <stdint.h>
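// A gc_ref wraps either a pointer to a heap object (low bits zero, as
// objects are at least pointer-aligned) or, when the embedder supports
// immediates (GC_HAS_IMMEDIATES), an immediate value distinguished by its
// nonzero low bits.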
struct gc_ref {
uintptr_t value;
};
static inline struct gc_ref gc_ref(uintptr_t value) {
return (struct gc_ref){value};
}
static inline uintptr_t gc_ref_value(struct gc_ref ref) {
return ref.value;
}
static inline struct gc_ref gc_ref_null(void) {
return gc_ref(0);
}
static inline int gc_ref_is_null(struct gc_ref ref) {
return ref.value == 0;
}
static inline int gc_ref_is_immediate(struct gc_ref ref) {
GC_ASSERT(!gc_ref_is_null(ref));
return GC_HAS_IMMEDIATES && (ref.value & (sizeof(void*) - 1));
}
static inline struct gc_ref gc_ref_immediate(uintptr_t val) {
GC_ASSERT(val & (sizeof(void*) - 1));
GC_ASSERT(GC_HAS_IMMEDIATES);
return gc_ref(val);
}
static inline int gc_ref_is_heap_object(struct gc_ref ref) {
return !gc_ref_is_immediate(ref);
}
static inline struct gc_ref gc_ref_from_heap_object_or_null(void *obj) {
return gc_ref((uintptr_t) obj);
}
static inline struct gc_ref gc_ref_from_heap_object(void *obj) {
GC_ASSERT(obj);
return gc_ref_from_heap_object_or_null(obj);
}
static inline void* gc_ref_heap_object(struct gc_ref ref) {
GC_ASSERT(gc_ref_is_heap_object(ref));
return (void *) gc_ref_value(ref);
}
#endif // GC_REF_H

View file

@ -0,0 +1,17 @@
#ifndef GC_TRACEPOINT_H
#define GC_TRACEPOINT_H
#ifdef GC_TRACEPOINT_LTTNG
#include "gc-lttng.h"
#define GC_TRACEPOINT(...) \
lttng_ust_tracepoint(whippet, __VA_ARGS__)
#else // GC_TRACEPOINT_LTTNG
#define GC_TRACEPOINT(...) do {} while (0)
#endif // GC_TRACEPOINT_LTTNG
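// Usage sketch: GC_TRACEPOINT(trace_roots_begin) emits the corresponding
// event from the "whippet" LTTng provider when GC_TRACEPOINT_LTTNG is
// defined, and compiles away to nothing otherwise.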
#endif // GC_TRACEPOINT_H

View file

@ -0,0 +1,12 @@
#ifndef GC_VISIBILITY_H_
#define GC_VISIBILITY_H_
#define GC_INTERNAL __attribute__((visibility("hidden")))
#define GC_PUBLIC __attribute__((visibility("default")))
// FIXME: Conflict with bdw-gc GC_API. Switch prefix?
#ifndef GC_API_
#define GC_API_ GC_INTERNAL
#endif
#endif // GC_VISIBILITY_H_

View file

@ -0,0 +1,121 @@
#ifndef MMC_ATTRS_H
#define MMC_ATTRS_H
#include "gc-config.h"
#include "gc-assert.h"
#include "gc-attrs.h"
static inline enum gc_allocator_kind gc_allocator_kind(void) {
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
}
static inline size_t gc_allocator_small_granule_size(void) {
return 16;
}
static inline size_t gc_allocator_large_threshold(void) {
return 8192;
}
static inline size_t gc_allocator_allocation_pointer_offset(void) {
return sizeof(uintptr_t) * 0;
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
return sizeof(uintptr_t) * 1;
}
static inline size_t gc_allocator_freelist_offset(size_t size,
enum gc_allocation_kind kind) {
GC_CRASH();
}
static inline size_t gc_allocator_alloc_table_alignment(void) {
return 4 * 1024 * 1024;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) {
uint8_t young = 1;
uint8_t trace_precisely = 0;
uint8_t trace_none = 8;
uint8_t trace_conservatively = 16;
uint8_t pinned = 16;
if (GC_CONSERVATIVE_TRACE) {
switch (kind) {
case GC_ALLOCATION_TAGGED:
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
return young | trace_conservatively;
case GC_ALLOCATION_TAGGED_POINTERLESS:
return young | trace_none;
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
return young | trace_none;
default:
GC_CRASH();
};
} else {
switch (kind) {
case GC_ALLOCATION_TAGGED:
return young | trace_precisely;
case GC_ALLOCATION_TAGGED_POINTERLESS:
return young | trace_none;
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
return young | trace_none | pinned;
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
default:
GC_CRASH();
};
}
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
return 32;
}
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) {
if (GC_GENERATIONAL) {
if (obj_size <= gc_allocator_large_threshold())
return GC_OLD_GENERATION_CHECK_ALLOC_TABLE;
return GC_OLD_GENERATION_CHECK_SLOW;
}
return GC_OLD_GENERATION_CHECK_NONE;
}
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
return 7;
}
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
return 1;
}
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
if (GC_GENERATIONAL) {
if (obj_size <= gc_allocator_large_threshold())
return GC_WRITE_BARRIER_FIELD;
return GC_WRITE_BARRIER_SLOW;
}
return GC_WRITE_BARRIER_NONE;
}
static inline size_t gc_write_barrier_field_table_alignment(void) {
GC_ASSERT(GC_GENERATIONAL);
return gc_allocator_alloc_table_alignment();
}
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
GC_ASSERT(GC_GENERATIONAL);
return 0;
}
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
GC_ASSERT(GC_GENERATIONAL);
return 2;
}
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
GC_ASSERT(GC_GENERATIONAL);
return 64; // NOFL_METADATA_BYTE_LOGGED_0
}
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
}
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
return GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG;
}
static inline int gc_can_pin_objects(void) {
return 1;
}
#endif // MMC_ATTRS_H

View file

@ -0,0 +1,92 @@
#ifndef PCC_ATTRS_H
#define PCC_ATTRS_H
#include "gc-config.h"
#include "gc-assert.h"
#include "gc-attrs.h"
static const uintptr_t GC_ALIGNMENT = 8;
static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
static inline enum gc_allocator_kind gc_allocator_kind(void) {
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
}
static inline size_t gc_allocator_small_granule_size(void) {
return GC_ALIGNMENT;
}
static inline size_t gc_allocator_large_threshold(void) {
return GC_LARGE_OBJECT_THRESHOLD;
}
static inline size_t gc_allocator_allocation_pointer_offset(void) {
return sizeof(uintptr_t) * 0;
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
return sizeof(uintptr_t) * 1;
}
static inline size_t gc_allocator_freelist_offset(size_t size, enum gc_allocation_kind kind) {
GC_CRASH();
}
static inline size_t gc_allocator_alloc_table_alignment(void) {
return 0;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) {
GC_CRASH();
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
GC_CRASH();
}
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t size) {
if (!GC_GENERATIONAL)
return GC_OLD_GENERATION_CHECK_NONE;
if (size <= gc_allocator_large_threshold())
return GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY;
return GC_OLD_GENERATION_CHECK_SLOW;
}
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
GC_CRASH();
}
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
GC_CRASH();
}
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
if (!GC_GENERATIONAL)
return GC_WRITE_BARRIER_NONE;
if (obj_size <= gc_allocator_large_threshold())
return GC_WRITE_BARRIER_FIELD;
return GC_WRITE_BARRIER_SLOW;
}
static inline size_t gc_write_barrier_field_table_alignment(void) {
GC_ASSERT(GC_GENERATIONAL);
return 64 * 1024 * 1024;
}
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
GC_ASSERT(GC_GENERATIONAL);
return 128 * 1024;
}
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
GC_ASSERT(GC_GENERATIONAL);
return 8;
}
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
GC_ASSERT(GC_GENERATIONAL);
return 1;
}
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
}
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
return GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG;
}
static inline int gc_can_pin_objects(void) {
return 0;
}
#endif // PCC_ATTRS_H

View file

@ -0,0 +1,80 @@
#ifndef SEMI_ATTRS_H
#define SEMI_ATTRS_H
#include "gc-attrs.h"
#include "gc-assert.h"
static const uintptr_t GC_ALIGNMENT = 8;
static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
static inline enum gc_allocator_kind gc_allocator_kind(void) {
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
}
static inline size_t gc_allocator_small_granule_size(void) {
return GC_ALIGNMENT;
}
static inline size_t gc_allocator_large_threshold(void) {
return GC_LARGE_OBJECT_THRESHOLD;
}
static inline size_t gc_allocator_allocation_pointer_offset(void) {
return sizeof(uintptr_t) * 0;
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
return sizeof(uintptr_t) * 1;
}
static inline size_t gc_allocator_freelist_offset(size_t size,
enum gc_allocation_kind kind) {
GC_CRASH();
}
static inline size_t gc_allocator_alloc_table_alignment(void) {
return 0;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) {
GC_CRASH();
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
GC_CRASH();
}
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t) {
return GC_OLD_GENERATION_CHECK_NONE;
}
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
GC_CRASH();
}
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
GC_CRASH();
}
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t) {
return GC_WRITE_BARRIER_NONE;
}
static inline size_t gc_write_barrier_field_table_alignment(void) {
GC_CRASH();
}
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
GC_CRASH();
}
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
GC_CRASH();
}
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
GC_CRASH();
}
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
}
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
return GC_COOPERATIVE_SAFEPOINT_NONE;
}
static inline int gc_can_pin_objects(void) {
return 0;
}
#endif // SEMI_ATTRS_H

View file

@ -0,0 +1,35 @@
# Benchmarks
- [`mt-gcbench.c`](./mt-gcbench.c): The multi-threaded [GCBench
benchmark](https://hboehm.info/gc/gc_bench.html). An old but
standard benchmark that allocates different sizes of binary trees.
As parameters it takes a heap multiplier and a number of mutator
threads. We analytically compute the peak amount of live data and
then size the GC heap as a multiplier of that size (see the heap-sizing sketch after this list). It has a peak
heap consumption of 10 MB or so per mutator thread: not very large.
At a 2x heap multiplier, it causes about 30 collections for the `mmc`
collector, and runs somewhere around 200-400 milliseconds in
single-threaded mode, on the machines I have in 2022. For low thread
counts, the GCBench benchmark is small; but then again many Guile
processes also are quite short-lived, so perhaps it is useful to
ensure that small heaps remain lightweight.
To stress `mmc`'s handling of fragmentation, we modified this
benchmark to intersperse pseudorandomly-sized holes between tree
nodes.
- [`quads.c`](./quads.c): A synthetic benchmark that allocates quad
trees. The mutator begins by allocating one long-lived tree of depth
N, and then allocates 13% of the heap in depth-3 trees, 20 times,
simulating a fixed working set and otherwise an allocation-heavy
workload. By observing the times to allocate 13% of the heap in
garbage we can infer mutator overheads, and also note the variance
for the cycles in which GC hits.
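For concreteness, here is a minimal C sketch of the heap-sizing convention these benchmarks share: estimate peak live data, multiply, and request a fixed-size heap via the `gc_options` API. The `setup_heap` helper is illustrative only and not part of any benchmark.
```c
#include "gc-api.h"
#include "gc-basic-stats.h"
/* Size the heap as a multiple of the peak live data, as mt-gcbench.c and
   quads.c do, then initialize the collector. */
static int setup_heap(size_t peak_live_bytes, double multiplier,
                      struct gc_heap **heap, struct gc_mutator **mut,
                      struct gc_basic_stats *stats) {
  size_t heap_size = peak_live_bytes * multiplier;
  struct gc_options *options = gc_allocate_options();
  gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
  gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
  return gc_init(options, NULL, heap, mut, GC_BASIC_STATS, stats);
}
```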
## License
mt-gcbench.c was originally from https://hboehm.info/gc/gc_bench/, which
has a somewhat unclear license. I have modified GCBench significantly
so that I can slot in different GC implementations. Other files are
distributed under the Whippet license; see the top-level
[README.md](../README.md) for more.

View file

@ -0,0 +1,54 @@
#ifndef EPHEMERONS_EMBEDDER_H
#define EPHEMERONS_EMBEDDER_H
#include <stddef.h>
#include "ephemerons-types.h"
#include "gc-ephemeron.h"
struct gc_heap;
#define DEFINE_METHODS(name, Name, NAME) \
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
static inline void visit_##name##_fields(Name *obj,\
void (*visit)(struct gc_edge edge, \
struct gc_heap *heap, \
void *visit_data), \
struct gc_heap *heap, \
void *visit_data) GC_ALWAYS_INLINE;
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
#undef DEFINE_METHODS
static inline size_t small_object_size(SmallObject *obj) { return sizeof(*obj); }
static inline size_t ephemeron_size(Ephemeron *obj) { return gc_ephemeron_size(); }
static inline size_t box_size(Box *obj) { return sizeof(*obj); }
static inline void
visit_small_object_fields(SmallObject *obj,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {}
static inline void
visit_ephemeron_fields(Ephemeron *ephemeron,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {
gc_trace_ephemeron((struct gc_ephemeron*)ephemeron, visit, heap, visit_data);
}
static inline void
visit_box_fields(Box *box,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {
visit(gc_edge(&box->obj), heap, visit_data);
}
#include "simple-gc-embedder.h"
#endif // EPHEMERONS_EMBEDDER_H

View file

@ -0,0 +1,21 @@
#ifndef EPHEMERONS_TYPES_H
#define EPHEMERONS_TYPES_H
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
M(box, Box, BOX) \
M(ephemeron, Ephemeron, EPHEMERON) \
M(small_object, SmallObject, SMALL_OBJECT)
#include "heap-objects.h"
#include "simple-tagging-scheme.h"
struct SmallObject {
struct gc_header header;
};
struct Box {
struct gc_header header;
void *obj;
};
#endif // EPHEMERONS_TYPES_H

View file

@ -0,0 +1,272 @@
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <sys/time.h>
#include "assert.h"
#include "gc-api.h"
#include "gc-basic-stats.h"
#include "gc-ephemeron.h"
#include "simple-roots-api.h"
#include "ephemerons-types.h"
#include "simple-allocator.h"
typedef HANDLE_TO(SmallObject) SmallObjectHandle;
typedef HANDLE_TO(struct gc_ephemeron) EphemeronHandle;
typedef HANDLE_TO(Box) BoxHandle;
static SmallObject* allocate_small_object(struct gc_mutator *mut) {
return gc_allocate_with_kind(mut, ALLOC_KIND_SMALL_OBJECT, sizeof(SmallObject));
}
static Box* allocate_box(struct gc_mutator *mut) {
return gc_allocate_with_kind(mut, ALLOC_KIND_BOX, sizeof(Box));
}
static struct gc_ephemeron* allocate_ephemeron(struct gc_mutator *mut) {
struct gc_ephemeron *ret = gc_allocate_ephemeron(mut);
*tag_word(gc_ref_from_heap_object(ret)) = tag_live(ALLOC_KIND_EPHEMERON);
return ret;
}
/* Get the current time in microseconds */
static unsigned long current_time(void)
{
struct timeval t;
if (gettimeofday(&t, NULL) == -1)
return 0;
return t.tv_sec * 1000 * 1000 + t.tv_usec;
}
struct thread {
struct gc_mutator *mut;
struct gc_mutator_roots roots;
};
static void print_elapsed(const char *what, unsigned long start) {
unsigned long end = current_time();
unsigned long msec = (end - start) / 1000;
unsigned long usec = (end - start) % 1000;
printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec);
}
struct call_with_gc_data {
void* (*f)(struct thread *);
struct gc_heap *heap;
};
static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) {
struct call_with_gc_data *data = arg;
struct gc_mutator *mut = gc_init_for_thread(addr, data->heap);
struct thread t = { mut, };
gc_mutator_set_roots(mut, &t.roots);
void *ret = data->f(&t);
gc_finish_for_thread(mut);
return ret;
}
static void* call_with_gc(void* (*f)(struct thread *),
struct gc_heap *heap) {
struct call_with_gc_data data = { f, heap };
return gc_call_with_stack_addr(call_with_gc_inner, &data);
}
#define CHECK(x) \
do { \
if (!(x)) { \
fprintf(stderr, "%s:%d: check failed: %s\n", __FILE__, __LINE__, #x); \
exit(1); \
} \
} while (0)
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
#define CHECK_NULL(x) CHECK_EQ(x, NULL)
#define CHECK_NOT_NULL(x) CHECK_NE(x, NULL)
static size_t ephemeron_chain_length(struct gc_ephemeron **loc,
SmallObject *key) {
struct gc_ephemeron *head = gc_ephemeron_chain_head(loc);
size_t len = 0;
while (head) {
CHECK_EQ(key, (SmallObject*)gc_ref_value(gc_ephemeron_key(head)));
Box *value = gc_ref_heap_object(gc_ephemeron_value(head));
CHECK_NOT_NULL(value);
key = value->obj;
CHECK_NOT_NULL(key);
head = gc_ephemeron_chain_next(head);
len++;
}
return len;
}
static double heap_size;
static double heap_multiplier;
static size_t nthreads;
static void cause_gc(struct gc_mutator *mut) {
// Doing a full collection lets us reason precisely about liveness.
gc_collect(mut, GC_COLLECTION_MAJOR);
}
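// Build a chain of ephemerons: each link's key is the freshly allocated
// current head key, and its value is a box holding the previous key, so
// the whole chain stays reachable exactly as long as the head key is a
// root; dropping the head key should let the entire chain be collected.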
static void make_ephemeron_chain(struct thread *t, EphemeronHandle *head,
SmallObjectHandle *head_key, size_t length) {
BoxHandle tail_box = { NULL };
PUSH_HANDLE(t, tail_box);
CHECK_NULL(HANDLE_REF(*head_key));
HANDLE_SET(*head_key, allocate_small_object(t->mut));
for (size_t i = 0; i < length; i++) {
HANDLE_SET(tail_box, allocate_box(t->mut));
HANDLE_REF(tail_box)->obj = HANDLE_REF(*head_key);
HANDLE_SET(*head_key, allocate_small_object(t->mut));
struct gc_ephemeron *ephemeron = allocate_ephemeron(t->mut);
gc_ephemeron_init(t->mut, ephemeron,
gc_ref_from_heap_object(HANDLE_REF(*head_key)),
gc_ref_from_heap_object(HANDLE_REF(tail_box)));
gc_ephemeron_chain_push(HANDLE_LOC(*head), ephemeron);
}
POP_HANDLE(t);
}
static void* run_one_test(struct thread *t) {
size_t unit_size = gc_ephemeron_size() + sizeof(Box);
size_t list_length = heap_size / nthreads / heap_multiplier / unit_size;
printf("Allocating ephemeron list %zu nodes long. Total size %.3fGB.\n",
list_length, list_length * unit_size / 1e9);
unsigned long thread_start = current_time();
SmallObjectHandle head_key = { NULL };
EphemeronHandle head = { NULL };
PUSH_HANDLE(t, head_key);
PUSH_HANDLE(t, head);
make_ephemeron_chain(t, &head, &head_key, list_length);
size_t measured_length = ephemeron_chain_length(HANDLE_LOC(head),
HANDLE_REF(head_key));
CHECK_EQ(measured_length, list_length);
cause_gc(t->mut);
measured_length = ephemeron_chain_length(HANDLE_LOC(head),
HANDLE_REF(head_key));
CHECK_EQ(measured_length, list_length);
if (!GC_CONSERVATIVE_ROOTS) {
HANDLE_SET(head_key, NULL);
cause_gc(t->mut);
measured_length = ephemeron_chain_length(HANDLE_LOC(head),
HANDLE_REF(head_key));
CHECK_EQ(measured_length, 0);
}
// swap head_key for a key halfway in, cause gc
// check length is expected half-length; warn, or error if precise
// clear and return
print_elapsed("thread", thread_start);
POP_HANDLE(t);
POP_HANDLE(t);
return NULL;
}
static void* run_one_test_in_thread(void *arg) {
struct gc_heap *heap = arg;
return call_with_gc(run_one_test, heap);
}
struct join_data { int status; pthread_t thread; };
static void *join_thread(void *data) {
struct join_data *join_data = data;
void *ret;
join_data->status = pthread_join(join_data->thread, &ret);
return ret;
}
#define MAX_THREAD_COUNT 256
int main(int argc, char *argv[]) {
if (argc < 4 || 5 < argc) {
fprintf(stderr, "usage: %s HEAP_SIZE MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]);
return 1;
}
heap_size = atof(argv[1]);
heap_multiplier = atof(argv[2]);
nthreads = atol(argv[3]);
if (heap_size < 8192) {
fprintf(stderr,
"Heap size should probably be at least 8192, right? '%s'\n",
argv[1]);
return 1;
}
if (!(1.0 < heap_multiplier && heap_multiplier < 100)) {
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]);
return 1;
}
if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
(int)MAX_THREAD_COUNT, argv[3]);
return 1;
}
printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n",
heap_size / 1e9, heap_multiplier);
struct gc_options *options = gc_allocate_options();
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
if (argc == 5) {
if (!gc_options_parse_and_set_many(options, argv[4])) {
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[4]);
return 1;
}
}
struct gc_heap *heap;
struct gc_mutator *mut;
struct gc_basic_stats stats;
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
(size_t)heap_size);
return 1;
}
struct thread main_thread = { mut, };
gc_mutator_set_roots(mut, &main_thread.roots);
pthread_t threads[MAX_THREAD_COUNT];
// Run one of the threads in the main thread.
for (size_t i = 1; i < nthreads; i++) {
int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap);
if (status) {
errno = status;
perror("Failed to create thread");
return 1;
}
}
run_one_test(&main_thread);
for (size_t i = 1; i < nthreads; i++) {
struct join_data data = { 0, threads[i] };
gc_call_without_gc(mut, join_thread, &data);
if (data.status) {
errno = data.status;
perror("Failed to join thread");
return 1;
}
}
gc_basic_stats_finish(&stats);
fputs("\n", stdout);
gc_basic_stats_print(&stats, stdout);
return 0;
}

View file

@ -0,0 +1,55 @@
#ifndef FINALIZERS_EMBEDDER_H
#define FINALIZERS_EMBEDDER_H
#include <stddef.h>
#include "finalizers-types.h"
#include "gc-finalizer.h"
struct gc_heap;
#define DEFINE_METHODS(name, Name, NAME) \
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
static inline void visit_##name##_fields(Name *obj,\
void (*visit)(struct gc_edge edge, \
struct gc_heap *heap, \
void *visit_data), \
struct gc_heap *heap, \
void *visit_data) GC_ALWAYS_INLINE;
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
#undef DEFINE_METHODS
static inline size_t small_object_size(SmallObject *obj) { return sizeof(*obj); }
static inline size_t finalizer_size(Finalizer *obj) { return gc_finalizer_size(); }
static inline size_t pair_size(Pair *obj) { return sizeof(*obj); }
static inline void
visit_small_object_fields(SmallObject *obj,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {}
static inline void
visit_finalizer_fields(Finalizer *finalizer,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {
gc_trace_finalizer((struct gc_finalizer*)finalizer, visit, heap, visit_data);
}
static inline void
visit_pair_fields(Pair *pair,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {
visit(gc_edge(&pair->car), heap, visit_data);
visit(gc_edge(&pair->cdr), heap, visit_data);
}
#include "simple-gc-embedder.h"
#endif // FINALIZERS_EMBEDDER_H

View file

@ -0,0 +1,22 @@
#ifndef FINALIZERS_TYPES_H
#define FINALIZERS_TYPES_H
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
M(pair, Pair, PAIR) \
M(finalizer, Finalizer, FINALIZER) \
M(small_object, SmallObject, SMALL_OBJECT)
#include "heap-objects.h"
#include "simple-tagging-scheme.h"
struct SmallObject {
struct gc_header header;
};
struct Pair {
struct gc_header header;
void *car;
void *cdr;
};
#endif // FINALIZERS_TYPES_H

View file

@ -0,0 +1,284 @@
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <sys/time.h>
#include "assert.h"
#include "gc-api.h"
#include "gc-basic-stats.h"
#include "gc-finalizer.h"
#include "simple-roots-api.h"
#include "finalizers-types.h"
#include "simple-allocator.h"
typedef HANDLE_TO(SmallObject) SmallObjectHandle;
typedef HANDLE_TO(struct gc_finalizer) FinalizerHandle;
typedef HANDLE_TO(Pair) PairHandle;
static SmallObject* allocate_small_object(struct gc_mutator *mut) {
return gc_allocate_with_kind(mut, ALLOC_KIND_SMALL_OBJECT, sizeof(SmallObject));
}
static Pair* allocate_pair(struct gc_mutator *mut) {
return gc_allocate_with_kind(mut, ALLOC_KIND_PAIR, sizeof(Pair));
}
static struct gc_finalizer* allocate_finalizer(struct gc_mutator *mut) {
struct gc_finalizer *ret = gc_allocate_finalizer(mut);
*tag_word(gc_ref_from_heap_object(ret)) = tag_live(ALLOC_KIND_FINALIZER);
return ret;
}
/* Get the current time in microseconds */
static unsigned long current_time(void)
{
struct timeval t;
if (gettimeofday(&t, NULL) == -1)
return 0;
return t.tv_sec * 1000 * 1000 + t.tv_usec;
}
struct thread {
struct gc_mutator *mut;
struct gc_mutator_roots roots;
};
static void print_elapsed(const char *what, unsigned long start) {
unsigned long end = current_time();
unsigned long msec = (end - start) / 1000;
unsigned long usec = (end - start) % 1000;
printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec);
}
struct call_with_gc_data {
void* (*f)(struct thread *);
struct gc_heap *heap;
};
static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) {
struct call_with_gc_data *data = arg;
struct gc_mutator *mut = gc_init_for_thread(addr, data->heap);
struct thread t = { mut, };
gc_mutator_set_roots(mut, &t.roots);
void *ret = data->f(&t);
gc_finish_for_thread(mut);
return ret;
}
static void* call_with_gc(void* (*f)(struct thread *),
struct gc_heap *heap) {
struct call_with_gc_data data = { f, heap };
return gc_call_with_stack_addr(call_with_gc_inner, &data);
}
#define CHECK(x) \
do { \
if (!(x)) { \
fprintf(stderr, "%s:%d: check failed: %s\n", __FILE__, __LINE__, #x); \
exit(1); \
} \
} while (0)
#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
#define CHECK_NULL(x) CHECK_EQ(x, NULL)
#define CHECK_NOT_NULL(x) CHECK_NE(x, NULL)
static double heap_size;
static double heap_multiplier;
static size_t nthreads;
static void cause_gc(struct gc_mutator *mut) {
// Doing a full collection lets us reason precisely about liveness.
gc_collect(mut, GC_COLLECTION_MAJOR);
}
static inline void set_car(struct gc_mutator *mut, Pair *obj, void *val) {
void **field = &obj->car;
if (val)
gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Pair),
gc_edge(field),
gc_ref_from_heap_object(val));
*field = val;
}
static inline void set_cdr(struct gc_mutator *mut, Pair *obj, void *val) {
void **field = &obj->cdr;
if (val)
gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Pair),
gc_edge(field),
gc_ref_from_heap_object(val));
*field = val;
}
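// Build a list of pairs, attaching to each pair a finalizer whose closure
// is that pair's own car.  Once the list is dropped, each finalizer popped
// from the finalizable queue should report a pair whose car is exactly the
// finalizer's closure, which run_one_test checks below.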
static Pair* make_finalizer_chain(struct thread *t, size_t length) {
PairHandle head = { NULL };
PairHandle tail = { NULL };
PUSH_HANDLE(t, head);
PUSH_HANDLE(t, tail);
for (size_t i = 0; i < length; i++) {
HANDLE_SET(tail, HANDLE_REF(head));
HANDLE_SET(head, allocate_pair(t->mut));
set_car(t->mut, HANDLE_REF(head), allocate_small_object(t->mut));
set_cdr(t->mut, HANDLE_REF(head), HANDLE_REF(tail));
struct gc_finalizer *finalizer = allocate_finalizer(t->mut);
gc_finalizer_attach(t->mut, finalizer, 0,
gc_ref_from_heap_object(HANDLE_REF(head)),
gc_ref_from_heap_object(HANDLE_REF(head)->car));
}
Pair *ret = HANDLE_REF(head);
POP_HANDLE(t);
POP_HANDLE(t);
return ret;
}
static void* run_one_test(struct thread *t) {
size_t unit_size = gc_finalizer_size() + sizeof(Pair);
size_t list_length = heap_size / nthreads / heap_multiplier / unit_size;
ssize_t outstanding = list_length;
printf("Allocating list %zu nodes long. Total size %.3fGB.\n",
list_length, list_length * unit_size / 1e9);
unsigned long thread_start = current_time();
PairHandle chain = { NULL };
PUSH_HANDLE(t, chain);
HANDLE_SET(chain, make_finalizer_chain(t, list_length));
cause_gc(t->mut);
size_t finalized = 0;
for (struct gc_finalizer *f = gc_pop_finalizable(t->mut);
f;
f = gc_pop_finalizable(t->mut)) {
Pair* p = gc_ref_heap_object(gc_finalizer_object(f));
SmallObject* o = gc_ref_heap_object(gc_finalizer_closure(f));
CHECK_EQ(p->car, o);
finalized++;
}
printf("thread %p: GC before clear finalized %zu nodes.\n", t, finalized);
outstanding -= finalized;
HANDLE_SET(chain, NULL);
cause_gc(t->mut);
finalized = 0;
for (struct gc_finalizer *f = gc_pop_finalizable(t->mut);
f;
f = gc_pop_finalizable(t->mut)) {
Pair* p = gc_ref_heap_object(gc_finalizer_object(f));
SmallObject* o = gc_ref_heap_object(gc_finalizer_closure(f));
CHECK_EQ(p->car, o);
finalized++;
}
printf("thread %p: GC after clear finalized %zu nodes.\n", t, finalized);
outstanding -= finalized;
print_elapsed("thread", thread_start);
POP_HANDLE(t);
return (void*)outstanding;
}
static void* run_one_test_in_thread(void *arg) {
struct gc_heap *heap = arg;
return call_with_gc(run_one_test, heap);
}
struct join_data { int status; pthread_t thread; };
static void *join_thread(void *data) {
struct join_data *join_data = data;
void *ret;
join_data->status = pthread_join(join_data->thread, &ret);
return ret;
}
#define MAX_THREAD_COUNT 256
int main(int argc, char *argv[]) {
if (argc < 4 || 5 < argc) {
fprintf(stderr, "usage: %s HEAP_SIZE MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]);
return 1;
}
heap_size = atof(argv[1]);
heap_multiplier = atof(argv[2]);
nthreads = atol(argv[3]);
if (heap_size < 8192) {
fprintf(stderr,
"Heap size should probably be at least 8192, right? '%s'\n",
argv[1]);
return 1;
}
if (!(1.0 < heap_multiplier && heap_multiplier < 100)) {
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]);
return 1;
}
if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
(int)MAX_THREAD_COUNT, argv[3]);
return 1;
}
printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n",
heap_size / 1e9, heap_multiplier);
struct gc_options *options = gc_allocate_options();
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
if (argc == 5) {
if (!gc_options_parse_and_set_many(options, argv[4])) {
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[4]);
return 1;
}
}
struct gc_heap *heap;
struct gc_mutator *mut;
struct gc_basic_stats stats;
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
(size_t)heap_size);
return 1;
}
struct thread main_thread = { mut, };
gc_mutator_set_roots(mut, &main_thread.roots);
pthread_t threads[MAX_THREAD_COUNT];
// Run one of the threads in the main thread.
for (size_t i = 1; i < nthreads; i++) {
int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap);
if (status) {
errno = status;
perror("Failed to create thread");
return 1;
}
}
ssize_t outstanding = (ssize_t)run_one_test(&main_thread);
for (size_t i = 1; i < nthreads; i++) {
struct join_data data = { 0, threads[i] };
void *ret = gc_call_without_gc(mut, join_thread, &data);
if (data.status) {
errno = data.status;
perror("Failed to join thread");
return 1;
}
ssize_t thread_outstanding = (ssize_t)ret;
outstanding += thread_outstanding;
}
if (outstanding)
printf("\n\nWARNING: %zd nodes outstanding!!!\n\n", outstanding);
gc_basic_stats_finish(&stats);
fputs("\n", stdout);
gc_basic_stats_print(&stats, stdout);
return 0;
}

View file

@ -0,0 +1,19 @@
#ifndef HEAP_OBJECTS_H
#define HEAP_OBJECTS_H
#include "gc-inline.h"
#include "gc-edge.h"
#define DECLARE_NODE_TYPE(name, Name, NAME) \
struct Name; \
typedef struct Name Name;
FOR_EACH_HEAP_OBJECT_KIND(DECLARE_NODE_TYPE)
#undef DECLARE_NODE_TYPE
#define DEFINE_ENUM(name, Name, NAME) ALLOC_KIND_##NAME,
enum alloc_kind {
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_ENUM)
};
#undef DEFINE_ENUM
#endif // HEAP_OBJECTS_H

View file

@ -0,0 +1,54 @@
#ifndef MT_GCBENCH_EMBEDDER_H
#define MT_GCBENCH_EMBEDDER_H
#include "gc-config.h"
#include "mt-gcbench-types.h"
struct gc_heap;
#define DEFINE_METHODS(name, Name, NAME) \
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
static inline void visit_##name##_fields(Name *obj,\
void (*visit)(struct gc_edge edge, \
struct gc_heap *heap, \
void *visit_data), \
struct gc_heap *heap, \
void *visit_data) GC_ALWAYS_INLINE;
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
#undef DEFINE_METHODS
static inline size_t node_size(Node *obj) {
return sizeof(Node);
}
static inline size_t double_array_size(DoubleArray *array) {
return sizeof(*array) + array->length * sizeof(double);
}
static inline size_t hole_size(Hole *hole) {
return sizeof(*hole) + hole->length * sizeof(uintptr_t);
}
static inline void
visit_node_fields(Node *node,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap, void *visit_data) {
visit(gc_edge(&node->left), heap, visit_data);
visit(gc_edge(&node->right), heap, visit_data);
}
static inline void
visit_double_array_fields(DoubleArray *obj,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap, void *visit_data),
struct gc_heap *heap, void *visit_data) {
}
static inline void
visit_hole_fields(Hole *obj,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap, void *visit_data),
struct gc_heap *heap, void *visit_data) {
if (GC_PRECISE_ROOTS)
GC_CRASH();
}
#include "simple-gc-embedder.h"
#endif // MT_GCBENCH_EMBEDDER_H

View file

@ -0,0 +1,34 @@
#ifndef GCBENCH_TYPES_H
#define GCBENCH_TYPES_H
#include <stddef.h>
#include <stdint.h>
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
M(node, Node, NODE) \
M(double_array, DoubleArray, DOUBLE_ARRAY) \
M(hole, Hole, HOLE)
#include "heap-objects.h"
#include "simple-tagging-scheme.h"
struct Node {
struct gc_header header;
struct Node *left;
struct Node *right;
int i, j;
};
struct DoubleArray {
struct gc_header header;
size_t length;
double values[0];
};
struct Hole {
struct gc_header header;
size_t length;
uintptr_t values[0];
};
#endif // GCBENCH_TYPES_H

View file

@ -0,0 +1,402 @@
// This is adapted from a benchmark written by John Ellis and Pete Kovac
// of Post Communications.
// It was modified by Hans Boehm of Silicon Graphics.
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
//
// This is no substitute for real applications. No actual application
// is likely to behave in exactly this way. However, this benchmark was
// designed to be more representative of real applications than other
// Java GC benchmarks of which we are aware.
// It attempts to model those properties of allocation requests that
// are important to current GC techniques.
// It is designed to be used either to obtain a single overall performance
// number, or to give a more detailed estimate of how collector
// performance varies with object lifetimes. It prints the time
// required to allocate and collect balanced binary trees of various
// sizes. Smaller trees result in shorter object lifetimes. Each cycle
// allocates roughly the same amount of memory.
// Two data structures are kept around during the entire process, so
// that the measured performance is representative of applications
// that maintain some live in-memory data. One of these is a tree
// containing many pointers. The other is a large array containing
// double precision floating point numbers. Both should be of comparable
// size.
//
// The results are only really meaningful together with a specification
// of how much memory was used. It is possible to trade memory for
// better time performance. This benchmark should be run in a 32 MB
// heap, though we don't currently know how to enforce that uniformly.
//
// Unlike the original Ellis and Kovac benchmark, we do not attempt to
// measure pause times. This facility should eventually be added back
// in. There are several reasons for omitting it for now. The original
// implementation depended on assumptions about the thread scheduler
// that don't hold uniformly. The results really measure both the
// scheduler and GC. Pause time measurements tend to not fit well with
// current benchmark suites. As far as we know, none of the current
// commercial Java implementations seriously attempt to minimize GC pause
// times.
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include "assert.h"
#include "gc-api.h"
#include "gc-basic-stats.h"
#include "mt-gcbench-types.h"
#include "simple-roots-api.h"
#include "simple-allocator.h"
#define MAX_THREAD_COUNT 256
static const int long_lived_tree_depth = 16; // about 4Mb
static const int array_size = 500000; // about 4Mb
static const int min_tree_depth = 4;
static const int max_tree_depth = 16;
typedef HANDLE_TO(Node) NodeHandle;
typedef HANDLE_TO(DoubleArray) DoubleArrayHandle;
static Node* allocate_node(struct gc_mutator *mut) {
// memset to 0 by the collector.
return gc_allocate_with_kind(mut, ALLOC_KIND_NODE, sizeof (Node));
}
static DoubleArray* allocate_double_array(struct gc_mutator *mut,
size_t size) {
// May be uninitialized.
size_t bytes = sizeof(DoubleArray) + sizeof (double) * size;
DoubleArray *ret =
gc_allocate_pointerless_with_kind(mut, ALLOC_KIND_DOUBLE_ARRAY, bytes);
ret->length = size;
return ret;
}
static Hole* allocate_hole(struct gc_mutator *mut, size_t size) {
size_t bytes = sizeof(Hole) + sizeof (uintptr_t) * size;
Hole *ret = gc_allocate_with_kind(mut, ALLOC_KIND_HOLE, bytes);
ret->length = size;
return ret;
}
static unsigned long current_time(void) {
struct timeval t = { 0 };
gettimeofday(&t, NULL);
return t.tv_sec * 1000 * 1000 + t.tv_usec;
}
static double elapsed_millis(unsigned long start) {
return (current_time() - start) * 1e-3;
}
// Nodes used by a tree of a given size
static int tree_size(int i) {
return ((1 << (i + 1)) - 1);
}
// Number of iterations to use for a given tree depth
static int compute_num_iters(int i) {
return 2 * tree_size(max_tree_depth + 2) / tree_size(i);
}
// A power-law distribution. Each integer was selected by starting at 0, taking
// a random number in [0,1), and then accepting the integer if the random number
// was less than 0.15, or trying again with the next integer otherwise. Useful
// for modelling allocation sizes or number of garbage objects to allocate
// between live allocations.
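// As a hypothetical regeneration sketch (not part of the benchmark; the
// helper name and use of drand48 are assumptions), the table above could
// be reproduced with:
//
//   static uint8_t power_law_sample(void) {
//     uint8_t i = 0;
//     while (drand48() >= 0.15)  // accept i with probability 0.15, else try i+1
//       i++;
//     return i;
//   }
//
// i.e. each entry is drawn from a geometric distribution with p = 0.15.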
static const uint8_t power_law_distribution[256] = {
1, 15, 3, 12, 2, 8, 4, 0, 18, 7, 9, 8, 15, 2, 36, 5,
1, 9, 6, 11, 9, 19, 2, 0, 0, 3, 9, 6, 3, 2, 1, 1,
6, 1, 8, 4, 2, 0, 5, 3, 7, 0, 0, 3, 0, 4, 1, 7,
1, 8, 2, 2, 2, 14, 0, 7, 8, 0, 2, 1, 4, 12, 7, 5,
0, 3, 4, 13, 10, 2, 3, 7, 0, 8, 0, 23, 0, 16, 1, 1,
6, 28, 1, 18, 0, 3, 6, 5, 8, 6, 14, 5, 2, 5, 0, 11,
0, 18, 4, 16, 1, 4, 3, 13, 3, 23, 7, 4, 10, 5, 3, 13,
0, 14, 5, 5, 2, 5, 0, 16, 2, 0, 1, 1, 0, 0, 4, 2,
7, 7, 0, 5, 7, 2, 1, 24, 27, 3, 7, 1, 0, 8, 1, 4,
0, 3, 0, 7, 7, 3, 9, 2, 9, 2, 5, 10, 1, 1, 12, 6,
2, 9, 5, 0, 4, 6, 0, 7, 2, 1, 5, 4, 1, 0, 1, 15,
4, 0, 15, 4, 0, 0, 32, 18, 2, 2, 1, 7, 8, 3, 11, 1,
2, 7, 11, 1, 9, 1, 2, 6, 11, 17, 1, 2, 5, 1, 14, 3,
6, 1, 1, 15, 3, 1, 0, 6, 10, 8, 1, 3, 2, 7, 0, 1,
0, 11, 3, 3, 5, 8, 2, 0, 0, 7, 12, 2, 5, 20, 3, 7,
4, 4, 5, 22, 1, 5, 2, 7, 15, 2, 4, 6, 11, 8, 12, 1
};
static size_t power_law(size_t *counter) {
return power_law_distribution[(*counter)++ & 0xff];
}
struct thread {
struct gc_mutator *mut;
struct gc_mutator_roots roots;
size_t counter;
};
static void allocate_garbage(struct thread *t) {
size_t hole = power_law(&t->counter);
if (hole) {
allocate_hole(t->mut, hole);
}
}
static inline void set_field(struct gc_mutator *mut, Node *obj,
Node **field, Node *val) {
gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Node),
gc_edge(field),
gc_ref_from_heap_object(val));
*field = val;
}
// Build tree top down, assigning to older objects.
static void populate(struct thread *t, int depth, Node *node) {
struct gc_mutator *mut = t->mut;
if (depth <= 0)
return;
NodeHandle self = { node };
PUSH_HANDLE(t, self);
allocate_garbage(t);
NodeHandle l = { allocate_node(mut) };
PUSH_HANDLE(t, l);
allocate_garbage(t);
NodeHandle r = { allocate_node(mut) };
PUSH_HANDLE(t, r);
set_field(mut, HANDLE_REF(self), &HANDLE_REF(self)->left, HANDLE_REF(l));
set_field(mut, HANDLE_REF(self), &HANDLE_REF(self)->right, HANDLE_REF(r));
// i is 0 because the memory is zeroed.
HANDLE_REF(self)->j = depth;
populate(t, depth-1, HANDLE_REF(self)->left);
populate(t, depth-1, HANDLE_REF(self)->right);
POP_HANDLE(t);
POP_HANDLE(t);
POP_HANDLE(t);
}
// Build tree bottom-up
static Node* make_tree(struct thread *t, int depth) {
struct gc_mutator *mut = t->mut;
if (depth <= 0)
return allocate_node(mut);
NodeHandle left = { make_tree(t, depth-1) };
PUSH_HANDLE(t, left);
NodeHandle right = { make_tree(t, depth-1) };
PUSH_HANDLE(t, right);
allocate_garbage(t);
Node *result = allocate_node(mut);
result->left = HANDLE_REF(left);
result->right = HANDLE_REF(right);
// i is 0 because the memory is zeroed.
result->j = depth;
POP_HANDLE(t);
POP_HANDLE(t);
return result;
}
static void validate_tree(Node *tree, int depth) {
#ifndef NDEBUG
GC_ASSERT_EQ(tree->i, 0);
GC_ASSERT_EQ(tree->j, depth);
if (depth == 0) {
GC_ASSERT(!tree->left);
GC_ASSERT(!tree->right);
} else {
GC_ASSERT(tree->left);
GC_ASSERT(tree->right);
validate_tree(tree->left, depth - 1);
validate_tree(tree->right, depth - 1);
}
#endif
}
static void time_construction(struct thread *t, int depth) {
struct gc_mutator *mut = t->mut;
int num_iters = compute_num_iters(depth);
NodeHandle temp_tree = { NULL };
PUSH_HANDLE(t, temp_tree);
printf("Creating %d trees of depth %d\n", num_iters, depth);
{
unsigned long start = current_time();
for (int i = 0; i < num_iters; ++i) {
HANDLE_SET(temp_tree, allocate_node(mut));
populate(t, depth, HANDLE_REF(temp_tree));
validate_tree(HANDLE_REF(temp_tree), depth);
HANDLE_SET(temp_tree, NULL);
}
printf("\tTop down construction took %.3f msec\n",
elapsed_millis(start));
}
{
long start = current_time();
for (int i = 0; i < num_iters; ++i) {
HANDLE_SET(temp_tree, make_tree(t, depth));
validate_tree(HANDLE_REF(temp_tree), depth);
HANDLE_SET(temp_tree, NULL);
}
printf("\tBottom up construction took %.3f msec\n",
elapsed_millis(start));
}
POP_HANDLE(t);
}
struct call_with_gc_data {
void* (*f)(struct thread *);
struct gc_heap *heap;
};
static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) {
struct call_with_gc_data *data = arg;
struct gc_mutator *mut = gc_init_for_thread(addr, data->heap);
struct thread t = { mut, };
gc_mutator_set_roots(mut, &t.roots);
void *ret = data->f(&t);
gc_finish_for_thread(mut);
return ret;
}
static void* call_with_gc(void* (*f)(struct thread *),
struct gc_heap *heap) {
struct call_with_gc_data data = { f, heap };
return gc_call_with_stack_addr(call_with_gc_inner, &data);
}
static void* run_one_test(struct thread *t) {
NodeHandle long_lived_tree = { NULL };
NodeHandle temp_tree = { NULL };
DoubleArrayHandle array = { NULL };
PUSH_HANDLE(t, long_lived_tree);
PUSH_HANDLE(t, temp_tree);
PUSH_HANDLE(t, array);
// Create a long lived object
printf(" Creating a long-lived binary tree of depth %d\n",
long_lived_tree_depth);
HANDLE_SET(long_lived_tree, allocate_node(t->mut));
populate(t, long_lived_tree_depth, HANDLE_REF(long_lived_tree));
// Create long-lived array, filling half of it
printf(" Creating a long-lived array of %d doubles\n", array_size);
HANDLE_SET(array, allocate_double_array(t->mut, array_size));
for (int i = 0; i < array_size/2; ++i) {
HANDLE_REF(array)->values[i] = 1.0/i;
}
for (int d = min_tree_depth; d <= max_tree_depth; d += 2) {
time_construction(t, d);
}
validate_tree(HANDLE_REF(long_lived_tree), long_lived_tree_depth);
// Fake reference to LongLivedTree and array to keep them from being optimized
// away.
if (HANDLE_REF(long_lived_tree)->i != 0
|| HANDLE_REF(array)->values[1000] != 1.0/1000)
fprintf(stderr, "Failed\n");
POP_HANDLE(t);
POP_HANDLE(t);
POP_HANDLE(t);
return NULL;
}
static void* run_one_test_in_thread(void *arg) {
struct gc_heap *heap = arg;
return call_with_gc(run_one_test, heap);
}
struct join_data { int status; pthread_t thread; };
static void *join_thread(void *data) {
struct join_data *join_data = data;
void *ret;
join_data->status = pthread_join(join_data->thread, &ret);
return ret;
}
int main(int argc, char *argv[]) {
size_t heap_max_live =
tree_size(long_lived_tree_depth) * sizeof(Node) +
tree_size(max_tree_depth) * sizeof(Node) +
sizeof(DoubleArray) + sizeof(double) * array_size;
if (argc < 3 || argc > 4) {
fprintf(stderr, "usage: %s MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]);
return 1;
}
double multiplier = atof(argv[1]);
size_t nthreads = atol(argv[2]);
if (!(0.1 < multiplier && multiplier < 100)) {
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[1]);
return 1;
}
if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
(int)MAX_THREAD_COUNT, argv[2]);
return 1;
}
size_t heap_size = heap_max_live * multiplier * nthreads;
struct gc_options *options = gc_allocate_options();
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
if (argc == 4) {
if (!gc_options_parse_and_set_many(options, argv[3])) {
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[3]);
return 1;
}
}
struct gc_heap *heap;
struct gc_mutator *mut;
struct gc_basic_stats stats;
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
heap_size);
return 1;
}
struct thread main_thread = { mut, };
gc_mutator_set_roots(mut, &main_thread.roots);
printf("Garbage Collector Test\n");
printf(" Live storage will peak at %zd bytes.\n\n", heap_max_live);
pthread_t threads[MAX_THREAD_COUNT];
// Run one of the threads in the main thread.
for (size_t i = 1; i < nthreads; i++) {
int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap);
if (status) {
errno = status;
perror("Failed to create thread");
return 1;
}
}
run_one_test(&main_thread);
for (size_t i = 1; i < nthreads; i++) {
struct join_data data = { 0, threads[i] };
gc_call_without_gc(mut, join_thread, &data);
if (data.status) {
errno = data.status;
perror("Failed to join thread");
return 1;
}
}
gc_basic_stats_finish(&stats);
fputs("\n", stdout);
gc_basic_stats_print(&stats, stdout);
}

View file

@ -0,0 +1,37 @@
#ifndef QUADS_EMBEDDER_H
#define QUADS_EMBEDDER_H
#include <stddef.h>
#include "quads-types.h"
struct gc_heap;
#define DEFINE_METHODS(name, Name, NAME) \
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
static inline void visit_##name##_fields(Name *obj,\
void (*visit)(struct gc_edge edge, \
struct gc_heap *heap, \
void *visit_data), \
struct gc_heap *heap, \
void *visit_data) GC_ALWAYS_INLINE;
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
#undef DEFINE_METHODS
static inline size_t quad_size(Quad *obj) {
return sizeof(Quad);
}
static inline void
visit_quad_fields(Quad *quad,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {
for (size_t i = 0; i < 4; i++)
visit(gc_edge(&quad->kids[i]), heap, visit_data);
}
#include "simple-gc-embedder.h"
#endif // QUADS_EMBEDDER_H

View file

@ -0,0 +1,15 @@
#ifndef QUADS_TYPES_H
#define QUADS_TYPES_H
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
M(quad, Quad, QUAD)
#include "heap-objects.h"
#include "simple-tagging-scheme.h"
struct Quad {
struct gc_header header;
struct Quad *kids[4];
};
#endif // QUADS_TYPES_H

View file

@ -0,0 +1,181 @@
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <sys/time.h>
#include "assert.h"
#include "gc-api.h"
#include "gc-basic-stats.h"
#include "simple-roots-api.h"
#include "quads-types.h"
#include "simple-allocator.h"
typedef HANDLE_TO(Quad) QuadHandle;
static Quad* allocate_quad(struct gc_mutator *mut) {
// memset to 0 by the collector.
return gc_allocate_with_kind(mut, ALLOC_KIND_QUAD, sizeof (Quad));
}
/* Get the current time in microseconds */
static unsigned long current_time(void)
{
struct timeval t;
if (gettimeofday(&t, NULL) == -1)
return 0;
return t.tv_sec * 1000 * 1000 + t.tv_usec;
}
struct thread {
struct gc_mutator *mut;
struct gc_mutator_roots roots;
size_t counter;
};
// Build tree bottom-up
static Quad* make_tree(struct thread *t, int depth) {
if (depth<=0) {
return allocate_quad(t->mut);
} else {
QuadHandle kids[4] = { { NULL }, };
for (size_t i = 0; i < 4; i++) {
HANDLE_SET(kids[i], make_tree(t, depth-1));
PUSH_HANDLE(t, kids[i]);
}
Quad *result = allocate_quad(t->mut);
for (size_t i = 0; i < 4; i++)
result->kids[i] = HANDLE_REF(kids[i]);
for (size_t i = 0; i < 4; i++)
POP_HANDLE(t);
return result;
}
}
static void validate_tree(Quad *tree, int depth) {
for (size_t i = 0; i < 4; i++) {
if (depth == 0) {
if (tree->kids[i])
abort();
} else {
if (!tree->kids[i])
abort();
validate_tree(tree->kids[i], depth - 1);
}
}
}
static void print_elapsed(const char *what, unsigned long start) {
unsigned long end = current_time();
unsigned long msec = (end - start) / 1000;
unsigned long usec = (end - start) % 1000;
printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec);
}
static size_t parse_size(char *arg, const char *what) {
long val = atol(arg);
if (val <= 0) {
fprintf(stderr, "Failed to parse %s '%s'\n", what, arg);
exit(1);
}
return val;
}
static size_t tree_size(size_t depth) {
size_t nquads = 0;
size_t leaf_count = 1;
for (size_t i = 0; i <= depth; i++) {
if (nquads > ((size_t)-1) - leaf_count) {
fprintf(stderr,
"error: address space too small for quad tree of depth %zu\n",
depth);
exit(1);
}
nquads += leaf_count;
leaf_count *= 4;
}
return nquads;
}
#define MAX_THREAD_COUNT 256
int main(int argc, char *argv[]) {
if (argc < 3 || 4 < argc) {
fprintf(stderr, "usage: %s DEPTH MULTIPLIER [GC-OPTIONS]\n", argv[0]);
return 1;
}
size_t depth = parse_size(argv[1], "depth");
double multiplier = atof(argv[2]);
if (!(1.0 < multiplier && multiplier < 100)) {
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]);
return 1;
}
size_t nquads = tree_size(depth);
size_t tree_bytes = nquads * sizeof(Quad);
size_t heap_size = tree_bytes * multiplier;
printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n",
heap_size / 1e9, multiplier);
struct gc_options *options = gc_allocate_options();
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
if (argc == 4) {
if (!gc_options_parse_and_set_many(options, argv[3])) {
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[3]);
return 1;
}
}
struct gc_heap *heap;
struct gc_mutator *mut;
struct gc_basic_stats stats;
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
heap_size);
return 1;
}
struct thread t = { mut, };
gc_mutator_set_roots(mut, &t.roots);
QuadHandle quad = { NULL };
PUSH_HANDLE(&t, quad);
printf("Making quad tree of depth %zu (%zu nodes). Total size %.3fGB.\n",
depth, nquads, (nquads * sizeof(Quad)) / 1e9);
unsigned long start = current_time();
HANDLE_SET(quad, make_tree(&t, depth));
print_elapsed("construction", start);
validate_tree(HANDLE_REF(quad), depth);
size_t garbage_step = heap_size / 7.5;
printf("Allocating %.3f GB of garbage, 20 times, validating live tree each time.\n",
garbage_step / 1e9);
unsigned long garbage_start = current_time();
for (size_t i = 0; i < 20; i++) {
size_t garbage_depth = 3;
start = current_time();
for (size_t i = garbage_step/(tree_size(garbage_depth)*4*sizeof(Quad*)); i; i--)
make_tree(&t, garbage_depth);
print_elapsed("allocating garbage", start);
start = current_time();
validate_tree(HANDLE_REF(quad), depth);
}
print_elapsed("allocation loop", garbage_start);
gc_basic_stats_finish(&stats);
fputs("\n", stdout);
gc_basic_stats_print(&stats, stdout);
POP_HANDLE(&t);
return 0;
}

View file

@ -0,0 +1,21 @@
#ifndef SIMPLE_ALLOCATOR_H
#define SIMPLE_ALLOCATOR_H
#include "simple-tagging-scheme.h"
#include "gc-api.h"
static inline void*
gc_allocate_with_kind(struct gc_mutator *mut, enum alloc_kind kind, size_t bytes) {
void *obj = gc_allocate(mut, bytes, GC_ALLOCATION_TAGGED);
*tag_word(gc_ref_from_heap_object(obj)) = tag_live(kind);
return obj;
}
static inline void*
gc_allocate_pointerless_with_kind(struct gc_mutator *mut, enum alloc_kind kind, size_t bytes) {
void *obj = gc_allocate(mut, bytes, GC_ALLOCATION_TAGGED_POINTERLESS);
*tag_word(gc_ref_from_heap_object(obj)) = tag_live(kind);
return obj;
}
#endif // SIMPLE_ALLOCATOR_H

View file

@ -0,0 +1,183 @@
#include <stdatomic.h>
#include "simple-tagging-scheme.h"
#include "simple-roots-types.h"
#include "gc-config.h"
#include "gc-embedder-api.h"
#define GC_EMBEDDER_EPHEMERON_HEADER struct gc_header header;
#define GC_EMBEDDER_FINALIZER_HEADER struct gc_header header;
static inline size_t gc_finalizer_priority_count(void) { return 2; }
static inline int
gc_is_valid_conservative_ref_displacement(uintptr_t displacement) {
#if GC_CONSERVATIVE_ROOTS || GC_CONSERVATIVE_TRACE
// Here is where you would allow tagged heap object references.
return displacement == 0;
#else
// Shouldn't get here.
GC_CRASH();
#endif
}
// No external objects in simple benchmarks.
static inline int gc_extern_space_visit(struct gc_extern_space *space,
struct gc_edge edge,
struct gc_ref ref) {
GC_CRASH();
}
static inline void gc_extern_space_start_gc(struct gc_extern_space *space,
int is_minor_gc) {
}
static inline void gc_extern_space_finish_gc(struct gc_extern_space *space,
int is_minor_gc) {
}
static inline void gc_trace_object(struct gc_ref ref,
void (*trace_edge)(struct gc_edge edge,
struct gc_heap *heap,
void *trace_data),
struct gc_heap *heap,
void *trace_data,
size_t *size) {
#if GC_CONSERVATIVE_TRACE
// Shouldn't get here.
GC_CRASH();
#else
switch (tag_live_alloc_kind(*tag_word(ref))) {
#define SCAN_OBJECT(name, Name, NAME) \
case ALLOC_KIND_##NAME: \
if (trace_edge) \
visit_##name##_fields(gc_ref_heap_object(ref), trace_edge, \
heap, trace_data); \
if (size) \
*size = name##_size(gc_ref_heap_object(ref)); \
break;
FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT)
#undef SCAN_OBJECT
default:
GC_CRASH();
}
#endif
}
static inline void visit_roots(struct handle *roots,
void (*trace_edge)(struct gc_edge edge,
struct gc_heap *heap,
void *trace_data),
struct gc_heap *heap,
void *trace_data) {
for (struct handle *h = roots; h; h = h->next)
trace_edge(gc_edge(&h->v), heap, trace_data);
}
static inline void gc_trace_mutator_roots(struct gc_mutator_roots *roots,
void (*trace_edge)(struct gc_edge edge,
struct gc_heap *heap,
void *trace_data),
struct gc_heap *heap,
void *trace_data) {
if (roots)
visit_roots(roots->roots, trace_edge, heap, trace_data);
}
static inline void gc_trace_heap_roots(struct gc_heap_roots *roots,
void (*trace_edge)(struct gc_edge edge,
struct gc_heap *heap,
void *trace_data),
struct gc_heap *heap,
void *trace_data) {
if (roots)
visit_roots(roots->roots, trace_edge, heap, trace_data);
}
static inline uintptr_t gc_object_forwarded_nonatomic(struct gc_ref ref) {
uintptr_t tag = *tag_word(ref);
return (tag & gcobj_not_forwarded_bit) ? 0 : tag;
}
static inline void gc_object_forward_nonatomic(struct gc_ref ref,
struct gc_ref new_ref) {
*tag_word(ref) = gc_ref_value(new_ref);
}
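// Two-phase atomic forwarding, used by parallel copying collectors: a
// tracer claims an object by compare-and-swapping its tag word to
// gcobj_busy, copies the object, installs the saved tag into the copy,
// and finally publishes the copy's address in the old tag word.  Tracers
// that observe gcobj_busy spin via gc_atomic_forward_retry_busy until
// either the forwarding address or (on abort) the original tag appears.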
static inline struct gc_atomic_forward
gc_atomic_forward_begin(struct gc_ref ref) {
uintptr_t tag = atomic_load_explicit(tag_word(ref), memory_order_acquire);
enum gc_forwarding_state state;
if (tag == gcobj_busy)
state = GC_FORWARDING_STATE_BUSY;
else if (tag & gcobj_not_forwarded_bit)
state = GC_FORWARDING_STATE_NOT_FORWARDED;
else
state = GC_FORWARDING_STATE_FORWARDED;
return (struct gc_atomic_forward){ ref, tag, state };
}
static inline int
gc_atomic_forward_retry_busy(struct gc_atomic_forward *fwd) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_BUSY);
uintptr_t tag = atomic_load_explicit(tag_word(fwd->ref),
memory_order_acquire);
if (tag == gcobj_busy)
return 0;
if (tag & gcobj_not_forwarded_bit) {
fwd->state = GC_FORWARDING_STATE_NOT_FORWARDED;
fwd->data = tag;
} else {
fwd->state = GC_FORWARDING_STATE_FORWARDED;
fwd->data = tag;
}
return 1;
}
static inline void
gc_atomic_forward_acquire(struct gc_atomic_forward *fwd) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_NOT_FORWARDED);
if (atomic_compare_exchange_strong(tag_word(fwd->ref), &fwd->data,
gcobj_busy))
fwd->state = GC_FORWARDING_STATE_ACQUIRED;
else if (fwd->data == gcobj_busy)
fwd->state = GC_FORWARDING_STATE_BUSY;
else {
GC_ASSERT((fwd->data & gcobj_not_forwarded_bit) == 0);
fwd->state = GC_FORWARDING_STATE_FORWARDED;
}
}
static inline void
gc_atomic_forward_abort(struct gc_atomic_forward *fwd) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
atomic_store_explicit(tag_word(fwd->ref), fwd->data, memory_order_release);
fwd->state = GC_FORWARDING_STATE_NOT_FORWARDED;
}
static inline size_t
gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
switch (tag_live_alloc_kind(fwd->data)) {
#define OBJECT_SIZE(name, Name, NAME) \
case ALLOC_KIND_##NAME: \
return name##_size(gc_ref_heap_object(fwd->ref));
FOR_EACH_HEAP_OBJECT_KIND(OBJECT_SIZE)
#undef OBJECT_SIZE
default:
GC_CRASH();
}
}
static inline void
gc_atomic_forward_commit(struct gc_atomic_forward *fwd, struct gc_ref new_ref) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
*tag_word(new_ref) = fwd->data;
atomic_store_explicit(tag_word(fwd->ref), gc_ref_value(new_ref),
memory_order_release);
fwd->state = GC_FORWARDING_STATE_FORWARDED;
}
static inline uintptr_t
gc_atomic_forward_address(struct gc_atomic_forward *fwd) {
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_FORWARDED);
return fwd->data;
}

View file

@ -0,0 +1,26 @@
#ifndef SIMPLE_ROOTS_API_H
#define SIMPLE_ROOTS_API_H
#include "gc-config.h"
#include "simple-roots-types.h"
#define HANDLE_TO(T) union { T* v; struct handle handle; }
#define HANDLE_LOC(h) &(h).v
#define HANDLE_REF(h) (h).v
#define HANDLE_SET(h,val) do { (h).v = val; } while (0)
#define PUSH_HANDLE(cx, h) push_handle(&(cx)->roots.roots, &h.handle)
#define POP_HANDLE(cx) pop_handle(&(cx)->roots.roots)
static inline void push_handle(struct handle **roots, struct handle *handle) {
if (GC_PRECISE_ROOTS) {
handle->next = *roots;
*roots = handle;
}
}
static inline void pop_handle(struct handle **roots) {
if (GC_PRECISE_ROOTS)
*roots = (*roots)->next;
}
#endif // SIMPLE_ROOTS_API_H

View file

@ -0,0 +1,17 @@
#ifndef SIMPLE_ROOTS_TYPES_H
#define SIMPLE_ROOTS_TYPES_H
struct handle {
void *v;
struct handle *next;
};
struct gc_heap_roots {
struct handle *roots;
};
struct gc_mutator_roots {
struct handle *roots;
};
#endif // SIMPLE_ROOTS_TYPES_H

View file

@ -0,0 +1,29 @@
#ifndef SIMPLE_TAGGING_SCHEME_H
#define SIMPLE_TAGGING_SCHEME_H
#include <stdint.h>
struct gc_header {
uintptr_t tag;
};
// Alloc kind is in bits 1-7, for live objects.
static const uintptr_t gcobj_alloc_kind_mask = 0x7f;
static const uintptr_t gcobj_alloc_kind_shift = 1;
static const uintptr_t gcobj_forwarded_mask = 0x1;
static const uintptr_t gcobj_not_forwarded_bit = 0x1;
static const uintptr_t gcobj_busy = 0;
static inline uint8_t tag_live_alloc_kind(uintptr_t tag) {
return (tag >> gcobj_alloc_kind_shift) & gcobj_alloc_kind_mask;
}
static inline uintptr_t tag_live(uint8_t alloc_kind) {
return ((uintptr_t)alloc_kind << gcobj_alloc_kind_shift)
| gcobj_not_forwarded_bit;
}
static inline uintptr_t* tag_word(struct gc_ref ref) {
struct gc_header *header = gc_ref_heap_object(ref);
return &header->tag;
}
#endif // SIMPLE_TAGGING_SCHEME_H

160
libguile/whippet/ctf_to_json.py Executable file
View file

@ -0,0 +1,160 @@
#!/usr/bin/env python3
# Any copyright is dedicated to the Public Domain.
# https://creativecommons.org/publicdomain/zero/1.0/
#
# Originally written by Andy Wingo <wingo@igalia.com>.
import bt2 # From the babeltrace2 package.
import sys
import json
from enum import Enum
# Usage: ./ctf_to_json.py ~/lttng-traces/name-of-your-trace > foo.json
#
# Convert a Common Trace Format (CTF) trace, for example as produced by
# LTTng, to the JSON-based Trace Event Format (TEF), for example as
# consumed by `chrome://tracing`, `https://ui.perfetto.dev/`, or
# `https://profiler.firefox.com`.
# The Trace Event Format is documented here:
#
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0
# By default, events are emitted as EventPhase.INSTANT. We also support
# rewriting the event stream so as to generate EventPhase.BEGIN /
# EventPhase.END events for specific named events.
synthetic_events = {
'gc': ['whippet:mutator_cause_gc',
'whippet:restarting_mutators'],
'stop-the-world': ['whippet:requesting_stop',
'whippet:mutators_stopped'],
'trace': ['whippet:prepare_gc',
'whippet:restarting_mutators'],
'mutator-stopped': ['whippet:mutator_stopping',
'whippet:mutator_restarted'],
'trace-roots': ['whippet:trace_roots_begin',
'whippet:trace_roots_end'],
'trace-check-termination': ['whippet:trace_check_termination_begin',
'whippet:trace_check_termination_end'],
'trace-objects': ['whippet:trace_objects_begin',
'whippet:trace_objects_end'],
'trace-worker': ['whippet:trace_worker_begin',
'whippet:trace_worker_end']
}
class EventPhase(Enum):
BEGIN = 'B'
END = 'E'
COMPLETE = 'X'
INSTANT = 'i'
COUNTER = 'C'
NESTABLE_START = 'b'
NESTABLE_INSTANT = 'n'
NESTABLE_END = 'e'
FLOW_START = 's'
FLOW_STEP = 't'
FLOW_END = 'f'
SAMPLE = 'P'
OBJECT_CREATED = 'N'
OBJECT_SNAPSHOT = 'O'
OBJECT_DESTROYED = 'D'
METADATA = 'M'
MEMORY_DUMP_GLOBAL = 'V'
    MEMORY_DUMP_PROCESS = 'v'
MARK = 'R'
CLOCK_SYNC = 'c'
CONTEXT_BEGIN = '('
CONTEXT_END = ')'
base_time = None
def event_us(msg):
assert(msg.default_clock_snapshot.clock_class.name == 'monotonic')
assert(msg.default_clock_snapshot.clock_class.frequency == 1e9)
global base_time
ns = msg.default_clock_snapshot.value
if base_time is None:
base_time = ns
return (ns - base_time) * 1e-3
def lower(x):
if isinstance(x, str) or isinstance(x, int) or isinstance(x, float):
return x
if isinstance(x, dict) or isinstance(x, bt2._StructureFieldConst):
return {lower(k):lower(v) for k, v in x.items()}
if isinstance(x, bt2._BoolValueConst) or isinstance(x, bt2._BoolFieldConst):
return bool(x)
if isinstance(x, bt2._EnumerationFieldConst):
return repr(x)
if isinstance(x, bt2._IntegerValueConst) or isinstance(x, bt2._IntegerFieldConst):
return int(x)
if isinstance(x, bt2._RealValueConst) or isinstance(x, bt2._RealFieldConst):
return float(x)
if isinstance(x, bt2._StringValueConst) or isinstance(x, bt2._StringFieldConst):
return str(x)
raise ValueError("Unexpected value from trace", x)
# Specific Whippet events.
synthetic_begin = {}
synthetic_end = {}
for synthetic, [begin, end] in synthetic_events.items():
synthetic_begin[begin] = []
synthetic_end[end] = []
for synthetic, [begin, end] in synthetic_events.items():
synthetic_begin[begin].append(synthetic)
synthetic_end[end].append(synthetic)
def put(str):
sys.stdout.write(str)
need_comma = False
def print_event(ev):
global need_comma
if need_comma:
sys.stdout.write(',\n ')
else:
need_comma = True
# It appears to be faster to make a string, then print the string,
# than to call json.dump with a file object.
# json.dump(ev, sys.stdout, ensure_ascii=False, check_circular=False)
put(json.dumps(ev, ensure_ascii=False, check_circular=False))
def emit_event(msg, name, phase):
ev = {'name': name,
'cat': 'whippet',
'ph': phase.value,
'ts': event_us(msg),
'pid': lower(msg.event.common_context_field['vpid']),
'tid': lower(msg.event.common_context_field['vtid']),
'args': lower(msg.event.payload_field)}
print_event(ev)
def emit_begin_event(msg, name):
emit_event(msg, name, EventPhase.BEGIN)
def emit_end_event(msg, name):
emit_event(msg, name, EventPhase.END)
def emit_events(msg):
emit_event(msg, msg.event.name, EventPhase.INSTANT)
for begin in synthetic_begin.get(msg.event.name, []):
emit_begin_event(msg, begin)
for end in synthetic_end.get(msg.event.name, []):
emit_end_event(msg, end)
def ctf_to_json(path):
msg_it = bt2.TraceCollectionMessageIterator(path)
put('{\n')
put(' "traceEvents": [\n ')
for msg in msg_it:
if hasattr(msg, 'event'):
emit_events(msg)
put('\n')
put('\n ],\n')
put(' "displayTimeUnit": "ns"\n')
put('}\n')
if len(sys.argv) != 2:
sys.stderr.write(
'usage: ' + sys.argv[0] + ' ~/lttng-traces/name-of-your-trace\n')
sys.exit(1)
else:
ctf_to_json(sys.argv[1])

View file

@ -0,0 +1,13 @@
# Whippet documentation
* [Manual](./manual.md): How do you get your program to use
Whippet? What is the API?
* [Collector implementations](./collectors.md): There are a number of
implementations of the Whippet API with differing performance
characteristics and which impose different requirements on the
embedder.
* [Guile](./guile.md): Some notes on a potential rebase of Guile on
top of Whippet.

View file

@ -0,0 +1,26 @@
# Boehm-Demers-Weiser collector
Whippet's `bdw` collector is backed by a third-party garbage collector,
the [Boehm-Demers-Weiser collector](https://github.com/ivmai/bdwgc).
BDW-GC is a mark-sweep collector with conservative root-finding,
conservative heap tracing, and parallel tracing.
Whereas the other Whippet collectors rely on mutators to
[periodically check if they need to
stop](https://github.com/wingo/whippet/blob/main/doc/manual.md#safepoints),
`bdw` stops mutators with a POSIX signal.  Also, it doesn't really
support ephemerons (the Whippet `bdw` collector simulates them using
finalizers), and both ephemerons and finalizers only approximate the
Whippet behavior, because they are implemented in terms of what BDW-GC
provides.
`bdw` supports the `fixed` and `growable` heap-sizing policies, but not
`adaptive`, as BDW-GC can't reliably return memory to the OS. Also,
[`growable` has an effective limit of a 3x heap
multiplier](https://github.com/wingo/whippet/blob/main/src/bdw.c#L478).
Oh well!
It's a bit of an oddball from a Whippet perspective, but useful as a
migration path if you have an embedder that is already using BDW-GC.
And, it is a useful performance comparison.

View file

@ -0,0 +1,148 @@
# Mostly-marking collector
The `mmc` collector is mainly a mark-region collector, inspired by
[Immix](http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf).
To a first approximation, `mmc` is a whole-heap Immix collector with a
large object space on the side.
When tracing, `mmc` mostly marks objects in place. If the heap is
too fragmented, it can compact the heap by choosing to evacuate
sparsely-populated heap blocks instead of marking in place. However
evacuation is strictly optional, which means that `mmc` is also
compatible with conservative root-finding, making it a good replacement
for embedders that currently use the [Boehm-Demers-Weiser
collector](./collector-bdw.md).
## Differences from Immix
The original Immix divides the heap into 32kB blocks, and then divides
those blocks into 128B lines. An Immix allocation can span lines but
not blocks; allocations larger than 8kB go into a separate large object
space. Mutators request blocks from the global store and allocate into
those blocks using bump-pointer allocation. When all blocks are
consumed, Immix stops the world and traces the object graph, marking
objects but also the lines that objects are on. After marking, blocks
contain some lines with live objects and others that are completely
free. Spans of free lines are called holes. When a mutator gets a
recycled block from the global block store, it allocates into those
holes. For an exposition of Immix, see the lovely detailed [Rust
implementation](http://users.cecs.anu.edu.au/~steveb/pubs/papers/rust-ismm-2016.pdf).
The essential difference of `mmc` from Immix stems from a simple
observation: Immix needs a side table of line mark bytes and also a mark
bit or bits in each object (or in a side table). But if instead you
choose to store mark bytes instead of bits (for concurrency reasons) in
a side table, with one mark byte per granule (unit of allocation,
perhaps 16 bytes), then you effectively have a line mark table where the
granule size is the line size. You can bump-pointer allocate into holes
in the mark byte table.
You might think this is a bad tradeoff, and perhaps it is: I don't know
yet. If your granule size is two pointers, then one mark byte per
granule is 6.25% overhead on 64-bit, or 12.5% on 32-bit. Especially on
32-bit, it's a lot! On the other hand, instead of the worst case of one
survivor object wasting a line (or two, in the case of conservative line
marking), granule-size-is-line-size instead wastes nothing. Also, you
don't need GC bits in the object itself, and you can use the mark byte
array to record the object end, so that finding holes in a block can
just read the mark table and can avoid looking at object memory.
## Optional features
The `mmc` collector has a few feature flags that can be turned on or
off. If you use the [standard embedder makefile include](../embed.mk),
then there is a name for each combination of features: `mmc` has no
additional features, `parallel-mmc` enables parallel marking,
`parallel-generational-mmc` enables generations,
`stack-conservative-parallel-generational-mmc` uses conservative
root-finding, and `heap-conservative-parallel-generational-mmc`
additionally traces the heap conservatively. You can leave off
components of the name to get a collector without those features.
Underneath this corresponds to some pre-processor definitions passed to
the compiler on the command line.
### Generations
`mmc` supports generational tracing via the [sticky mark-bit
algorithm](https://wingolog.org/archives/2022/10/22/the-sticky-mark-bit-algorithm).
This requires that the embedder emit [write
barriers](https://github.com/wingo/whippet/blob/main/doc/manual.md#write-barriers);
if your embedder cannot ensure write barriers are always invoked, then
generational collection is not for you. (We could perhaps relax this a
bit, following what [Ruby developers
did](http://rvm.jp/~ko1/activities/rgengc_ismm.pdf).)
The write barrier is currently a card-marking barrier emitted on stores,
with one card byte per 256 object bytes, where the card location can be
computed from the object address because blocks are allocated in
two-megabyte aligned slabs.
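As a rough sketch of the address arithmetic this implies (the constant
names, the card-table location, and the exact metadata layout are
hypothetical, not the actual `mmc` internals):
```c
#include <stdint.h>
#include <stddef.h>
/* Hypothetical constants: two-megabyte aligned slabs, one card byte per
   256 object bytes, card table kept at the base of the slab. */
#define SLAB_SIZE         (2 * 1024 * 1024)
#define CARD_SIZE         256
#define CARD_TABLE_OFFSET 0
static inline void mark_card_for_store(uintptr_t obj_addr) {
  uintptr_t slab_base = obj_addr & ~(uintptr_t)(SLAB_SIZE - 1);
  size_t card_index = (obj_addr - slab_base) / CARD_SIZE;
  uint8_t *card_table = (uint8_t *)(slab_base + CARD_TABLE_OFFSET);
  card_table[card_index] = 1;  /* dirty: may contain an old-to-new edge */
}
```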
### Parallel tracing
You almost certainly want this on!  `parallel-mmc` uses the
[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h).
Each trace worker maintains a [local queue of objects that need
tracing](../src/local-worklist.h), which currently has a capacity of
1024 entries. If the local queue becomes full, the worker will publish
3/4 of those entries to the worker's [shared
worklist](../src/shared-worklist.h). When a worker runs out of local
work, it will first try to remove work from its own shared worklist,
then will try to steal from other workers.
The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you absolutely need to avoid dynamic allocation during GC, `mmc`
(even `serial-mmc`) would need some work for your use case, to allocate
a fixed-size space for a marking queue and to gracefully handle mark
queue overflow.
### Conservative stack scanning
With `semi` and `pcc`, embedders must precisely enumerate the set of
*roots*: the edges into the heap from outside. Commonly, roots include
global variables, as well as working variables from each mutator's
stack. `mmc` can optionally mark mutator stacks *conservatively*:
treating each word on the stack as if it may be an object reference, and
marking any object at that address.
After all these years, *whether* to mark stacks conservatively or not is
still an open research question. Conservative stack scanning can retain
too much data if an integer is confused for an object reference and
removes a layer of correctness-by-construction from a system. Sometimes
conservative stack-scanning is required, for example if your embedder
cannot enumerate roots precisely. But there are reasons to consider it
even if you can do precise roots: conservative scanning removes the need
for the compiler to produce a stack map to store the precise root
enumeration at every safepoint; it removes the need to look up a stack
map when tracing; and it allows C or C++ support code to avoid having to
place roots in traceable locations published to the garbage collector.
And the [performance question is still
open](https://dl.acm.org/doi/10.1145/2660193.2660198).
Anyway. `mmc` can scan roots conservatively. Those roots are pinned
for the collection; even if the collection will compact via evacuation,
referents of conservative roots won't be moved. Objects not directly
referenced by roots can be evacuated, however.
### Conservative heap scanning
In addition to stack and global references, the Boehm-Demers-Weiser
collector scans heap objects conservatively as well, treating each word
of each heap object as if it were a reference. `mmc` can do that, if
the embedder is unable to provide a `gc_trace_object` implementation.
However this is generally a performance lose, and it prevents
evacuation.
## Other implementation tidbits
`mmc` does lazy sweeping: as a mutator grabs a fresh block, it
reclaims memory that was unmarked in the previous collection before
making the memory available for allocation. This makes sweeping
naturally cache-friendly and parallel.
The mark byte array facilitates conservative collection by being an
oracle for "does this address start an object".
For a detailed introduction, see [Whippet: Towards a new local
maximum](https://wingolog.org/archives/2023/02/07/whippet-towards-a-new-local-maximum),
a talk given at FOSDEM 2023.

View file

@ -0,0 +1,84 @@
# Parallel copying collector
Whippet's `pcc` collector is a copying collector, like the more simple
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
multiple tracing threads, and using an external FIFO worklist instead of
a Cheney worklist.
Like `semi`, `pcc` traces by evacuation: it moves all live objects on
every collection. (Exception: objects larger than 8192 bytes are
placed into a partitioned space which traces by marking in place instead
of copying.) Evacuation requires precise roots, so if your embedder
does not support precise roots, `pcc` is not for you.
Again like `semi`, `pcc` generally requires a heap size at least twice
as large as the maximum live heap size, and performs best with ample
heap sizes; between 3× and 5× is best.
Overall, `pcc` is a better version of `semi`. It should have broadly
the same performance characteristics with a single mutator and with
parallelism disabled, additionally allowing multiple mutators, and
scaling better with multiple tracing threads.
`pcc` has a generational configuration, conventionally referred to as
`generational-pcc`, in which both the nursery and the old generation are
copy spaces. Objects stay in the nursery for one cycle before moving on
to the old generation. This configuration is a bit new (January 2025)
and still needs some tuning.
## Implementation notes
Unlike `semi` which has a single global bump-pointer allocation region,
`pcc` structures the heap into 64-kB blocks. In this way it supports
multiple mutator threads: mutators do local bump-pointer allocation into
their own block, and when their block is full, they fetch another from
the global store.
The block size is 64 kB, but really it's 128 kB, because each block has
two halves: the active region and the copy reserve. Dividing each block
in two allows the collector to easily grow and shrink the heap while
ensuring there is always enough reserve space.
Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
a slab. The first block in a slab is used by the collector itself, to
keep metadata for the rest of the blocks, for example a chain pointer
allowing blocks to be collected in lists, a saved allocation pointer for
partially-filled blocks, whether the block is paged in or out, and so
on.
`pcc` supports tracing in parallel. This mechanism works somewhat like
allocation, in which multiple trace workers compete to evacuate objects
into their local allocation buffers; when an allocation buffer is full,
the trace worker grabs another, just like mutators do.
Unlike the simple semi-space collector which uses a Cheney grey
worklist, `pcc` uses an external worklist. If parallelism is disabled
at compile-time, it uses a simple first-in, first-out queue of objects
to be traced. Like a Cheney worklist, this should result in objects
being copied in breadth-first order. The literature would suggest that
depth-first is generally better for locality, but that preserving
allocation order is generally best. This is something to experiment
with in the future.
If parallelism is enabled, as it is by default, `pcc` uses a
[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h).
Each trace worker maintains a [local queue of objects that need
tracing](../src/local-worklist.h), which currently has 1024 entries. If
the local queue becomes full, the worker will publish 3/4 of those
entries to the worker's [shared worklist](../src/shared-worklist.h).
When a worker runs out of local work, it will first try to remove work
from its own shared worklist, then will try to steal from other workers.
If only one tracing thread is enabled at run-time (`parallelism=1`) (or
if parallelism is disabled at compile-time), `pcc` will evacuate by
non-atomic forwarding, but if multiple threads compete to evacuate
objects, `pcc` uses [atomic compare-and-swap instead of simple
forwarding pointer updates](./manual.md#forwarding-objects). This
imposes a roughly 30% performance penalty, but having multiple tracing
threads is generally worth it, unless the object graph is itself serial.
The memory used for the external worklist is dynamically allocated from
the OS and is not currently counted as contributing to the heap size.
If you are targeting a microcontroller or something, probably you need
to choose a different kind of collector that never dynamically
allocates, such as `semi`.

View file

@ -0,0 +1,23 @@
# Semi-space collector
The `semi` collector is simple. It is mostly useful as a first
collector to try out, to make sure that a mutator correctly records all
roots: because `semi` moves every live object on every collection, it is
very effective at shaking out mutator bugs.
If your embedder chooses to not precisely record roots, for example
instead choosing to conservatively scan the stack, then the semi-space
collector is not for you: `semi` requires precise roots.
For more on semi-space collectors, see
https://wingolog.org/archives/2022/12/10/a-simple-semi-space-collector.
Whippet's `semi` collector incorporates a large-object space, which
marks objects in place instead of moving. Otherwise, `semi` generally
requires a heap size at least twice as large as the maximum live heap
size, and performs best with ample heap sizes; between 3× and 5× is
best.
The semi-space collector doesn't support multiple mutator threads. If
you want a copying collector for a multi-threaded mutator, look at
[pcc](./collector-pcc.md).

View file

@ -0,0 +1,43 @@
# Whippet collectors
Whippet has four collectors currently:
- [Semi-space collector (`semi`)](./collector-semi.md): For
single-threaded embedders who are not too tight on memory.
- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like
`semi`, but with support for multiple mutator and tracing threads and
generational collection.
- [Mostly marking collector (`mmc`)](./collector-mmc.md):
Immix-inspired collector. Optionally parallel, conservative (stack
and/or heap), and/or generational.
- [Boehm-Demers-Weiser collector (`bdw`)](./collector-bdw.md):
Conservative mark-sweep collector, implemented by
  the Boehm-Demers-Weiser library.
## How to choose?
If you are migrating an embedder off BDW-GC, then it could be reasonable
to first go to `bdw`, then `stack-conservative-parallel-mmc`.
If you have an embedder with precise roots, use `pcc`. That will shake
out mutator/embedder bugs. Then if memory is tight, switch to
`parallel-mmc`, possibly `parallel-generational-mmc`.
If you are aiming for maximum simplicity and minimal code size (ten
kilobytes or so), use `semi`.
If you are writing a new project, you have a choice as to whether to pay
the development cost of precise roots or not. If you choose to not have
precise roots, then go for `stack-conservative-parallel-mmc` directly.
## More collectors
It would be nice to have a generational GC that uses the space from
`parallel-mmc` for the old generation but a pcc-style copying nursery.
We have `generational-pcc` now, so this should be possible.
Support for concurrent marking in `mmc` would be good as well, perhaps
with a SATB barrier. (Or, if you are the sort of person to bet on
conservative stack scanning, perhaps a retreating-wavefront barrier
would be more appropriate.)
Contributions are welcome, provided they add no new dependencies!

View file

@ -0,0 +1,26 @@
# Whippet and Guile
If the `mmc` collector works out, it could replace Guile's garbage
collector. Guile currently uses BDW-GC. Guile has a widely used C API
and implements part of its run-time in C. For this reason it may be
infeasible to require precise enumeration of GC roots -- we may need to
allow GC roots to be conservatively identified from data sections and
from stacks. Such conservative roots would be pinned, but other objects
can be moved by the collector if it chooses to do so. We assume that
object references within a heap object can be precisely identified.
(However, Guile currently uses BDW-GC in its default configuration,
which scans for references conservatively even on the heap.)
The existing C API allows direct access to mutable object fields,
without the mediation of read or write barriers. Therefore it may be
impossible to switch to collector strategies that need barriers, such as
generational or concurrent collectors. However, we shouldn't write off
this possibility entirely; an ideal replacement for Guile's GC will
offer the possibility of migration to other GC designs without imposing
new requirements on C API users in the initial phase.
In this regard, the Whippet experiment also has the goal of identifying
a smallish GC abstraction in Guile, so that we might consider evolving
GC implementation in the future without too much pain. If we switch
away from BDW-GC, we should be able to evaluate that it's a win for a
large majority of use cases.

View file

@ -0,0 +1,718 @@
# Whippet user's guide
Whippet is an embed-only library: it should be copied into the source
tree of the program that uses it. The program's build system needs to
be wired up to compile Whippet, then link it into the program that uses
it.
## Subtree merges
One way to get Whippet is just to manually copy the files present in a
Whippet checkout into your project. However probably the best way is to
perform a [subtree
merge](https://docs.github.com/en/get-started/using-git/about-git-subtree-merges)
of Whippet into your project's Git repository, so that you can easily
update your copy of Whippet in the future.
Performing the first subtree merge is annoying and full of arcane
incantations. Follow the [subtree merge
page](https://docs.github.com/en/get-started/using-git/about-git-subtree-merges)
for full details, but for a cheat sheet, you might do something like
this to copy Whippet into the `whippet/` directory of your project root:
```
git remote add whippet https://github.com/wingo/whippet
git fetch whippet
git merge -s ours --no-commit --allow-unrelated-histories whippet/main
git read-tree --prefix=whippet/ -u whippet/main
git commit -m 'Added initial Whippet merge'
```
Then to later update your copy of whippet, assuming you still have the
`whippet` remote, just do:
```
git pull -s subtree whippet main
```
## `gc-embedder-api.h`
To determine the live set of objects, a tracing garbage collector starts
with a set of root objects, and then transitively visits all reachable
object edges. Exactly how it goes about doing this depends on the
program that is using the garbage collector; different programs will
have different object representations, different strategies for
recording roots, and so on.
To traverse the heap in a program-specific way but without imposing an
abstraction overhead, Whippet requires that a number of data types and
inline functions be implemented by the program, for use by Whippet
itself. This is the *embedder API*, and this document describes what
Whippet requires from a program.
A program should provide a header file implementing the API in
[`gc-embedder-api.h`](../api/gc-embedder-api.h). This header should only be
included when compiling Whippet itself; it is not part of the API that
Whippet exposes to the program.
### Identifying roots
The collector uses two opaque struct types, `struct gc_mutator_roots`
and `struct gc_heap_roots`, that are used by the program to record
object roots. Probably you should put the definition of these data
types in a separate header that is included both by Whippet, via the
embedder API, and via users of Whippet, so that programs can populate
the root set. In any case the embedder-API use of these structs is via
`gc_trace_mutator_roots` and `gc_trace_heap_roots`, two functions that
are passed a trace visitor function `trace_edge`, and which should call
that function on all edges from a given mutator or heap. (Usually
mutator roots are per-thread roots, such as from the stack, and heap
roots are global roots.)
### Tracing objects
The `gc_trace_object` is responsible for calling the `trace_edge`
visitor function on all outgoing edges in an object. It also includes a
`size` out-parameter, for when the collector wants to measure the size
of an object. `trace_edge` and `size` may be `NULL`, in which case no
tracing or size computation should be performed.
### Tracing ephemerons and finalizers
Most kinds of GC-managed object are defined by the program, but the GC
itself has support for two specific object kinds: ephemerons and
finalizers. If the program allocates ephemerons, it should trace them
in the `gc_trace_object` function by calling `gc_trace_ephemeron` from
[`gc-ephemeron.h`](../api/gc-ephemeron.h).  Likewise if the program
allocates finalizers, it should trace them by calling
`gc_trace_finalizer` from [`gc-finalizer.h`](../api/gc-finalizer.h).
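As a sketch, an embedder with its own object-kind tag might dispatch like
this; the kind enum, the field visitors, and the exact signatures of
`gc_trace_ephemeron`, `gc_trace_finalizer`, and the size helpers are
assumptions to be checked against the headers:
```c
// Sketch: gc_trace_object handing ephemerons and finalizers off to the
// collector's own tracing routines.  KIND_* and the *_program_object
// helpers are hypothetical embedder code.
static inline void gc_trace_object(struct gc_ref ref,
                                   void (*trace_edge)(struct gc_edge edge,
                                                      struct gc_heap *heap,
                                                      void *trace_data),
                                   struct gc_heap *heap, void *trace_data,
                                   size_t *size) {
  switch (object_kind(ref)) {
  case KIND_EPHEMERON:
    if (trace_edge)
      gc_trace_ephemeron(gc_ref_heap_object(ref), trace_edge, heap, trace_data);
    if (size) *size = gc_ephemeron_size();  // assumed size helper
    break;
  case KIND_FINALIZER:
    if (trace_edge)
      gc_trace_finalizer(gc_ref_heap_object(ref), trace_edge, heap, trace_data);
    if (size) *size = gc_finalizer_size();  // assumed size helper
    break;
  default:  // ordinary program-defined objects
    trace_program_object(ref, trace_edge, heap, trace_data, size);
    break;
  }
}
```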
### Forwarding objects
When built with a collector that moves objects, the embedder must also
allow for forwarding pointers to be installed in an object. There are
two forwarding APIs: one that is atomic and one that isn't.
The nonatomic API is relatively simple; there is a
`gc_object_forwarded_nonatomic` function that returns an embedded
forwarding address, or 0 if the object is not yet forwarded, and
`gc_object_forward_nonatomic`, which installs a forwarding pointer.
The atomic API is gnarly. It is used by parallel collectors, in which
multiple collector threads can race to evacuate an object.
There is a state machine associated with the `gc_atomic_forward`
structure from [`gc-forwarding.h`](../api/gc-forwarding.h); the embedder API
implements the state changes. The collector calls
`gc_atomic_forward_begin` on an object to begin a forwarding attempt,
and the resulting `gc_atomic_forward` can be in the `NOT_FORWARDED`,
`FORWARDED`, or `BUSY` state.
If the `gc_atomic_forward`'s state is `BUSY`, the collector will call
`gc_atomic_forward_retry_busy`; a return value of 0 means the object is
still busy, because another thread is attempting to forward it.
Otherwise the forwarding state becomes either `FORWARDED`, if the other
thread succeeded in forwarding it, or goes back to `NOT_FORWARDED`,
indicating that the other thread failed to forward it.
If the forwarding state is `FORWARDED`, the collector will call
`gc_atomic_forward_address` to get the new address.
If the forwarding state is `NOT_FORWARDED`, the collector may begin a
forwarding attempt by calling `gc_atomic_forward_acquire`. The
resulting state is `ACQUIRED` on success, or `BUSY` if another thread
acquired the object in the meantime, or `FORWARDED` if another thread
acquired and completed the forwarding attempt.
An `ACQUIRED` object can then be forwarded via
`gc_atomic_forward_commit`, or the forwarding attempt can be aborted via
`gc_atomic_forward_abort`. Also, when an object is acquired, the
collector may call `gc_atomic_forward_object_size` to compute how many
bytes to copy. (The collector may choose instead to record object sizes
in a different way.)
All of these `gc_atomic_forward` functions are to be implemented by the
embedder. Some programs may allocate a dedicated forwarding word in all
objects; some will manage to store the forwarding word in an initial
"tag" word, via a specific pattern for the low 3 bits of the tag that no
non-forwarded object will have. The low-bits approach takes advantage
of the collector's minimum object alignment, in which objects are
aligned at least to an 8-byte boundary, so all objects have 0 for the
low 3 bits of their address.
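To make the state machine concrete, here is a sketch of how a copying
collector might drive these functions when evacuating an object.  The
copy and allocation helpers are hypothetical, and the `gc_ref`
constructor taking an address is an assumption; only the
`gc_atomic_forward_*` calls are the embedder API described above.
```c
// Sketch of a collector-side evacuation attempt using the atomic
// forwarding API.  evacuation_allocate() and copy_object_bytes() are
// hypothetical collector internals.
static struct gc_ref evacuate_or_use_forwarded(struct gc_ref old) {
  struct gc_atomic_forward fwd = gc_atomic_forward_begin(old);
  for (;;) {
    switch (fwd.state) {
    case GC_FORWARDING_STATE_BUSY:
      gc_atomic_forward_retry_busy(&fwd);   // spin until no longer busy
      break;
    case GC_FORWARDING_STATE_FORWARDED:
      return gc_ref(gc_atomic_forward_address(&fwd));
    case GC_FORWARDING_STATE_NOT_FORWARDED:
      gc_atomic_forward_acquire(&fwd);      // re-check state on next loop
      break;
    case GC_FORWARDING_STATE_ACQUIRED: {
      size_t bytes = gc_atomic_forward_object_size(&fwd);
      struct gc_ref new_ref = evacuation_allocate(bytes);
      copy_object_bytes(new_ref, old, bytes);
      gc_atomic_forward_commit(&fwd, new_ref);
      return new_ref;
    }
    default:
      GC_CRASH();
    }
  }
}
```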
### Conservative references
Finally, when configured in a mode in which root edges or intra-object
edges are *conservative*, the embedder can filter out which bit patterns
might be an object reference by implementing
`gc_is_valid_conservative_ref_displacement`. Here, the collector masks
off the low bits of a conservative reference, and asks the embedder if a
value with those low bits might point to an object. Usually the
embedder should return 1 only if the displacement is 0, but if the
program allows low-bit tagged pointers, then it should also return 1 for
those pointer tags.
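For example, an embedder whose program uses only untagged pointers and a
single low-bit tag might implement it like this; the tag values are
illustrative, and the exact signature should be checked against
`gc-embedder-api.h`:
```c
// Sketch: accept displacements 0 and 1, reject everything else.  The set
// of valid displacements depends entirely on the embedder's tagging
// scheme; these values are hypothetical.
static inline int gc_is_valid_conservative_ref_displacement(uintptr_t displacement) {
  switch (displacement) {
  case 0:  // untagged pointer to the object base
  case 1:  // hypothetical low-bit tag used by the program
    return 1;
  default:
    return 0;
  }
}
```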
### External objects
Sometimes a system will allocate objects outside the GC, for example on
the stack or in static data sections. To support this use case, Whippet
allows the embedder to provide a `struct gc_extern_space`
implementation. Whippet will call `gc_extern_space_start_gc` at the
start of each collection, and `gc_extern_space_finish_gc` at the end.
External objects will be visited by `gc_extern_space_visit`, which should
return nonzero if the object hasn't been seen before and needs to be
traced via `gc_trace_object` (coloring the object grey). Note,
`gc_extern_space_visit` may be called concurrently from many threads; be
prepared!
## Configuration, compilation, and linking
To the user, Whippet presents an abstract API that does not encode the
specificities of any given collector. Whippet currently includes four
implementations of that API: `semi`, a simple semi-space collector;
`pcc`, a parallel copying collector (like semi but multithreaded);
`bdw`, an implementation via the third-party
[Boehm-Demers-Weiser](https://github.com/ivmai/bdwgc) conservative
collector; and `mmc`, a mostly-marking collector inspired by Immix.
The program that embeds Whippet selects the collector implementation at
build-time. For `pcc`, the program can also choose whether to be
generational or not.  For the `mmc` collector, the program configures a
specific collector mode, again at build-time: generational or not,
parallel or not, stack-conservative or not, and heap-conservative or
not. It may be nice in the future to be able to configure these at
run-time, but for the time being they are compile-time options so that
adding new features doesn't change the footprint of a more minimal
collector.
Different collectors have different allocation strategies: for example,
the BDW collector allocates from thread-local freelists, whereas the
semi-space collector has a bump-pointer allocator. A collector may also
expose a write barrier, for example to enable generational collection.
For performance reasons, many of these details can't be hidden behind an
opaque functional API: they must be inlined into call sites. Whippet's
approach is to expose fast paths as part of its inline API,
*parameterized* on attributes of the selected garbage collector.
The goal is to keep the user's code generic and avoid any code
dependency on the choice of garbage collector. Because of inlining,
however, the choice of garbage collector does need to be specified when
compiling user code.
### Compiling the collector
As an embed-only library, Whippet needs to be integrated into the build
system of its host (embedder). There are two build systems supported
currently; we would be happy to add other systems over time.
#### GNU make
At a high level, first the embedder chooses a collector and defines how
to specialize the collector against the embedder. Whippet's `embed.mk`
Makefile snippet then defines how to build the set of object files that
define the collector, and how to specialize the embedder against the
chosen collector.
As an example, say you have a file `program.c`, and you want to compile
it against a Whippet checkout in `whippet/`. Your headers are in
`include/`, and you have written an implementation of the embedder
interface in `host-gc.h`. In that case you would have a Makefile like
this:
```
HOST_DIR:=$(dir $(lastword $(MAKEFILE_LIST)))
WHIPPET_DIR=$(HOST_DIR)whippet/
all: out
# The collector to choose: e.g. semi, bdw, pcc, generational-pcc, mmc,
# parallel-mmc, etc.
GC_COLLECTOR=pcc
include $(WHIPPET_DIR)embed.mk
# Host cflags go here...
HOST_CFLAGS=
# Whippet's embed.mk uses this variable when it compiles code that
# should be specialized against the embedder.
EMBEDDER_TO_GC_CFLAGS=$(HOST_CFLAGS) -include $(HOST_DIR)host-gc.h
program.o: program.c
$(GC_COMPILE) $(HOST_CFLAGS) $(GC_TO_EMBEDDER_CFLAGS) -c $<
program: program.o $(GC_OBJS)
$(GC_LINK) $^ $(GC_LIBS)
```
The optimization settings passed to the C compiler are taken from
`GC_BUILD_CFLAGS`. Embedders can override this variable directly, or
via the shorthand `GC_BUILD` variable. A `GC_BUILD` of `opt` indicates
maximum optimization and no debugging assertions; `optdebug` adds
debugging assertions; and `debug` removes optimizations.
Though Whippet tries to put performance-sensitive interfaces in header
files, users should also compile with link-time optimization (LTO) to
remove any overhead imposed by the division of code into separate
compilation units. `embed.mk` includes the necessary LTO flags in
`GC_CFLAGS` and `GC_LDFLAGS`.
#### GNU Autotools
To use Whippet from an autotools project, the basic idea is to include a
`Makefile.am` snippet from the subdirectory containing the Whippet
checkout. That will build `libwhippet.la`, which you should link into
your binary. There are some `m4` autoconf macros that need to be
invoked, for example to select the collector.
Let us imagine you have checked out Whippet in `whippet/`. Let us also
assume for the moment that we are going to build `mt-gcbench`, a program
included in Whippet itself.
A top-level autoconf file (`configure.ac`) might look like this:
```autoconf
AC_PREREQ([2.69])
AC_INIT([whippet-autotools-example],[0.1.0])
AC_CONFIG_SRCDIR([whippet/benchmarks/mt-gcbench.c])
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_MACRO_DIRS([m4 whippet])
AM_INIT_AUTOMAKE([subdir-objects foreign])
WHIPPET_ENABLE_LTO
LT_INIT
WARN_CFLAGS=-Wall
AC_ARG_ENABLE([Werror],
AS_HELP_STRING([--disable-Werror],
[Don't stop the build on errors]),
[],
WARN_CFLAGS="-Wall -Werror")
CFLAGS="$CFLAGS $WARN_CFLAGS"
WHIPPET_PKG
AC_CONFIG_FILES(Makefile)
AC_OUTPUT
```
Then your `Makefile.am` might look like this:
```automake
noinst_LTLIBRARIES =
WHIPPET_EMBEDDER_CPPFLAGS = -include $(srcdir)/whippet/benchmarks/mt-gcbench-embedder.h
include whippet/embed.am
noinst_PROGRAMS = whippet/benchmarks/mt-gcbench
whippet_benchmarks_mt_gcbench_SOURCES = \
whippet/benchmarks/heap-objects.h \
whippet/benchmarks/mt-gcbench-embedder.h \
whippet/benchmarks/mt-gcbench-types.h \
whippet/benchmarks/mt-gcbench.c \
whippet/benchmarks/simple-allocator.h \
whippet/benchmarks/simple-gc-embedder.h \
whippet/benchmarks/simple-roots-api.h \
whippet/benchmarks/simple-roots-types.h \
whippet/benchmarks/simple-tagging-scheme.h
AM_CFLAGS = $(WHIPPET_CPPFLAGS) $(WHIPPET_CFLAGS) $(WHIPPET_TO_EMBEDDER_CPPFLAGS)
LDADD = libwhippet.la
```
We have to list all the little header files it uses because, well,
autotools.
To actually build, you do the usual autotools dance:
```bash
autoreconf -vif && ./configure && make
```
See `./configure --help` for a list of user-facing options. Before the
`WHIPPET_PKG`, you can run e.g. `WHIPPET_PKG_COLLECTOR(mmc)` to set the
default collector to `mmc`; if you don't do that, the default collector
is `pcc`. There are also `WHIPPET_PKG_DEBUG`, `WHIPPET_PKG_TRACING`,
and `WHIPPET_PKG_PLATFORM`; see [`whippet.m4`](../whippet.m4) for more
details. See also
[`whippet-autotools`](https://github.com/wingo/whippet-autotools) for an
example of how this works.
#### Compile-time options
There are a number of pre-processor definitions that can parameterize
the collector at build-time:
* `GC_DEBUG`: If nonzero, then enable debugging assertions.
* `NDEBUG`: This one is a bit weird; if not defined, then enable
debugging assertions and some debugging printouts. Probably
Whippet's use of `NDEBUG` should be folded in to `GC_DEBUG`.
* `GC_PARALLEL`: If nonzero, then enable parallelism in the collector.
Defaults to 0.
* `GC_GENERATIONAL`: If nonzero, then enable generational collection.
Defaults to zero.
* `GC_PRECISE_ROOTS`: If nonzero, then collect precise roots via
`gc_heap_roots` and `gc_mutator_roots`. Defaults to zero.
* `GC_CONSERVATIVE_ROOTS`: If nonzero, then scan the stack and static
data sections for conservative roots. Defaults to zero. Not
mutually exclusive with `GC_PRECISE_ROOTS`.
* `GC_CONSERVATIVE_TRACE`: If nonzero, heap edges are scanned
conservatively. Defaults to zero.
Some collectors require specific compile-time options. For example, the
semi-space collector has to be able to move all objects; this is not
compatible with conservative roots or heap edges.
#### Tracing support
Whippet includes support for low-overhead run-time tracing via
[LTTng](https://lttng.org/). If the support library `lttng-ust` is
present when Whippet is compiled (as checked via `pkg-config`),
tracepoint support will be present. See
[tracepoints.md](./tracepoints.md) for more information on how to get
performance traces out of Whippet.
## Using the collector
Whew! So you finally built the thing! Did you also link it into your
program? No, because your program isn't written yet? Well this section
is for you: we describe the user-facing API of Whippet, where "user" in
this case denotes the embedding program.
What is the API, you ask? It is in [`gc-api.h`](../api/gc-api.h).
### Heaps and mutators
To start with, you create a *heap*. Usually an application will create
just one heap. A heap has one or more associated *mutators*. A mutator
is a thread-specific handle on the heap. Allocating objects requires a
mutator.
The initial heap and mutator are created via `gc_init`, which takes
three logical input parameters: the *options*, a stack base address, and
an *event listener*. The options specify the initial heap size and so
on. The event listener is mostly for gathering statistics; see below
for more. `gc_init` returns the new heap as an out parameter, and also
returns a mutator for the current thread.
To make a new mutator for a new thread, use `gc_init_for_thread`. When
a thread is finished with its mutator, call `gc_finish_for_thread`.
Each thread that allocates or accesses GC-managed objects should have
its own mutator.
The stack base address allows the collector to scan the mutator's stack,
if conservative root-finding is enabled. It may be omitted in the call
to `gc_init` and `gc_init_for_thread`; passing `NULL` tells Whippet to
ask the platform for the stack bounds of the current thread. Generally
speaking, this works on all platforms for the main thread, but not
necessarily on other threads. The most reliable solution is to
explicitly obtain a base address by trampolining through
`gc_call_with_stack_addr`.
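As a sketch of what a thread entry point might look like; the signatures
of `gc_call_with_stack_addr`, `gc_init_for_thread`, and
`gc_finish_for_thread` are assumptions here, so check `gc-api.h` for the
authoritative declarations:
```c
// Sketch of bringing up a mutator on a new thread via the stack-address
// trampoline.  Signatures are assumed, not authoritative.
struct thread_args { struct gc_heap *heap; };
static void* with_mutator(struct gc_stack_addr *stack_base, void *data) {
  struct thread_args *args = data;
  struct gc_mutator *mut = gc_init_for_thread(stack_base, args->heap);
  // ... allocate and run, reaching safepoints periodically ...
  gc_finish_for_thread(mut);
  return NULL;
}
static void* thread_start(void *data) {
  // Trampoline so the collector learns a reliable stack base for this thread.
  return gc_call_with_stack_addr(with_mutator, data);
}
```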
### Options
There are some run-time parameters that programs and users might want to
set explicitly; these are encapsulated in the *options*. Make an
options object with `gc_allocate_options()`; this object will be
consumed by `gc_init`.  Then, the most convenient thing is to set
those options with `gc_options_parse_and_set_many`, from a string passed
on the command line or in an environment variable, but to get there we
have to explain the low-level API first.  There are a few options that are
defined for all collectors:
* `GC_OPTION_HEAP_SIZE_POLICY`: How should we size the heap? Either
it's `GC_HEAP_SIZE_FIXED` (which is 0), in which the heap size is
fixed at startup; or `GC_HEAP_SIZE_GROWABLE` (1), in which the heap
may grow but will never shrink; or `GC_HEAP_SIZE_ADAPTIVE` (2), in
which we take an
[adaptive](https://wingolog.org/archives/2023/01/27/three-approaches-to-heap-sizing)
approach, depending on the rate of allocation and the cost of
collection. Really you want the adaptive strategy, but if you are
benchmarking you definitely want the fixed policy.
* `GC_OPTION_HEAP_SIZE`: The initial heap size. For a
`GC_HEAP_SIZE_FIXED` policy, this is also the final heap size. In
bytes.
* `GC_OPTION_MAXIMUM_HEAP_SIZE`: For growable and adaptive heaps, the
maximum heap size, in bytes.
* `GC_OPTION_HEAP_SIZE_MULTIPLIER`: For growable heaps, the target heap
multiplier. A heap multiplier of 2.5 means that for 100 MB of live
data, the heap should be 250 MB.
* `GC_OPTION_HEAP_EXPANSIVENESS`: For adaptive heap sizing, an
indication of how much free space will be given to heaps, as a
proportion of the square root of the live data size.
* `GC_OPTION_PARALLELISM`: How many threads to devote to collection
tasks during GC pauses. By default, the current number of
processors, with a maximum of 8.
You can set these options via `gc_option_set_int` and so on; see
[`gc-options.h`](../api/gc-options.h). Or, you can parse options from
strings: `heap-size-policy`, `heap-size`, `maximum-heap-size`, and so
on. Use `gc_option_from_string` to determine if a string is really an
option. Use `gc_option_parse_and_set` to parse a value for an option.
Use `gc_options_parse_and_set_many` to parse a number of comma-delimited
*key=value* settings from a string.
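Putting that together, a program might build its options like this.  This
is a sketch: the environment variable name is made up, and it assumes
`gc_options_parse_and_set_many` reports success with a nonzero return;
check [`gc-options.h`](../api/gc-options.h) for the exact contract.
```c
#include <stdio.h>
#include <stdlib.h>
#include "gc-api.h"
#include "gc-options.h"
// Sketch: start from defaults, prefer adaptive sizing, then let an
// environment variable override individual settings.
static struct gc_options* make_options(void) {
  struct gc_options *options = gc_allocate_options();
  gc_option_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_ADAPTIVE);
  const char *user = getenv("MYPROGRAM_GC_OPTIONS");  // hypothetical variable
  if (user && !gc_options_parse_and_set_many(options, user))
    fprintf(stderr, "failed to parse GC options: %s\n", user);
  return options;
}
```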
### Allocation
So you have a heap and a mutator; great! Let's allocate! Call
`gc_allocate`, passing the mutator and the number of bytes to allocate.
There is also `gc_allocate_fast`, which is an inlined fast-path. If
that returns NULL, you need to call `gc_allocate_slow`. The advantage
of this API is that you can punt some root-saving overhead to the slow
path.
Allocation always succeeds. If it doesn't, it kills your program. The
bytes in the resulting allocation will be initialized to 0.
The allocation fast path is parameterized by collector-specific
attributes. JIT compilers can also read those attributes to emit
appropriate inline code that replicates the logic of `gc_allocate_fast`.
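A sketch of how an embedder might wrap these calls, assuming
`gc_allocate_fast` and `gc_allocate_slow` take just the mutator and a
byte count as described above; check `gc-api.h` for the exact signatures.
```c
// Sketch: inline fast path with a fallback to the slow path.
static inline void* my_alloc(struct gc_mutator *mut, size_t bytes) {
  void *obj = gc_allocate_fast(mut, bytes);
  if (!obj)
    obj = gc_allocate_slow(mut, bytes);  // safepoint; may trigger GC
  return obj;  // memory is zero-initialized; allocation never fails
}
```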
### Write barriers
For some collectors, mutators have to tell the collector whenever they
mutate an object. They tell the collector by calling a *write barrier*;
in Whippet this is currently the case only for generational collectors.
The write barrier is `gc_write_barrier`; see `gc-api.h` for its
parameters.
As with allocation, the fast path for the write barrier is parameterized
by collector-specific attributes, to allow JIT compilers to inline write
barriers.
### Safepoints
Sometimes Whippet will need to synchronize all threads, for example as
part of the "stop" phase of a stop-and-copy semi-space collector.
Whippet stops at *safepoints*. At a safepoint, all mutators must be
able to enumerate all of their edges to live objects.
Whippet has cooperative safepoints: mutators have to periodically call
into the collector to potentially synchronize with other mutators.
`gc_allocate_slow` is a safepoint, so if you have a bunch of threads that are
all allocating, usually safepoints are reached in a more-or-less prompt
fashion. But if a mutator isn't allocating, it either needs to
temporarily mark itself as inactive by trampolining through
`gc_call_without_gc`, or it should arrange to periodically call
`gc_safepoint`. Marking a mutator as inactive is the right strategy
for, for example, system calls that might block.  Periodic safepoints are
better for code that is active but not allocating.
Also, the BDW collector actually uses pre-emptive safepoints: it stops
threads via POSIX signals. `gc_safepoint` is a no-op with BDW.
Embedders can inline safepoint checks. If
`gc_cooperative_safepoint_kind()` is `GC_COOPERATIVE_SAFEPOINT_NONE`,
then the collector doesn't need safepoints, as is the case for `bdw`
which uses signals and `semi` which is single-threaded. If it is
`GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG`, then calling
`gc_safepoint_flag_loc` on a mutator will return the address of an `int`
in memory, which if nonzero when loaded using relaxed atomics indicates
that the mutator should call `gc_safepoint_slow`. Similarly for
`GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG`, except that the address is
per-mutator rather than global.
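For either flag kind, an embedder's inlined check might look something
like this sketch; it follows the description above, with the relaxed load
written in the style of the simple embedder shipped with Whippet.
```c
#include <stdatomic.h>
// Sketch: inline cooperative safepoint check.  Works for both the heap
// flag and the per-mutator flag, since gc_safepoint_flag_loc takes the
// mutator in either case; bdw and semi report SAFEPOINT_NONE.
static inline void my_safepoint(struct gc_mutator *mut) {
  if (gc_cooperative_safepoint_kind() == GC_COOPERATIVE_SAFEPOINT_NONE)
    return;
  int *flag = gc_safepoint_flag_loc(mut);
  if (atomic_load_explicit(flag, memory_order_relaxed))
    gc_safepoint_slow(mut);
}
```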
### Pinning
Sometimes a mutator or embedder would like to tell the collector to not
move a particular object. This can happen for example during a foreign
function call, or if the embedder allows programs to access the address
of an object, for example to compute an identity hash code. To support
this use case, some Whippet collectors allow the embedder to *pin*
objects. Call `gc_pin_object` to prevent the collector from relocating
an object.
Pinning is currently supported by the `bdw` collector, which never moves
objects, and also by the various `mmc` collectors, which can move
objects that have no inbound conservative references.
Pinning is not supported on `semi` or `pcc`.
Call `gc_can_pin_objects` to determine whether the current collector can
pin objects.
### Statistics
Sometimes a program would like some information from the GC: how many
bytes and objects have been allocated? How much time has been spent in
the GC? How many times has GC run, and how many of those were minor
collections? What's the maximum pause time? Stuff like that.
Instead of collecting a fixed set of information, Whippet emits
callbacks when the collector reaches specific states. The embedder
provides a *listener* for these events when initializing the collector.
The listener interface is defined in
[`gc-event-listener.h`](../api/gc-event-listener.h). Whippet ships with
two listener implementations,
[`GC_NULL_EVENT_LISTENER`](../api/gc-null-event-listener.h), and
[`GC_BASIC_STATS`](../api/gc-basic-stats.h). Most embedders will want
their own listener, but starting with the basic stats listener is not a
bad option:
```
#include "gc-api.h"
#include "gc-basic-stats.h"
#include <stdio.h>
int main() {
struct gc_options *options = NULL;
struct gc_heap *heap;
struct gc_mutator *mut;
struct gc_basic_stats stats;
gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats);
// ...
gc_basic_stats_finish(&stats);
gc_basic_stats_print(&stats, stdout);
}
```
As you can see, `GC_BASIC_STATS` expands to a `struct gc_event_listener`
definition. We pass an associated pointer to a `struct gc_basic_stats`
instance which will be passed to the listener at every event.
The output of this program might be something like:
```
Completed 19 major collections (0 minor).
654.597 ms total time (385.235 stopped).
Heap size is 167.772 MB (max 167.772 MB); peak live data 55.925 MB.
```
There are currently three different sorts of events: heap events to
track heap growth, collector events to time different parts of
collection, and mutator events to indicate when specific mutators are
stopped.
There are three heap events:
* `init(void* data, size_t heap_size)`: Called during `gc_init`, to
allow the listener to initialize its associated state.
* `heap_resized(void* data, size_t new_size)`: Called if the heap grows
or shrinks.
* `live_data_size(void* data, size_t size)`: Called periodically when
the collector learns about live data size.
The collection events form a kind of state machine, and are called in
this order:
* `requesting_stop(void* data)`: Called when the collector asks
mutators to stop.
* `waiting_for_stop(void* data)`: Called when the collector has done
all the pre-stop work that it is able to and is just waiting on
mutators to stop.
* `mutators_stopped(void* data)`: Called when all mutators have
stopped; the trace phase follows.
* `prepare_gc(void* data, enum gc_collection_kind gc_kind)`: Called
to indicate which kind of collection is happening.
* `roots_traced(void* data)`: Called when roots have been visited.
* `heap_traced(void* data)`: Called when the whole heap has been
traced.
* `ephemerons_traced(void* data)`: Called when the [ephemeron
fixpoint](https://wingolog.org/archives/2023/01/24/parallel-ephemeron-tracing)
has been reached.
* `restarting_mutators(void* data)`: Called right before the collector
restarts mutators.
The collectors in Whippet will call all of these event handlers, but it
may be that they are called conservatively: for example, the
single-mutator, single-collector semi-space collector will never have to
wait for mutators to stop. It will still call the functions, though!
Finally, there are the mutator events:
* `mutator_added(void* data) -> void*`: The only event handler that
returns a value, called when a new mutator is added. The parameter
is the overall event listener data, and the result is
mutator-specific data. The rest of the mutator events pass this
mutator-specific data instead.
* `mutator_cause_gc(void* mutator_data)`: Called when a mutator causes
GC, either via allocation or an explicit `gc_collect` call.
* `mutator_stopping(void* mutator_data)`: Called when a mutator has
received the signal to stop. It may perform some marking work before
it stops.
* `mutator_stopped(void* mutator_data)`: Called when a mutator parks
itself.
* `mutator_restarted(void* mutator_data)`: Called when a mutator
restarts.
* `mutator_removed(void* mutator_data)`: Called when a mutator goes
away.
Note that these event handlers shouldn't really do much.  In
particular, they shouldn't call into the Whippet API, and they shouldn't
even access GC-managed objects. Event listeners are really about
statistics and profiling and aren't a place to mutate the object graph.
### Ephemerons
Whippet supports ephemerons, first-class objects that weakly associate
keys with values.  If an ephemeron's key ever becomes unreachable,
the ephemeron becomes dead and loses its value.
The user-facing API is in [`gc-ephemeron.h`](../api/gc-ephemeron.h). To
allocate an ephemeron, call `gc_allocate_ephemeron`, then initialize its
key and value via `gc_ephemeron_init`. Get the key and value via
`gc_ephemeron_key` and `gc_ephemeron_value`, respectively.
In Whippet, ephemerons can be linked together in a chain. During GC, if
an ephemeron's chain points to a dead ephemeron, that link will be
elided, allowing the dead ephemeron itself to be collected. In that
way, ephemerons can be used to build weak data structures such as weak
maps.
Weak data structures are often shared across multiple threads, so all
routines to access and modify chain links are atomic. Use
`gc_ephemeron_chain_head` to access the head of a storage location that
points to an ephemeron; push a new ephemeron on a location with
`gc_ephemeron_chain_push`; and traverse a chain with
`gc_ephemeron_chain_next`.
An ephemeron association can be removed via `gc_ephemeron_mark_dead`.
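As a sketch of how an embedder might build a bucket of a weak table on
top of these routines; the signatures follow the descriptions above, and
helpers like `gc_ref_value` and `gc_ref_null` are assumptions to check
against `gc-ref.h` and [`gc-ephemeron.h`](../api/gc-ephemeron.h).
```c
// Sketch: each bucket of a weak table is an ephemeron chain.
static void weak_table_insert(struct gc_mutator *mut,
                              struct gc_ephemeron **bucket,
                              struct gc_ref key, struct gc_ref value) {
  struct gc_ephemeron *e = gc_allocate_ephemeron(mut);
  gc_ephemeron_init(mut, e, key, value);
  gc_ephemeron_chain_push(bucket, e);  // atomic; safe to share across threads
}
static struct gc_ref weak_table_lookup(struct gc_ephemeron **bucket,
                                       struct gc_ref key) {
  for (struct gc_ephemeron *e = gc_ephemeron_chain_head(bucket); e;
       e = gc_ephemeron_chain_next(e))
    if (gc_ref_value(gc_ephemeron_key(e)) == gc_ref_value(key))
      return gc_ephemeron_value(e);
  return gc_ref_null();  // assumed "no value" sentinel
}
```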
### Finalizers
A finalizer allows the embedder to be notified when an object becomes
unreachable.
A finalizer has a priority. When the heap is created, the embedder
should declare how many priorities there are. Lower-numbered priorities
take precedence; if an object has a priority-0 finalizer outstanding,
that will prevent any finalizer at level 1 (or 2, ...) from firing
until no priority-0 finalizer remains.
Call `gc_finalizer_attach`, from `gc-finalizer.h`, to attach a finalizer
to an object.
A finalizer also references an associated GC-managed closure object.
A finalizer's reference to the closure object is strong: if a
finalizer's closure references its finalizable object,
directly or indirectly, the finalizer will never fire.
When an object with a finalizer becomes unreachable, it is added to a
queue. The embedder can call `gc_pop_finalizable` to get the next
finalizable object and its associated closure. At that point the
embedder can do anything with the object, including keeping it alive.
Ephemeron associations will still be present while the finalizable
object is live. Note however that any objects referenced by the
finalizable object may themselves be already finalized; finalizers are
enqueued for objects when they become unreachable, which can concern
whole subgraphs of objects at once.
The usual way for an embedder to know when the queue of finalizable
objects is non-empty is to call `gc_set_finalizer_callback` to
provide a function that will be invoked when there are pending
finalizers.
Arranging to call `gc_pop_finalizable` and doing something with the
finalizable object and closure is the responsibility of the embedder.
The embedder's finalization action can end up invoking arbitrary code,
so unless the embedder imposes some kind of restriction on what
finalizers can do, generally speaking finalizers should be run in a
dedicated thread instead of recursively from within whatever mutator
thread caused GC. Setting up such a thread is the responsibility of the
mutator. `gc_pop_finalizable` is thread-safe, allowing multiple
finalization threads if that is appropriate.
`gc_allocate_finalizer` returns a finalizer, which is a fresh GC-managed
heap object. The mutator should then directly attach it to an object
using `gc_finalizer_attach`. When the finalizer is fired, it becomes
available to the mutator via `gc_pop_finalizable`.
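A sketch of the overall flow; the argument lists and the accessors for
the popped finalizer's object and closure are assumptions based on the
text above, so consult [`gc-finalizer.h`](../api/gc-finalizer.h) for the
real declarations.
```c
// Sketch: attach a priority-0 finalizer and drain the finalizable queue,
// for example from a dedicated finalization thread.
static void add_finalizer(struct gc_mutator *mut, struct gc_ref obj,
                          struct gc_ref closure) {
  struct gc_finalizer *f = gc_allocate_finalizer(mut);
  gc_finalizer_attach(mut, f, /*priority=*/0, obj, closure);
}
static void drain_finalizables(struct gc_mutator *mut) {
  for (;;) {
    struct gc_finalizer *f = gc_pop_finalizable(mut);
    if (!f) break;
    struct gc_ref obj = gc_finalizer_object(f);       // assumed accessor
    struct gc_ref closure = gc_finalizer_closure(f);  // assumed accessor
    run_finalization_action(obj, closure);            // hypothetical embedder code
  }
}
```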

Binary image file added (169 KiB); not shown.

View file

@@ -0,0 +1,127 @@
# Whippet performance tracing
Whippet includes support for run-time tracing via
[LTTng](https://LTTng.org) user-space tracepoints. This allows you to
get a detailed look at how Whippet is performing on your system.
Tracing support is currently limited to Linux systems.
## Getting started
First, you need to build Whippet with LTTng support. Usually this is as
easy as building it in an environment where the `lttng-ust` library is
present, as determined by `pkg-config --libs lttng-ust`. You can tell
whether your Whippet build has tracing support by checking whether the
resulting binaries are dynamically linked to `liblttng-ust`.
If we take as an example the `mt-gcbench` test in the Whippet source
tree, we would have:
```
$ ldd bin/mt-gcbench.pcc | grep lttng
...
liblttng-ust.so.1 => ...
...
```
### Capturing traces
Actually capturing traces is a little annoying; it's not as easy as
`perf record`. The [LTTng
documentation](https://lttng.org/docs/v2.13/#doc-controlling-tracing) is
quite thorough, but here is a summary.
First, create your tracing session:
```
$ lttng create
Session auto-20250214-091153 created.
Traces will be output to ~/lttng-traces/auto-20250214-091153
```
You run all these commands as your own user; they don't require root
permissions or system-wide modifications, as all of the Whippet
tracepoints are user-space tracepoints (UST).
Just having an LTTng session created won't do anything though; you need
to configure the session. Monotonic nanosecond-resolution timestamps
are already implicitly part of each event. We also want to have process
and thread IDs for all events:
```
$ lttng add-context --userspace --type=vpid --type=vtid
ust context vpid added to all channels
ust context vtid added to all channels
```
Now enable Whippet events:
```
$ lttng enable-event --userspace 'whippet:*'
ust event whippet:* created in channel channel0
```
And now, start recording:
```
$ lttng start
Tracing started for session auto-20250214-091153
```
With this, traces will be captured for our program of interest:
```
$ bin/mt-gcbench.pcc 2.5 8
...
```
Now stop the trace:
```
$ lttng stop
Waiting for data availability
Tracing stopped for session auto-20250214-091153
```
Whew. If we did it right, our data is now in
`~/lttng-traces/auto-20250214-091153`.
### Visualizing traces
LTTng produces traces in the [Common Trace Format
(CTF)](https://diamon.org/ctf/). My favorite trace viewing tool is the
family of web-based trace viewers derived from `chrome://tracing`. The
best of these appear to be [the Firefox
profiler](https://profiler.firefox.com) and
[Perfetto](https://ui.perfetto.dev). Unfortunately neither of these can
work with CTF directly, so we instead need to run a trace converter.
Oddly, there is no trace converter that can read CTF and write something
that Perfetto (e.g.) can read. However there is a [JSON-based tracing
format that these tools can
read](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0#heading=h.yr4qxyxotyw),
and [Python bindings for Babeltrace, a library that works with
CTF](https://babeltrace.org/), so that's what we do:
```
$ python3 ctf_to_json.py ~/lttng-traces/auto-20250214-091153 > trace.json
```
While Firefox Profiler can load this file, it works better on Perfetto,
as the Whippet events are visually rendered on their respective threads.
![Screenshot of part of Perfetto UI showing a minor GC](./perfetto-minor-gc.png)
### Expanding the set of events
As of February 2025, the set of tracepoints includes the [heap
events](https://github.com/wingo/whippet/blob/main/doc/manual.md#statistics)
and some detailed internals of the parallel tracer. We expect this set
of tracepoints to expand over time.
### Overhead of tracepoints
When tracepoints are compiled in but no events are enabled, tracepoints
appear to have no impact on run-time. When event collection is on, for
x86-64 hardware, [emitting a tracepoint event takes about
100ns](https://discuss.systems/@DesnoyersMa/113986344940256872).

207
libguile/whippet/embed.am Normal file
View file

@@ -0,0 +1,207 @@
# Automake snippet for embedding Whippet in an autotools project.
#
# The including Makefile.am needs to do this, assuming Whippet is in the
# whippet/ subdirectory:
#
# noinst_LTLIBRARIES =
# WHIPPET_EMBEDDER_CPPFLAGS = -include src/my-embedder.h
# include whippet/embed.am
#
# my-embedder.h should provide the various hooks that Whippet needs to
# specialize itself to the embedder's object representation.
#
# The result is a libwhippet.la. To compile and link against it:
#
# AM_CFLAGS = $(WHIPPET_CPPFLAGS) $(WHIPPET_CFLAGS) $(WHIPPET_TO_EMBEDDER_CPPFLAGS)
# LDADD = libwhippet.la
# AM_LDFLAGS = $(WHIPPET_TO_EMBEDDER_LDFLAGS)
#
# The assumption is that the embedder will build a single copy of
# Whippet, specialized against a single collector, a single set of
# embedder hooks, and a single target platform. The collector and
# platform should be chosen at configure-time. Because Automake really
# wants the set of source files to be visible to it at automake-time, we
# need to implement these conditions via AM_CONDITIONAL in a
# configure.ac. For example for a parallel-mmc configuration on
# gnu-linux, we would need:
#
# AM_SUBST(WHIPPET_COLLECTOR, parallel-mmc)
# AM_CONDITIONAL(WHIPPET_COLLECTOR_SEMI, 0)
# AM_CONDITIONAL(WHIPPET_COLLECTOR_PCC, 0)
# AM_CONDITIONAL(WHIPPET_COLLECTOR_BDW, 0)
# AM_CONDITIONAL(WHIPPET_COLLECTOR_MMC, 1)
# AM_CONDITIONAL(WHIPPET_PLATFORM_GNU_LINUX, 1)
#
# Then there are other conditionals for compilation options:
#
# AM_CONDITIONAL(WHIPPET_ENABLE_DEBUG, 0)
# AM_CONDITIONAL(WHIPPET_USE_LTTNG, 1)
#
# Finally, LTO should be enabled, for best performance. This should be
# added to CFLAGS at configure-time.
#
# Getting all of this in there is gnarly. See the example configure.ac
# for one take on the topic.
noinst_LTLIBRARIES += libwhippet-common.la libwhippet.la
libwhippet_common_la_SOURCES = \
%D%/src/gc-options-internal.h \
%D%/src/gc-options.c \
%D%/src/gc-stack.c \
%D%/src/gc-stack.h \
%D%/src/gc-tracepoint.c
if WHIPPET_PLATFORM_GNU_LINUX
libwhippet_common_la_SOURCES += %D%/src/gc-platform-gnu-linux.c
endif
libwhippet_la_SOURCES = \
%D%/src/adaptive-heap-sizer.h \
%D%/src/address-hash.h \
%D%/src/address-map.h \
%D%/src/address-set.h \
%D%/src/assert.h \
%D%/src/background-thread.h \
%D%/src/copy-space.h \
%D%/src/debug.h \
%D%/src/extents.h \
%D%/src/field-set.h \
%D%/src/freelist.h \
%D%/src/gc-align.h \
%D%/src/gc-ephemeron-internal.h \
%D%/src/gc-ephemeron.c \
%D%/src/gc-finalizer-internal.h \
%D%/src/gc-finalizer.c \
%D%/src/gc-internal.h \
%D%/src/gc-lock.h \
%D%/src/gc-platform.h \
%D%/src/gc-trace.h \
%D%/src/growable-heap-sizer.h \
%D%/src/heap-sizer.h \
%D%/src/large-object-space.h \
%D%/src/local-worklist.h \
%D%/src/nofl-space.h \
%D%/src/parallel-tracer.h \
%D%/src/root.h \
%D%/src/root-worklist.h \
%D%/src/serial-tracer.h \
%D%/src/shared-worklist.h \
%D%/src/simple-worklist.h \
%D%/src/spin.h \
%D%/src/splay-tree.h \
%D%/src/swar.h \
%D%/src/tracer.h
WHIPPET_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
WHIPPET_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
WHIPPET_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS_generational_pcc = $(WHIPPET_CFLAGS_pcc) -DGC_GENERATIONAL=1
WHIPPET_CFLAGS_mmc = \
-DGC_PRECISE_ROOTS=1
WHIPPET_CFLAGS_generational_mmc = \
-DGC_PRECISE_ROOTS=1 -DGC_GENERATIONAL=1
WHIPPET_CFLAGS_parallel_mmc = \
-DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS_parallel_generational_mmc = \
-DGC_PRECISE_ROOTS=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS_stack_conservative_mmc = \
-DGC_CONSERVATIVE_ROOTS=1
WHIPPET_CFLAGS_stack_conservative_generational_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_GENERATIONAL=1
WHIPPET_CFLAGS_stack_conservative_parallel_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS_stack_conservative_parallel_generational_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS_heap_conservative_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
WHIPPET_CFLAGS_heap_conservative_generational_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_GENERATIONAL=1
WHIPPET_CFLAGS_heap_conservative_parallel_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS_heap_conservative_parallel_generational_mmc = \
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1
WHIPPET_CFLAGS = $(WHIPPET_CFLAGS_$(subst -,_,$(WHIPPET_COLLECTOR)))
WHIPPET_IMPL_CFLAGS =
WHIPPET_LIBS = -lm
WHIPPET_CPPFLAGS = -I$(srcdir)/%D%/api
WHIPPET_TO_EMBEDDER_CPPFLAGS = $(WHIPPET_CPPFLAGS)
if WHIPPET_ENABLE_DEBUG
WHIPPET_CFLAGS += -DGC_DEBUG=1
endif
if WHIPPET_COLLECTOR_SEMI
libwhippet_la_SOURCES += %D%/src/semi.c
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/semi-attrs.h
endif
if WHIPPET_COLLECTOR_PCC
libwhippet_la_SOURCES += %D%/src/pcc.c
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/pcc-attrs.h
endif
if WHIPPET_COLLECTOR_BDW
libwhippet_la_SOURCES += %D%/src/bdw.c
WHIPPET_IMPL_CFLAGS += $(WHIPPET_BDW_CFLAGS)
WHIPPET_LIBS += $(WHIPPET_BDW_LIBS)
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/bdw-attrs.h
endif
if WHIPPET_COLLECTOR_MMC
libwhippet_la_SOURCES += %D%/src/mmc.c
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/mmc-attrs.h
endif
# add to cflags: -flto -fvisibility=hidden -fno-strict-aliasing
libwhippet_common_la_CPPFLAGS = $(WHIPPET_CPPFLAGS)
libwhippet_common_la_CFLAGS = -Wall -Wno-unused $(CFLAGS)
libwhippet_common_la_CFLAGS += $(WHIPPET_CFLAGS)
libwhippet_common_la_LDFLAGS = -lpthread $(LDFLAGS)
libwhippet_common_la_LIBADD = $(LIBS)
if WHIPPET_USE_LTTNG
libwhippet_common_la_CPPFLAGS += $(WHIPPET_LTTNG_CFLAGS) -DGC_TRACEPOINT_LTTNG=1
WHIPPET_LIBS += $(WHIPPET_LTTNG_LIBS)
endif
if !WHIPPET_ENABLE_DEBUG
libwhippet_common_la_CFLAGS += -DNDEBUG
endif
libwhippet_la_CPPFLAGS = $(libwhippet_common_la_CPPFLAGS) $(WHIPPET_EMBEDDER_CPPFLAGS)
libwhippet_la_CFLAGS = $(libwhippet_common_la_CFLAGS)
libwhippet_la_CFLAGS += $(WHIPPET_IMPL_CFLAGS)
libwhippet_la_LDFLAGS = $(libwhippet_common_la_LDFLAGS) $(WHIPPET_LIBS)
libwhippet_la_LIBADD = libwhippet-common.la
noinst_HEADERS = \
%D%/api/bdw-attrs.h \
%D%/api/gc-allocation-kind.h \
%D%/api/gc-api.h \
%D%/api/gc-assert.h \
%D%/api/gc-attrs.h \
%D%/api/gc-basic-stats.h \
%D%/api/gc-collection-kind.h \
%D%/api/gc-config.h \
%D%/api/gc-conservative-ref.h \
%D%/api/gc-edge.h \
%D%/api/gc-embedder-api.h \
%D%/api/gc-ephemeron.h \
%D%/api/gc-event-listener-chain.h \
%D%/api/gc-event-listener.h \
%D%/api/gc-finalizer.h \
%D%/api/gc-forwarding.h \
%D%/api/gc-histogram.h \
%D%/api/gc-inline.h \
%D%/api/gc-lttng.h \
%D%/api/gc-null-event-listener.h \
%D%/api/gc-options.h \
%D%/api/gc-ref.h \
%D%/api/gc-tracepoint.h \
%D%/api/gc-visibility.h \
%D%/api/mmc-attrs.h \
%D%/api/pcc-attrs.h \
%D%/api/semi-attrs.h

105
libguile/whippet/embed.mk Normal file
View file

@@ -0,0 +1,105 @@
GC_COLLECTOR ?= semi
DEFAULT_BUILD := opt
BUILD_CFLAGS_opt = -O2 -g -DNDEBUG
BUILD_CFLAGS_optdebug = -O2 -g -DGC_DEBUG=1
BUILD_CFLAGS_debug = -O0 -g -DGC_DEBUG=1
GC_BUILD_CFLAGS = $(BUILD_CFLAGS_$(or $(GC_BUILD),$(DEFAULT_BUILD)))
V ?= 1
v_0 = @
v_1 =
GC_USE_LTTNG_0 :=
GC_USE_LTTNG_1 := 1
GC_USE_LTTNG := $(shell pkg-config --exists lttng-ust && echo 1 || echo 0)
GC_LTTNG_CPPFLAGS := $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)), $(shell pkg-config --cflags lttng-ust),)
GC_LTTNG_LIBS := $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)), $(shell pkg-config --libs lttng-ust),)
GC_TRACEPOINT_CPPFLAGS = $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)),$(GC_LTTNG_CPPFLAGS) -DGC_TRACEPOINT_LTTNG=1,)
GC_TRACEPOINT_LIBS = $(GC_LTTNG_LIBS)
GC_V = $(v_$(V))
GC_CC = gcc
GC_CFLAGS = -Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(GC_BUILD_CFLAGS)
GC_CPPFLAGS = -I$(WHIPPET)api $(GC_TRACEPOINT_CPPFLAGS)
GC_LDFLAGS = -lpthread -flto=auto $(GC_TRACEPOINT_LIBS)
GC_DEPFLAGS =
GC_COMPILE = $(GC_V)$(GC_CC) $(GC_CFLAGS) $(GC_CPPFLAGS) $(GC_DEPFLAGS) -o $@
GC_LINK = $(GC_V)$(GC_CC) $(GC_LDFLAGS) -o $@
GC_PLATFORM = gnu-linux
GC_OBJDIR =
$(GC_OBJDIR)gc-platform.o: $(WHIPPET)src/gc-platform-$(GC_PLATFORM).c
$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-stack.o: $(WHIPPET)src/gc-stack.c
$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-options.o: $(WHIPPET)src/gc-options.c
$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-tracepoint.o: $(WHIPPET)src/gc-tracepoint.c
$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-ephemeron.o: $(WHIPPET)src/gc-ephemeron.c
$(GC_COMPILE) $(EMBEDDER_TO_GC_CFLAGS) -c $<
$(GC_OBJDIR)gc-finalizer.o: $(WHIPPET)src/gc-finalizer.c
$(GC_COMPILE) $(EMBEDDER_TO_GC_CFLAGS) -c $<
GC_STEM_bdw = bdw
GC_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
GC_IMPL_CFLAGS_bdw = `pkg-config --cflags bdw-gc`
GC_LIBS_bdw = `pkg-config --libs bdw-gc`
GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_LIBS_semi = -lm
GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
GC_LIBS_pcc = -lm
GC_STEM_generational_pcc = $(GC_STEM_pcc)
GC_CFLAGS_generational_pcc = $(GC_CFLAGS_pcc) -DGC_GENERATIONAL=1
GC_LIBS_generational_pcc = $(GC_LIBS_pcc)
define mmc_variant
GC_STEM_$(1) = mmc
GC_CFLAGS_$(1) = $(2)
GC_LIBS_$(1) = -lm
endef
define generational_mmc_variants
$(call mmc_variant,$(1)mmc,$(2))
$(call mmc_variant,$(1)generational_mmc,$(2) -DGC_GENERATIONAL=1)
endef
define parallel_mmc_variants
$(call generational_mmc_variants,$(1),$(2))
$(call generational_mmc_variants,$(1)parallel_,$(2) -DGC_PARALLEL=1)
endef
define trace_mmc_variants
$(call parallel_mmc_variants,,-DGC_PRECISE_ROOTS=1)
$(call parallel_mmc_variants,stack_conservative_,-DGC_CONSERVATIVE_ROOTS=1)
$(call parallel_mmc_variants,heap_conservative_,-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1)
endef
$(eval $(call trace_mmc_variants))
gc_var = $($(1)$(subst -,_,$(2)))
gc_impl = $(call gc_var,GC_STEM_,$(1)).c
gc_attrs = $(call gc_var,GC_STEM_,$(1))-attrs.h
gc_cflags = $(call gc_var,GC_CFLAGS_,$(1))
gc_impl_cflags = $(call gc_var,GC_IMPL_CFLAGS_,$(1))
gc_libs = $(call gc_var,GC_LIBS_,$(1))
GC_IMPL = $(call gc_impl,$(GC_COLLECTOR))
GC_CFLAGS += $(call gc_cflags,$(GC_COLLECTOR))
GC_IMPL_CFLAGS = $(call gc_impl_cflags,$(GC_COLLECTOR))
GC_ATTRS = $(WHIPPET)api/$(call gc_attrs,$(GC_COLLECTOR))
GC_TO_EMBEDDER_CFLAGS = -include $(GC_ATTRS)
GC_LIBS = $(call gc_libs,$(GC_COLLECTOR))
$(GC_OBJDIR)gc-impl.o: $(WHIPPET)src/$(call gc_impl,$(GC_COLLECTOR))
$(GC_COMPILE) $(GC_IMPL_CFLAGS) $(EMBEDDER_TO_GC_CFLAGS) -c $<
GC_OBJS=$(foreach O,gc-platform.o gc-stack.o gc-options.o gc-tracepoint.o gc-ephemeron.o gc-finalizer.o gc-impl.o,$(GC_OBJDIR)$(O))

View file

@@ -0,0 +1,11 @@
(use-modules (guix packages))
(specifications->manifest
'("bash"
"coreutils"
"gcc-toolchain"
"lttng-ust"
"glibc"
"libgc"
"make"
"pkg-config"))

View file

@@ -0,0 +1,171 @@
#ifndef ADAPTIVE_HEAP_SIZER_H
#define ADAPTIVE_HEAP_SIZER_H
#include <math.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include "assert.h"
#include "background-thread.h"
#include "debug.h"
#include "gc-config.h"
#include "gc-platform.h"
#include "heap-sizer.h"
// This is the MemBalancer algorithm from "Optimal Heap Limits for Reducing
// Browser Memory Use" by Marisa Kirisame, Pranav Shenoy, and Pavel Panchekha
// (https://arxiv.org/abs/2204.10455).
//
// This implementation differs slightly in that the constant "c" of the paper
// has been extracted outside the radical, and notionally reversed: it is a
// unitless "expansiveness" parameter whose domain is [0,+∞]. Also there are
// minimum and maximum heap size multipliers, and a minimum amount of free
// space. The initial collection rate is an informed guess. The initial
// allocation rate estimate is high, considering that allocation rates are often
// high on program startup.
struct gc_adaptive_heap_sizer {
uint64_t (*get_allocation_counter)(struct gc_heap *heap);
void (*set_heap_size)(struct gc_heap *heap, size_t size);
struct gc_heap *heap;
uint64_t smoothed_pause_time;
uint64_t smoothed_live_bytes;
uint64_t live_bytes;
double smoothed_allocation_rate;
double collection_smoothing_factor;
double allocation_smoothing_factor;
double minimum_multiplier;
double maximum_multiplier;
double minimum_free_space;
double expansiveness;
#if GC_PARALLEL
pthread_mutex_t lock;
#endif
int background_task_id;
uint64_t last_bytes_allocated;
uint64_t last_heartbeat;
};
static void
gc_adaptive_heap_sizer_lock(struct gc_adaptive_heap_sizer *sizer) {
#if GC_PARALLEL
pthread_mutex_lock(&sizer->lock);
#endif
}
static void
gc_adaptive_heap_sizer_unlock(struct gc_adaptive_heap_sizer *sizer) {
#if GC_PARALLEL
pthread_mutex_unlock(&sizer->lock);
#endif
}
// With lock
static uint64_t
gc_adaptive_heap_sizer_calculate_size(struct gc_adaptive_heap_sizer *sizer) {
double allocation_rate = sizer->smoothed_allocation_rate;
double collection_rate =
(double)sizer->smoothed_pause_time / (double)sizer->smoothed_live_bytes;
double radicand = sizer->live_bytes * allocation_rate / collection_rate;
double multiplier = 1.0 + sizer->expansiveness * sqrt(radicand);
if (isnan(multiplier) || multiplier < sizer->minimum_multiplier)
multiplier = sizer->minimum_multiplier;
else if (multiplier > sizer->maximum_multiplier)
multiplier = sizer->maximum_multiplier;
uint64_t size = sizer->live_bytes * multiplier;
if (size - sizer->live_bytes < sizer->minimum_free_space)
size = sizer->live_bytes + sizer->minimum_free_space;
return size;
}
static uint64_t
gc_adaptive_heap_sizer_set_expansiveness(struct gc_adaptive_heap_sizer *sizer,
double expansiveness) {
gc_adaptive_heap_sizer_lock(sizer);
sizer->expansiveness = expansiveness;
uint64_t heap_size = gc_adaptive_heap_sizer_calculate_size(sizer);
gc_adaptive_heap_sizer_unlock(sizer);
return heap_size;
}
static void
gc_adaptive_heap_sizer_on_gc(struct gc_adaptive_heap_sizer *sizer,
size_t live_bytes, uint64_t pause_ns,
void (*set_heap_size)(struct gc_heap*, size_t)) {
gc_adaptive_heap_sizer_lock(sizer);
sizer->live_bytes = live_bytes;
sizer->smoothed_live_bytes *= 1.0 - sizer->collection_smoothing_factor;
sizer->smoothed_live_bytes += sizer->collection_smoothing_factor * live_bytes;
sizer->smoothed_pause_time *= 1.0 - sizer->collection_smoothing_factor;
sizer->smoothed_pause_time += sizer->collection_smoothing_factor * pause_ns;
set_heap_size(sizer->heap, gc_adaptive_heap_sizer_calculate_size(sizer));
gc_adaptive_heap_sizer_unlock(sizer);
}
static void
gc_adaptive_heap_sizer_background_task(void *data) {
struct gc_adaptive_heap_sizer *sizer = data;
gc_adaptive_heap_sizer_lock(sizer);
uint64_t bytes_allocated =
sizer->get_allocation_counter(sizer->heap);
// bytes_allocated being 0 means the request failed; retry later.
if (bytes_allocated) {
uint64_t heartbeat = gc_platform_monotonic_nanoseconds();
double rate = (double) (bytes_allocated - sizer->last_bytes_allocated) /
(double) (heartbeat - sizer->last_heartbeat);
// Just smooth the rate, under the assumption that the denominator is almost
// always 1.
sizer->smoothed_allocation_rate *= 1.0 - sizer->allocation_smoothing_factor;
sizer->smoothed_allocation_rate += rate * sizer->allocation_smoothing_factor;
sizer->last_heartbeat = heartbeat;
sizer->last_bytes_allocated = bytes_allocated;
sizer->set_heap_size(sizer->heap,
gc_adaptive_heap_sizer_calculate_size(sizer));
}
gc_adaptive_heap_sizer_unlock(sizer);
}
static struct gc_adaptive_heap_sizer*
gc_make_adaptive_heap_sizer(struct gc_heap *heap, double expansiveness,
uint64_t (*get_allocation_counter)(struct gc_heap*),
void (*set_heap_size)(struct gc_heap*, size_t),
struct gc_background_thread *thread) {
struct gc_adaptive_heap_sizer *sizer;
sizer = malloc(sizeof(*sizer));
if (!sizer)
GC_CRASH();
memset(sizer, 0, sizeof(*sizer));
sizer->get_allocation_counter = get_allocation_counter;
sizer->set_heap_size = set_heap_size;
sizer->heap = heap;
// Baseline estimate of GC speed: 10 MB/ms, or 10 bytes/ns. However since we
// observe this speed by separately noisy measurements, we have to provide
// defaults for numerator and denominator; estimate 2ms for initial GC pauses
// for 20 MB of live data during program startup.
sizer->smoothed_pause_time = 2 * 1000 * 1000;
sizer->smoothed_live_bytes = 20 * 1024 * 1024;
// Baseline estimate of allocation rate during startup: 50 MB in 10ms, or 5
// bytes/ns.
sizer->smoothed_allocation_rate = 5;
sizer->collection_smoothing_factor = 0.5;
sizer->allocation_smoothing_factor = 0.95;
sizer->minimum_multiplier = 1.1;
sizer->maximum_multiplier = 5;
sizer->minimum_free_space = 4 * 1024 * 1024;
sizer->expansiveness = expansiveness;
sizer->last_bytes_allocated = get_allocation_counter(heap);
sizer->last_heartbeat = gc_platform_monotonic_nanoseconds();
#if GC_PARALLEL
pthread_mutex_init(&thread->lock, NULL);
sizer->background_task_id =
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_MIDDLE,
gc_adaptive_heap_sizer_background_task,
sizer);
#else
sizer->background_task_id = -1;
#endif
return sizer;
}
#endif // ADAPTIVE_HEAP_SIZER_H

View file

@@ -0,0 +1,45 @@
#ifndef ADDRESS_HASH_H
#define ADDRESS_HASH_H
#include <stdint.h>
static uintptr_t hash_address(uintptr_t x) {
if (sizeof (x) < 8) {
// Chris Wellon's lowbias32, from https://nullprogram.com/blog/2018/07/31/.
x ^= x >> 16;
x *= 0x7feb352dU;
x ^= x >> 15;
x *= 0x846ca68bU;
x ^= x >> 16;
return x;
} else {
// Sebastiano Vigna's splitmix64 integer mixer, from
// https://prng.di.unimi.it/splitmix64.c.
x ^= x >> 30;
x *= 0xbf58476d1ce4e5b9U;
x ^= x >> 27;
x *= 0x94d049bb133111ebU;
x ^= x >> 31;
return x;
}
}
// Inverse of hash_address from https://nullprogram.com/blog/2018/07/31/.
static uintptr_t unhash_address(uintptr_t x) {
if (sizeof (x) < 8) {
x ^= x >> 16;
x *= 0x43021123U;
x ^= x >> 15 ^ x >> 30;
x *= 0x1d69e2a5U;
x ^= x >> 16;
return x;
} else {
x ^= x >> 31 ^ x >> 62;
x *= 0x319642b2d24d8ec3U;
x ^= x >> 27 ^ x >> 54;
x *= 0x96de1b173f119089U;
x ^= x >> 30 ^ x >> 60;
return x;
}
}
#endif // ADDRESS_HASH_H

View file

@@ -0,0 +1,213 @@
#ifndef ADDRESS_MAP_H
#define ADDRESS_MAP_H
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#include "address-hash.h"
#include "gc-assert.h"
struct hash_map_entry {
uintptr_t k;
uintptr_t v;
};
struct hash_map {
struct hash_map_entry *data;
size_t size; // total number of slots
size_t n_items; // number of items in set
uint8_t *bits; // bitvector indicating set slots
};
static void hash_map_clear(struct hash_map *map) {
memset(map->bits, 0, map->size / 8);
map->n_items = 0;
}
// Size must be a power of 2.
static void hash_map_init(struct hash_map *map, size_t size) {
map->size = size;
map->data = malloc(sizeof(struct hash_map_entry) * size);
if (!map->data) GC_CRASH();
map->bits = malloc(size / 8);
if (!map->bits) GC_CRASH();
hash_map_clear(map);
}
static void hash_map_destroy(struct hash_map *map) {
free(map->data);
free(map->bits);
}
static size_t hash_map_slot_index(struct hash_map *map, size_t idx) {
return idx & (map->size - 1);
}
static struct hash_map_entry* hash_map_slot_entry(struct hash_map *map,
size_t idx) {
return &map->data[hash_map_slot_index(map, idx)];
}
static int hash_map_slot_is_empty(struct hash_map *map, size_t idx) {
idx = hash_map_slot_index(map, idx);
return (map->bits[idx / 8] & (1 << (idx % 8))) == 0;
}
static void hash_map_slot_acquire(struct hash_map *map, size_t idx) {
idx = hash_map_slot_index(map, idx);
map->bits[idx / 8] |= (1 << (idx % 8));
map->n_items++;
}
static void hash_map_slot_release(struct hash_map *map, size_t idx) {
idx = hash_map_slot_index(map, idx);
map->bits[idx / 8] &= ~(1 << (idx % 8));
map->n_items--;
}
static size_t hash_map_slot_distance(struct hash_map *map, size_t idx) {
return hash_map_slot_index(map, idx - hash_map_slot_entry(map, idx)->k);
}
static int hash_map_should_shrink(struct hash_map *map) {
return map->size > 8 && map->n_items <= (map->size >> 3);
}
static int hash_map_should_grow(struct hash_map *map) {
return map->n_items >= map->size - (map->size >> 3);
}
static void hash_map_do_insert(struct hash_map *map, uintptr_t k, uintptr_t v) {
size_t displacement = 0;
while (!hash_map_slot_is_empty(map, k + displacement)
&& displacement < hash_map_slot_distance(map, k + displacement))
displacement++;
while (!hash_map_slot_is_empty(map, k + displacement)
&& displacement == hash_map_slot_distance(map, k + displacement)) {
if (hash_map_slot_entry(map, k + displacement)->k == k) {
hash_map_slot_entry(map, k + displacement)->v = v;
return;
}
displacement++;
}
size_t idx = k + displacement;
size_t slots_to_move = 0;
while (!hash_map_slot_is_empty(map, idx + slots_to_move))
slots_to_move++;
hash_map_slot_acquire(map, idx + slots_to_move);
while (slots_to_move--)
*hash_map_slot_entry(map, idx + slots_to_move + 1) =
*hash_map_slot_entry(map, idx + slots_to_move);
*hash_map_slot_entry(map, idx) = (struct hash_map_entry){ k, v };
}
static void hash_map_populate(struct hash_map *dst, struct hash_map *src) {
for (size_t i = 0; i < src->size; i++)
if (!hash_map_slot_is_empty(src, i))
hash_map_do_insert(dst, hash_map_slot_entry(src, i)->k,
hash_map_slot_entry(src, i)->v);
}
static void hash_map_grow(struct hash_map *map) {
struct hash_map fresh;
hash_map_init(&fresh, map->size << 1);
hash_map_populate(&fresh, map);
hash_map_destroy(map);
memcpy(map, &fresh, sizeof(fresh));
}
static void hash_map_shrink(struct hash_map *map) {
struct hash_map fresh;
hash_map_init(&fresh, map->size >> 1);
hash_map_populate(&fresh, map);
hash_map_destroy(map);
memcpy(map, &fresh, sizeof(fresh));
}
static void hash_map_insert(struct hash_map *map, uintptr_t k, uintptr_t v) {
if (hash_map_should_grow(map))
hash_map_grow(map);
hash_map_do_insert(map, k, v);
}
static void hash_map_remove(struct hash_map *map, uintptr_t k) {
size_t slot = k;
while (!hash_map_slot_is_empty(map, slot) && hash_map_slot_entry(map, slot)->k != k)
slot++;
if (hash_map_slot_is_empty(map, slot))
__builtin_trap();
while (!hash_map_slot_is_empty(map, slot + 1)
&& hash_map_slot_distance(map, slot + 1)) {
*hash_map_slot_entry(map, slot) = *hash_map_slot_entry(map, slot + 1);
slot++;
}
hash_map_slot_release(map, slot);
if (hash_map_should_shrink(map))
hash_map_shrink(map);
}
static int hash_map_contains(struct hash_map *map, uintptr_t k) {
for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) {
if (hash_map_slot_entry(map, slot)->k == k)
return 1;
if (hash_map_slot_distance(map, slot) < (slot - k))
return 0;
}
return 0;
}
static uintptr_t hash_map_lookup(struct hash_map *map, uintptr_t k, uintptr_t default_) {
for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) {
if (hash_map_slot_entry(map, slot)->k == k)
return hash_map_slot_entry(map, slot)->v;
if (hash_map_slot_distance(map, slot) < (slot - k))
break;
}
return default_;
}
static inline void hash_map_for_each (struct hash_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) __attribute__((always_inline));
static inline void hash_map_for_each(struct hash_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) {
for (size_t i = 0; i < map->size; i++)
if (!hash_map_slot_is_empty(map, i))
f(hash_map_slot_entry(map, i)->k, hash_map_slot_entry(map, i)->v, data);
}
struct address_map {
struct hash_map hash_map;
};
static void address_map_init(struct address_map *map) {
hash_map_init(&map->hash_map, 8);
}
static void address_map_destroy(struct address_map *map) {
hash_map_destroy(&map->hash_map);
}
static void address_map_clear(struct address_map *map) {
hash_map_clear(&map->hash_map);
}
static void address_map_add(struct address_map *map, uintptr_t addr, uintptr_t v) {
hash_map_insert(&map->hash_map, hash_address(addr), v);
}
static void address_map_remove(struct address_map *map, uintptr_t addr) {
hash_map_remove(&map->hash_map, hash_address(addr));
}
static int address_map_contains(struct address_map *map, uintptr_t addr) {
return hash_map_contains(&map->hash_map, hash_address(addr));
}
static uintptr_t address_map_lookup(struct address_map *map, uintptr_t addr,
uintptr_t default_) {
return hash_map_lookup(&map->hash_map, hash_address(addr), default_);
}
struct address_map_for_each_data {
void (*f)(uintptr_t, uintptr_t, void *);
void *data;
};
static void address_map_do_for_each(uintptr_t k, uintptr_t v, void *data) {
struct address_map_for_each_data *for_each_data = data;
for_each_data->f(unhash_address(k), v, for_each_data->data);
}
static inline void address_map_for_each (struct address_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) __attribute__((always_inline));
static inline void address_map_for_each (struct address_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) {
struct address_map_for_each_data for_each_data = { f, data };
hash_map_for_each(&map->hash_map, address_map_do_for_each, &for_each_data);
}
#endif // ADDRESS_MAP_H

View file

@@ -0,0 +1,214 @@
#ifndef ADDRESS_SET_H
#define ADDRESS_SET_H
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#include "address-hash.h"
#include "gc-assert.h"
struct hash_set {
uintptr_t *data;
size_t size; // total number of slots
size_t n_items; // number of items in set
uint8_t *bits; // bitvector indicating set slots
};
static void hash_set_clear(struct hash_set *set) {
memset(set->bits, 0, set->size / 8);
set->n_items = 0;
}
// Size must be a power of 2.
static void hash_set_init(struct hash_set *set, size_t size) {
set->size = size;
set->data = malloc(sizeof(uintptr_t) * size);
if (!set->data) GC_CRASH();
set->bits = malloc(size / 8);
if (!set->bits) GC_CRASH();
hash_set_clear(set);
}
static void hash_set_destroy(struct hash_set *set) {
free(set->data);
free(set->bits);
}
static size_t hash_set_slot_index(struct hash_set *set, size_t idx) {
return idx & (set->size - 1);
}
static int hash_set_slot_is_empty(struct hash_set *set, size_t idx) {
idx = hash_set_slot_index(set, idx);
return (set->bits[idx / 8] & (1 << (idx % 8))) == 0;
}
static uintptr_t hash_set_slot_ref(struct hash_set *set, size_t idx) {
return set->data[hash_set_slot_index(set, idx)];
}
static void hash_set_slot_set(struct hash_set *set, size_t idx, uintptr_t v) {
set->data[hash_set_slot_index(set, idx)] = v;
}
static void hash_set_slot_acquire(struct hash_set *set, size_t idx) {
idx = hash_set_slot_index(set, idx);
set->bits[idx / 8] |= (1 << (idx % 8));
set->n_items++;
}
static void hash_set_slot_release(struct hash_set *set, size_t idx) {
idx = hash_set_slot_index(set, idx);
set->bits[idx / 8] &= ~(1 << (idx % 8));
set->n_items--;
}
static size_t hash_set_slot_distance(struct hash_set *set, size_t idx) {
return hash_set_slot_index(set, idx - hash_set_slot_ref(set, idx));
}
static int hash_set_should_shrink(struct hash_set *set) {
return set->size > 8 && set->n_items <= (set->size >> 3);
}
static int hash_set_should_grow(struct hash_set *set) {
return set->n_items >= set->size - (set->size >> 3);
}
static void hash_set_do_insert(struct hash_set *set, uintptr_t v) {
size_t displacement = 0;
while (!hash_set_slot_is_empty(set, v + displacement)
&& displacement < hash_set_slot_distance(set, v + displacement))
displacement++;
while (!hash_set_slot_is_empty(set, v + displacement)
&& displacement == hash_set_slot_distance(set, v + displacement)) {
if (hash_set_slot_ref(set, v + displacement) == v)
return;
displacement++;
}
size_t idx = v + displacement;
size_t slots_to_move = 0;
while (!hash_set_slot_is_empty(set, idx + slots_to_move))
slots_to_move++;
hash_set_slot_acquire(set, idx + slots_to_move);
while (slots_to_move--)
hash_set_slot_set(set, idx + slots_to_move + 1,
hash_set_slot_ref(set, idx + slots_to_move));
hash_set_slot_set(set, idx, v);
}
static void hash_set_populate(struct hash_set *dst, struct hash_set *src) {
for (size_t i = 0; i < src->size; i++)
if (!hash_set_slot_is_empty(src, i))
hash_set_do_insert(dst, hash_set_slot_ref(src, i));
}
static void hash_set_grow(struct hash_set *set) {
struct hash_set fresh;
hash_set_init(&fresh, set->size << 1);
hash_set_populate(&fresh, set);
hash_set_destroy(set);
memcpy(set, &fresh, sizeof(fresh));
}
static void hash_set_shrink(struct hash_set *set) {
struct hash_set fresh;
hash_set_init(&fresh, set->size >> 1);
hash_set_populate(&fresh, set);
hash_set_destroy(set);
memcpy(set, &fresh, sizeof(fresh));
}
static void hash_set_insert(struct hash_set *set, uintptr_t v) {
if (hash_set_should_grow(set))
hash_set_grow(set);
hash_set_do_insert(set, v);
}
static void hash_set_remove(struct hash_set *set, uintptr_t v) {
size_t slot = v;
while (!hash_set_slot_is_empty(set, slot) && hash_set_slot_ref(set, slot) != v)
slot++;
if (hash_set_slot_is_empty(set, slot))
__builtin_trap();
while (!hash_set_slot_is_empty(set, slot + 1)
&& hash_set_slot_distance(set, slot + 1)) {
hash_set_slot_set(set, slot, hash_set_slot_ref(set, slot + 1));
slot++;
}
hash_set_slot_release(set, slot);
if (hash_set_should_shrink(set))
hash_set_shrink(set);
}
static int hash_set_contains(struct hash_set *set, uintptr_t v) {
for (size_t slot = v; !hash_set_slot_is_empty(set, slot); slot++) {
if (hash_set_slot_ref(set, slot) == v)
return 1;
if (hash_set_slot_distance(set, slot) < (slot - v))
return 0;
}
return 0;
}
static inline void hash_set_find(struct hash_set *set,
int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
static inline void hash_set_find(struct hash_set *set,
int (*f)(uintptr_t, void*), void *data) {
for (size_t i = 0; i < set->size; i++)
if (!hash_set_slot_is_empty(set, i))
if (f(hash_set_slot_ref(set, i), data))
return;
}
struct address_set {
struct hash_set hash_set;
};
static void address_set_init(struct address_set *set) {
hash_set_init(&set->hash_set, 8);
}
static void address_set_destroy(struct address_set *set) {
hash_set_destroy(&set->hash_set);
}
static void address_set_clear(struct address_set *set) {
hash_set_clear(&set->hash_set);
}
static void address_set_add(struct address_set *set, uintptr_t addr) {
hash_set_insert(&set->hash_set, hash_address(addr));
}
static void address_set_remove(struct address_set *set, uintptr_t addr) {
hash_set_remove(&set->hash_set, hash_address(addr));
}
static int address_set_contains(struct address_set *set, uintptr_t addr) {
return hash_set_contains(&set->hash_set, hash_address(addr));
}
static void address_set_union(struct address_set *set, struct address_set *other) {
while (set->hash_set.size < other->hash_set.size)
hash_set_grow(&set->hash_set);
hash_set_populate(&set->hash_set, &other->hash_set);
}
struct address_set_for_each_data {
void (*f)(uintptr_t, void *);
void *data;
};
static int address_set_do_for_each(uintptr_t v, void *data) {
struct address_set_for_each_data *for_each_data = data;
for_each_data->f(unhash_address(v), for_each_data->data);
return 0;
}
static inline void address_set_for_each(struct address_set *set,
void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
static inline void address_set_for_each(struct address_set *set,
void (*f)(uintptr_t, void*), void *data) {
struct address_set_for_each_data for_each_data = { f, data };
hash_set_find(&set->hash_set, address_set_do_for_each, &for_each_data);
}
struct address_set_find_data {
int (*f)(uintptr_t, void *);
void *data;
};
static int address_set_do_find(uintptr_t v, void *data) {
struct address_set_find_data *find_data = data;
return find_data->f(unhash_address(v), find_data->data);
}
static inline void address_set_find(struct address_set *set,
int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
static inline void address_set_find(struct address_set *set,
int (*f)(uintptr_t, void*), void *data) {
struct address_set_find_data find_data = { f, data };
hash_set_find(&set->hash_set, address_set_do_find, &find_data);
}
#endif // ADDRESS_SET_H

View file

@@ -0,0 +1,16 @@
#ifndef ASSERT_H
#define ASSERT_H
#define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq")
#define UNLIKELY(e) __builtin_expect(e, 0)
#define LIKELY(e) __builtin_expect(e, 1)
#ifndef NDEBUG
#define ASSERT(x) do { if (UNLIKELY(!(x))) __builtin_trap(); } while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
#define ASSERT_EQ(a,b) ASSERT((a) == (b))
#endif // ASSERT_H

View file

@@ -0,0 +1,155 @@
#ifndef BACKGROUND_THREAD_H
#define BACKGROUND_THREAD_H
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include "assert.h"
#include "debug.h"
enum {
GC_BACKGROUND_TASK_START = 0,
GC_BACKGROUND_TASK_MIDDLE = 100,
GC_BACKGROUND_TASK_END = 200
};
struct gc_background_task {
int id;
int priority;
void (*run)(void *data);
void *data;
};
enum gc_background_thread_state {
GC_BACKGROUND_THREAD_STARTING,
GC_BACKGROUND_THREAD_RUNNING,
GC_BACKGROUND_THREAD_STOPPING
};
struct gc_background_thread {
size_t count;
size_t capacity;
struct gc_background_task *tasks;
int next_id;
enum gc_background_thread_state state;
pthread_t thread;
pthread_mutex_t lock;
pthread_cond_t cond;
};
static void*
gc_background_thread(void *data) {
struct gc_background_thread *thread = data;
pthread_mutex_lock(&thread->lock);
while (thread->state == GC_BACKGROUND_THREAD_STARTING)
pthread_cond_wait(&thread->cond, &thread->lock);
struct timespec ts;
if (clock_gettime(CLOCK_REALTIME, &ts)) {
perror("background thread: failed to get time!");
return NULL;
}
while (thread->state == GC_BACKGROUND_THREAD_RUNNING) {
ts.tv_sec += 1;
pthread_cond_timedwait(&thread->cond, &thread->lock, &ts);
if (thread->state == GC_BACKGROUND_THREAD_RUNNING)
for (size_t i = 0; i < thread->count; i++)
thread->tasks[i].run(thread->tasks[i].data);
}
pthread_mutex_unlock(&thread->lock);
return NULL;
}
static struct gc_background_thread*
gc_make_background_thread(void) {
struct gc_background_thread *thread;
thread = malloc(sizeof(*thread));
if (!thread)
GC_CRASH();
memset(thread, 0, sizeof(*thread));
thread->tasks = NULL;
thread->count = 0;
thread->capacity = 0;
thread->state = GC_BACKGROUND_THREAD_STARTING;
pthread_mutex_init(&thread->lock, NULL);
pthread_cond_init(&thread->cond, NULL);
if (pthread_create(&thread->thread, NULL, gc_background_thread, thread)) {
perror("spawning background thread failed");
GC_CRASH();
}
return thread;
}
static void
gc_background_thread_start(struct gc_background_thread *thread) {
pthread_mutex_lock(&thread->lock);
GC_ASSERT_EQ(thread->state, GC_BACKGROUND_THREAD_STARTING);
thread->state = GC_BACKGROUND_THREAD_RUNNING;
pthread_mutex_unlock(&thread->lock);
pthread_cond_signal(&thread->cond);
}
static int
gc_background_thread_add_task(struct gc_background_thread *thread,
int priority, void (*run)(void *data),
void *data) {
pthread_mutex_lock(&thread->lock);
if (thread->count == thread->capacity) {
size_t new_capacity = thread->capacity * 2 + 1;
struct gc_background_task *new_tasks =
realloc(thread->tasks, sizeof(struct gc_background_task) * new_capacity);
if (!new_tasks) {
perror("ran out of space for background tasks!");
GC_CRASH();
}
thread->capacity = new_capacity;
thread->tasks = new_tasks;
}
size_t insert = 0;
for (; insert < thread->count; insert++) {
if (priority < thread->tasks[insert].priority)
break;
}
size_t bytes_to_move =
(thread->count - insert) * sizeof(struct gc_background_task);
memmove(&thread->tasks[insert + 1], &thread->tasks[insert], bytes_to_move);
int id = thread->next_id++;
thread->tasks[insert].id = id;
thread->tasks[insert].priority = priority;
thread->tasks[insert].run = run;
thread->tasks[insert].data = data;
thread->count++;
pthread_mutex_unlock(&thread->lock);
return id;
}
static void
gc_background_thread_remove_task(struct gc_background_thread *thread,
int id) {
pthread_mutex_lock(&thread->lock);
size_t remove = 0;
for (; remove < thread->count; remove++) {
if (thread->tasks[remove].id == id)
break;
}
if (remove == thread->count)
GC_CRASH();
size_t bytes_to_move =
(thread->count - (remove + 1)) * sizeof(struct gc_background_task);
memmove(&thread->tasks[remove], &thread->tasks[remove + 1], bytes_to_move);
pthread_mutex_unlock(&thread->lock);
}
static void
gc_destroy_background_thread(struct gc_background_thread *thread) {
pthread_mutex_lock(&thread->lock);
GC_ASSERT(thread->state == GC_BACKGROUND_THREAD_RUNNING);
thread->state = GC_BACKGROUND_THREAD_STOPPING;
pthread_mutex_unlock(&thread->lock);
pthread_cond_signal(&thread->cond);
pthread_join(thread->thread, NULL);
free(thread->tasks);
free(thread);
}
#endif // BACKGROUND_THREAD_H

647
libguile/whippet/src/bdw.c Normal file
View file

@@ -0,0 +1,647 @@
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gc-api.h"
#include "gc-ephemeron.h"
#include "gc-tracepoint.h"
#define GC_IMPL 1
#include "gc-internal.h"
#include "bdw-attrs.h"
#if GC_PRECISE_ROOTS
#error bdw-gc is a conservative collector
#endif
#if !GC_CONSERVATIVE_ROOTS
#error bdw-gc is a conservative collector
#endif
#if !GC_CONSERVATIVE_TRACE
#error bdw-gc is a conservative collector
#endif
// When pthreads are used, let `libgc' know about it and redirect
// allocation calls such as `GC_MALLOC ()' to (contention-free, faster)
// thread-local allocation.
#define GC_THREADS 1
#define GC_REDIRECT_TO_LOCAL 1
// Don't #define pthread routines to their GC_pthread counterparts.
// Instead we will be careful inside the benchmarks to use API to
// register threads with libgc.
#define GC_NO_THREAD_REDIRECTS 1
#include <gc/gc.h>
#include <gc/gc_inline.h> /* GC_generic_malloc_many */
#include <gc/gc_mark.h> /* GC_generic_malloc */
#define GC_INLINE_GRANULE_WORDS 2
#define GC_INLINE_GRANULE_BYTES (sizeof(void *) * GC_INLINE_GRANULE_WORDS)
/* A freelist set contains GC_INLINE_FREELIST_COUNT pointers to singly
linked lists of objects of different sizes, the ith one containing
objects i + 1 granules in size. This setting of
GC_INLINE_FREELIST_COUNT will hold freelists for allocations of
up to 256 bytes. */
#define GC_INLINE_FREELIST_COUNT (256U / GC_INLINE_GRANULE_BYTES)
struct gc_heap {
struct gc_heap *freelist; // see mark_heap
pthread_mutex_t lock;
struct gc_heap_roots *roots;
struct gc_mutator *mutators;
struct gc_event_listener event_listener;
struct gc_finalizer_state *finalizer_state;
gc_finalizer_callback have_finalizers;
void *event_listener_data;
};
struct gc_mutator {
void *freelists[GC_INLINE_FREELIST_COUNT];
void *pointerless_freelists[GC_INLINE_FREELIST_COUNT];
struct gc_heap *heap;
struct gc_mutator_roots *roots;
struct gc_mutator *next; // with heap lock
struct gc_mutator **prev; // with heap lock
void *event_listener_data;
};
struct gc_heap *__the_bdw_gc_heap;
#define HEAP_EVENT(event, ...) do { \
__the_bdw_gc_heap->event_listener.event(__the_bdw_gc_heap->event_listener_data, \
##__VA_ARGS__); \
GC_TRACEPOINT(event, ##__VA_ARGS__); \
} while (0)
#define MUTATOR_EVENT(mut, event, ...) do { \
__the_bdw_gc_heap->event_listener.event(mut->event_listener_data, \
##__VA_ARGS__); \
GC_TRACEPOINT(event, ##__VA_ARGS__); \
} while (0)
static inline size_t gc_inline_bytes_to_freelist_index(size_t bytes) {
return (bytes - 1U) / GC_INLINE_GRANULE_BYTES;
}
static inline size_t gc_inline_freelist_object_size(size_t idx) {
return (idx + 1U) * GC_INLINE_GRANULE_BYTES;
}
struct gc_heap* gc_mutator_heap(struct gc_mutator *mutator) {
return __the_bdw_gc_heap;
}
uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap) {
GC_CRASH();
}
uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap) {
GC_CRASH();
}
// The values of these must match the internal POINTERLESS and NORMAL
// definitions in libgc, for which unfortunately there are no external
// definitions. Alack.
enum gc_inline_kind {
GC_INLINE_KIND_POINTERLESS,
GC_INLINE_KIND_NORMAL
};
static inline void *
allocate_small(void **freelist, size_t idx, enum gc_inline_kind kind) {
void *head = *freelist;
if (!head) {
size_t bytes = gc_inline_freelist_object_size(idx);
GC_generic_malloc_many(bytes, kind, freelist);
head = *freelist;
if (GC_UNLIKELY (!head)) {
fprintf(stderr, "ran out of space, heap size %zu\n",
GC_get_heap_size());
GC_CRASH();
}
}
*freelist = *(void **)(head);
if (kind == GC_INLINE_KIND_POINTERLESS)
memset(head, 0, gc_inline_freelist_object_size(idx));
return head;
}
void* gc_allocate_slow(struct gc_mutator *mut, size_t size,
enum gc_allocation_kind kind) {
GC_ASSERT(size != 0);
if (size <= gc_allocator_large_threshold()) {
size_t idx = gc_inline_bytes_to_freelist_index(size);
void **freelists;
enum gc_inline_kind freelist_kind;
switch (kind) {
case GC_ALLOCATION_TAGGED:
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
return allocate_small(&mut->freelists[idx], idx, GC_INLINE_KIND_NORMAL);
case GC_ALLOCATION_TAGGED_POINTERLESS:
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
return allocate_small(&mut->pointerless_freelists[idx], idx,
GC_INLINE_KIND_POINTERLESS);
default:
GC_CRASH();
}
} else {
switch (kind) {
case GC_ALLOCATION_TAGGED:
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
return GC_malloc(size);
case GC_ALLOCATION_TAGGED_POINTERLESS:
case GC_ALLOCATION_UNTAGGED_POINTERLESS: {
void *ret = GC_malloc_atomic(size);
memset(ret, 0, size);
return ret;
}
default:
GC_CRASH();
}
}
}
void gc_pin_object(struct gc_mutator *mut, struct gc_ref ref) {
// Nothing to do.
}
void gc_collect(struct gc_mutator *mut,
enum gc_collection_kind requested_kind) {
switch (requested_kind) {
case GC_COLLECTION_MINOR:
GC_collect_a_little();
break;
case GC_COLLECTION_ANY:
case GC_COLLECTION_MAJOR:
GC_gcollect();
break;
case GC_COLLECTION_COMPACTING:
GC_gcollect_and_unmap();
break;
default:
GC_CRASH();
}
}
int gc_object_is_old_generation_slow(struct gc_mutator *mut,
struct gc_ref obj) {
return 0;
}
void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj,
size_t obj_size, struct gc_edge edge,
struct gc_ref new_val) {
}
int* gc_safepoint_flag_loc(struct gc_mutator *mut) { GC_CRASH(); }
void gc_safepoint_slow(struct gc_mutator *mut) { GC_CRASH(); }
struct bdw_mark_state {
struct GC_ms_entry *mark_stack_ptr;
struct GC_ms_entry *mark_stack_limit;
};
static void bdw_mark_edge(struct gc_edge edge, struct gc_heap *heap,
void *visit_data) {
struct bdw_mark_state *state = visit_data;
uintptr_t addr = gc_ref_value(gc_edge_ref(edge));
state->mark_stack_ptr = GC_MARK_AND_PUSH ((void *) addr,
state->mark_stack_ptr,
state->mark_stack_limit,
NULL);
}
static int heap_gc_kind;
static int mutator_gc_kind;
static int ephemeron_gc_kind;
static int finalizer_gc_kind;
// In BDW-GC, we can't hook into the mark phase to call
// gc_trace_ephemerons_for_object, so the advertised ephemeron strategy
// doesn't really work. The primitives that we have are mark functions,
// which run during GC and can't allocate; finalizers, which run after
// GC and can allocate but can't add to the connectivity graph; and
// disappearing links, which are cleared at the end of marking, in the
// stop-the-world phase. It does not appear to be possible to implement
// ephemerons using these primitives. Instead fall back to weak-key
// tables.
struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) {
return GC_generic_malloc(gc_ephemeron_size(), ephemeron_gc_kind);
}
unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) {
return GC_get_gc_no();
}
void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron,
struct gc_ref key, struct gc_ref value) {
gc_ephemeron_init_internal(mut->heap, ephemeron, key, value);
if (GC_base((void*)gc_ref_value(key))) {
struct gc_ref *loc = gc_edge_loc(gc_ephemeron_key_edge(ephemeron));
GC_register_disappearing_link((void**)loc);
}
}
int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
// Pretend the key is traced, to avoid adding this ephemeron to the
// global table.
return 1;
}
struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) {
return GC_generic_malloc(gc_finalizer_size(), finalizer_gc_kind);
}
static void finalize_object(void *obj, void *data) {
struct gc_finalizer *f = data;
gc_finalizer_externally_fired(__the_bdw_gc_heap->finalizer_state, f);
}
void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer,
unsigned priority, struct gc_ref object,
struct gc_ref closure) {
// Don't bother much about the actual finalizer; just delegate to BDW-GC.
GC_finalization_proc prev = NULL;
void *prev_data = NULL;
gc_finalizer_init_internal(finalizer, object, closure);
gc_finalizer_externally_activated(finalizer);
GC_register_finalizer_no_order(gc_ref_heap_object(object), finalize_object,
finalizer, &prev, &prev_data);
// FIXME: Allow multiple finalizers per object.
GC_ASSERT(prev == NULL);
GC_ASSERT(prev_data == NULL);
}
struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) {
GC_invoke_finalizers();
return gc_finalizer_state_pop(mut->heap->finalizer_state);
}
void gc_set_finalizer_callback(struct gc_heap *heap,
gc_finalizer_callback callback) {
heap->have_finalizers = callback;
}
static void have_finalizers(void) {
struct gc_heap *heap = __the_bdw_gc_heap;
if (heap->have_finalizers)
heap->have_finalizers(heap, 1);
}
static struct GC_ms_entry *
mark_ephemeron(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
struct GC_ms_entry *mark_stack_limit, GC_word env) {
struct bdw_mark_state state = {
mark_stack_ptr,
mark_stack_limit,
};
struct gc_ephemeron *ephemeron = (struct gc_ephemeron*) addr;
// If this ephemeron is on a freelist, its first word will be a
// freelist link and everything else will be NULL.
if (!gc_ref_value(gc_edge_ref(gc_ephemeron_value_edge(ephemeron)))) {
bdw_mark_edge(gc_edge(addr), NULL, &state);
return state.mark_stack_ptr;
}
if (!gc_ref_value(gc_edge_ref(gc_ephemeron_key_edge(ephemeron)))) {
// If the key died in a previous collection, the disappearing link
// will have been cleared. Mark the ephemeron as dead.
gc_ephemeron_mark_dead(ephemeron);
}
gc_trace_ephemeron(ephemeron, bdw_mark_edge, NULL, &state);
return state.mark_stack_ptr;
}
static struct GC_ms_entry *
mark_finalizer(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
struct GC_ms_entry *mark_stack_limit, GC_word env) {
struct bdw_mark_state state = {
mark_stack_ptr,
mark_stack_limit,
};
struct gc_finalizer *finalizer = (struct gc_finalizer*) addr;
// If this ephemeron is on a freelist, its first word will be a
// freelist link and everything else will be NULL.
if (!gc_ref_value(gc_finalizer_object(finalizer))) {
bdw_mark_edge(gc_edge(addr), NULL, &state);
return state.mark_stack_ptr;
}
gc_trace_finalizer(finalizer, bdw_mark_edge, NULL, &state);
return state.mark_stack_ptr;
}
static struct GC_ms_entry *
mark_heap(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
struct GC_ms_entry *mark_stack_limit, GC_word env) {
struct bdw_mark_state state = {
mark_stack_ptr,
mark_stack_limit,
};
struct gc_heap *heap = (struct gc_heap*) addr;
// If this heap is on a freelist... well probably we are screwed, BDW
// isn't really made to do multiple heaps in a process. But still, in
// this case, the first word is the freelist and the rest are null.
if (heap->freelist) {
bdw_mark_edge(gc_edge(addr), NULL, &state);
return state.mark_stack_ptr;
}
if (heap->roots)
gc_trace_heap_roots(heap->roots, bdw_mark_edge, heap, &state);
gc_visit_finalizer_roots(heap->finalizer_state, bdw_mark_edge, heap, &state);
state.mark_stack_ptr = GC_MARK_AND_PUSH (heap->mutators,
state.mark_stack_ptr,
state.mark_stack_limit,
NULL);
return state.mark_stack_ptr;
}
static struct GC_ms_entry *
mark_mutator(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
struct GC_ms_entry *mark_stack_limit, GC_word env) {
struct bdw_mark_state state = {
mark_stack_ptr,
mark_stack_limit,
};
struct gc_mutator *mut = (struct gc_mutator*) addr;
// If this mutator is on a freelist, its first word will be a
// freelist link and everything else will be NULL.
if (!mut->heap) {
bdw_mark_edge(gc_edge(addr), NULL, &state);
return state.mark_stack_ptr;
}
for (int i = 0; i < GC_INLINE_FREELIST_COUNT; i++)
state.mark_stack_ptr = GC_MARK_AND_PUSH (mut->freelists[i],
state.mark_stack_ptr,
state.mark_stack_limit,
NULL);
for (int i = 0; i < GC_INLINE_FREELIST_COUNT; i++)
for (void *head = mut->pointerless_freelists[i]; head; head = *(void**)head)
state.mark_stack_ptr = GC_MARK_AND_PUSH (head,
state.mark_stack_ptr,
state.mark_stack_limit,
NULL);
if (mut->roots)
gc_trace_mutator_roots(mut->roots, bdw_mark_edge, mut->heap, &state);
state.mark_stack_ptr = GC_MARK_AND_PUSH (mut->next,
state.mark_stack_ptr,
state.mark_stack_limit,
NULL);
return state.mark_stack_ptr;
}
static inline struct gc_mutator *add_mutator(struct gc_heap *heap) {
struct gc_mutator *ret =
GC_generic_malloc(sizeof(struct gc_mutator), mutator_gc_kind);
ret->heap = heap;
ret->event_listener_data =
heap->event_listener.mutator_added(heap->event_listener_data);
pthread_mutex_lock(&heap->lock);
ret->next = heap->mutators;
ret->prev = &heap->mutators;
if (ret->next)
ret->next->prev = &ret->next;
heap->mutators = ret;
pthread_mutex_unlock(&heap->lock);
return ret;
}
struct gc_options {
struct gc_common_options common;
};
int gc_option_from_string(const char *str) {
return gc_common_option_from_string(str);
}
struct gc_options* gc_allocate_options(void) {
struct gc_options *ret = malloc(sizeof(struct gc_options));
gc_init_common_options(&ret->common);
return ret;
}
int gc_options_set_int(struct gc_options *options, int option, int value) {
return gc_common_options_set_int(&options->common, option, value);
}
int gc_options_set_size(struct gc_options *options, int option,
size_t value) {
return gc_common_options_set_size(&options->common, option, value);
}
int gc_options_set_double(struct gc_options *options, int option,
double value) {
return gc_common_options_set_double(&options->common, option, value);
}
int gc_options_parse_and_set(struct gc_options *options, int option,
const char *value) {
return gc_common_options_parse_and_set(&options->common, option, value);
}
struct gc_pending_ephemerons *
gc_heap_pending_ephemerons(struct gc_heap *heap) {
GC_CRASH();
return NULL;
}
static void on_collection_event(GC_EventType event) {
switch (event) {
case GC_EVENT_START: {
HEAP_EVENT(requesting_stop);
HEAP_EVENT(waiting_for_stop);
break;
}
case GC_EVENT_MARK_START:
HEAP_EVENT(mutators_stopped);
HEAP_EVENT(prepare_gc, GC_COLLECTION_MAJOR);
break;
case GC_EVENT_MARK_END:
HEAP_EVENT(roots_traced);
HEAP_EVENT(heap_traced);
break;
case GC_EVENT_RECLAIM_START:
break;
case GC_EVENT_RECLAIM_END:
// Sloppily attribute finalizers and eager reclamation to
// ephemerons.
HEAP_EVENT(ephemerons_traced);
HEAP_EVENT(live_data_size, GC_get_heap_size() - GC_get_free_bytes());
break;
case GC_EVENT_END:
HEAP_EVENT(restarting_mutators);
break;
case GC_EVENT_PRE_START_WORLD:
case GC_EVENT_POST_STOP_WORLD:
// Can't rely on these, as they are only fired when threads are
// enabled.
break;
case GC_EVENT_THREAD_SUSPENDED:
case GC_EVENT_THREAD_UNSUSPENDED:
// No nice way to map back to the mutator.
break;
default:
break;
}
}
static void on_heap_resize(GC_word size) {
HEAP_EVENT(heap_resized, size);
}
uint64_t gc_allocation_counter(struct gc_heap *heap) {
return GC_get_total_bytes();
}
int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
struct gc_heap **heap, struct gc_mutator **mutator,
struct gc_event_listener event_listener,
void *event_listener_data) {
// Root the heap, which will also cause all mutators to be marked.
GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_INLINE_GRANULE_BYTES);
GC_ASSERT_EQ(gc_allocator_large_threshold(),
GC_INLINE_FREELIST_COUNT * GC_INLINE_GRANULE_BYTES);
GC_ASSERT_EQ(__the_bdw_gc_heap, NULL);
if (!options) options = gc_allocate_options();
// Ignore stack base for main thread.
switch (options->common.heap_size_policy) {
case GC_HEAP_SIZE_FIXED:
GC_set_max_heap_size(options->common.heap_size);
break;
case GC_HEAP_SIZE_GROWABLE: {
if (options->common.maximum_heap_size)
GC_set_max_heap_size(options->common.maximum_heap_size);
// BDW uses a pretty weird heap-sizing heuristic:
//
// heap-size = live-data * (1 + (2 / GC_free_space_divisor))
// heap-size-multiplier = heap-size/live-data = 1 + 2/GC_free_space_divisor
// GC_free_space_divisor = 2/(heap-size-multiplier-1)
//
// (Assumption: your heap is mostly "composite", i.e. not
// "atomic". See bdw's alloc.c:min_bytes_allocd.)
double fsd = 2.0/(options->common.heap_size_multiplier - 1);
// But, the divisor is an integer. WTF. This caps the effective
// maximum heap multiplier at 3. Oh well.
GC_set_free_space_divisor(fsd + 0.51);
break;
}
case GC_HEAP_SIZE_ADAPTIVE:
default:
fprintf(stderr, "adaptive heap sizing unsupported by bdw-gc\n");
return 0;
}
GC_set_all_interior_pointers (0);
GC_set_finalize_on_demand (1);
GC_set_finalizer_notifier(have_finalizers);
// Not part of 7.3, sigh. Have to set an env var.
// GC_set_markers_count(options->common.parallelism);
char markers[21] = {0,}; // 21 bytes enough for 2**64 in decimal + NUL.
snprintf(markers, sizeof(markers), "%d", options->common.parallelism);
setenv("GC_MARKERS", markers, 1);
GC_init();
size_t current_heap_size = GC_get_heap_size();
if (options->common.heap_size > current_heap_size)
GC_expand_hp(options->common.heap_size - current_heap_size);
GC_allow_register_threads();
{
int add_size_to_descriptor = 0;
int clear_memory = 1;
heap_gc_kind = GC_new_kind(GC_new_free_list(),
GC_MAKE_PROC(GC_new_proc(mark_heap), 0),
add_size_to_descriptor, clear_memory);
mutator_gc_kind = GC_new_kind(GC_new_free_list(),
GC_MAKE_PROC(GC_new_proc(mark_mutator), 0),
add_size_to_descriptor, clear_memory);
ephemeron_gc_kind = GC_new_kind(GC_new_free_list(),
GC_MAKE_PROC(GC_new_proc(mark_ephemeron), 0),
add_size_to_descriptor, clear_memory);
finalizer_gc_kind = GC_new_kind(GC_new_free_list(),
GC_MAKE_PROC(GC_new_proc(mark_finalizer), 0),
add_size_to_descriptor, clear_memory);
}
*heap = GC_generic_malloc(sizeof(struct gc_heap), heap_gc_kind);
pthread_mutex_init(&(*heap)->lock, NULL);
(*heap)->event_listener = event_listener;
(*heap)->event_listener_data = event_listener_data;
(*heap)->finalizer_state = gc_make_finalizer_state();
__the_bdw_gc_heap = *heap;
HEAP_EVENT(init, GC_get_heap_size());
GC_set_on_collection_event(on_collection_event);
GC_set_on_heap_resize(on_heap_resize);
*mutator = add_mutator(*heap);
// Sanity check.
if (!GC_is_visible (&__the_bdw_gc_heap))
abort ();
return 1;
}
struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base,
struct gc_heap *heap) {
struct GC_stack_base base = { stack_base };
GC_register_my_thread(&base);
return add_mutator(heap);
}
void gc_finish_for_thread(struct gc_mutator *mut) {
pthread_mutex_lock(&mut->heap->lock);
MUTATOR_EVENT(mut, mutator_removed);
*mut->prev = mut->next;
if (mut->next)
mut->next->prev = mut->prev;
pthread_mutex_unlock(&mut->heap->lock);
GC_unregister_my_thread();
}
void* gc_call_without_gc(struct gc_mutator *mut,
void* (*f)(void*),
void *data) {
return GC_do_blocking(f, data);
}
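// Illustrative embedder startup for this BDW backend (a sketch, not in
// the original file; `listener`, `listener_data`, `stack_base`,
// `blocking_fn` and `data` are placeholder names). The stack base is
// ignored for the main thread, as noted in gc_init above.
//
//   struct gc_heap *heap; struct gc_mutator *mut;
//   gc_init(NULL, NULL, &heap, &mut, listener, listener_data);
//   ...
//   // On each additional thread:
//   struct gc_mutator *m = gc_init_for_thread(stack_base, heap);
//   ... allocate; wrap blocking calls in
//       gc_call_without_gc(m, blocking_fn, data) ...
//   gc_finish_for_thread(m);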
void gc_mutator_set_roots(struct gc_mutator *mut,
struct gc_mutator_roots *roots) {
mut->roots = roots;
}
void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) {
heap->roots = roots;
}
void gc_heap_set_extern_space(struct gc_heap *heap,
struct gc_extern_space *space) {
}

View file

@@ -0,0 +1,979 @@
#ifndef COPY_SPACE_H
#define COPY_SPACE_H
#include <pthread.h>
#include <stdlib.h>
#include "gc-api.h"
#define GC_IMPL 1
#include "gc-internal.h"
#include "assert.h"
#include "background-thread.h"
#include "debug.h"
#include "extents.h"
#include "gc-align.h"
#include "gc-attrs.h"
#include "gc-inline.h"
#include "gc-lock.h"
#include "gc-platform.h"
#include "spin.h"
// A copy space: a block-structured space that traces via evacuation.
#define COPY_SPACE_SLAB_SIZE (64 * 1024 * 1024)
#define COPY_SPACE_REGION_SIZE (64 * 1024)
#define COPY_SPACE_BLOCK_SIZE (2 * COPY_SPACE_REGION_SIZE)
#define COPY_SPACE_BLOCKS_PER_SLAB \
(COPY_SPACE_SLAB_SIZE / COPY_SPACE_BLOCK_SIZE)
#define COPY_SPACE_HEADER_BYTES_PER_BLOCK \
(COPY_SPACE_BLOCK_SIZE / COPY_SPACE_BLOCKS_PER_SLAB)
#define COPY_SPACE_HEADER_BLOCKS_PER_SLAB 1
#define COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB \
(COPY_SPACE_BLOCKS_PER_SLAB - COPY_SPACE_HEADER_BLOCKS_PER_SLAB)
#define COPY_SPACE_HEADER_BYTES_PER_SLAB \
(COPY_SPACE_HEADER_BYTES_PER_BLOCK * COPY_SPACE_HEADER_BLOCKS_PER_SLAB)
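// With the constants above (illustrative arithmetic, not in the
// original source): a slab is 64 MB and a block 128 kB, so there are
// 512 blocks per slab and each block header gets 128 kB / 512 = 256
// bytes. The 256-byte slab header plus the 511 non-header block
// headers (512 * 256 bytes = 128 kB in total) fill exactly the one
// block reserved as COPY_SPACE_HEADER_BLOCKS_PER_SLAB.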
struct copy_space_slab;
struct copy_space_slab_header {
union {
struct {
struct copy_space_slab *next;
struct copy_space_slab *prev;
unsigned incore_block_count;
};
uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_SLAB];
};
};
STATIC_ASSERT_EQ(sizeof(struct copy_space_slab_header),
COPY_SPACE_HEADER_BYTES_PER_SLAB);
// Really just the block header.
struct copy_space_block {
union {
struct {
struct copy_space_block *next;
uint8_t in_core;
uint8_t all_zeroes[2];
uint8_t is_survivor[2];
size_t allocated; // For partly-empty blocks.
};
uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_BLOCK];
};
};
STATIC_ASSERT_EQ(sizeof(struct copy_space_block),
COPY_SPACE_HEADER_BYTES_PER_BLOCK);
struct copy_space_region {
char data[COPY_SPACE_REGION_SIZE];
};
struct copy_space_block_payload {
struct copy_space_region regions[2];
};
struct copy_space_slab {
struct copy_space_slab_header header;
struct copy_space_block headers[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB];
struct copy_space_block_payload blocks[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB];
};
STATIC_ASSERT_EQ(sizeof(struct copy_space_slab), COPY_SPACE_SLAB_SIZE);
static inline struct copy_space_block*
copy_space_block_for_addr(uintptr_t addr) {
uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
struct copy_space_slab *slab = (struct copy_space_slab*) base;
uintptr_t block_idx =
(addr / COPY_SPACE_BLOCK_SIZE) % COPY_SPACE_BLOCKS_PER_SLAB;
return &slab->headers[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB];
}
static inline struct copy_space_block*
copy_space_block_header(struct copy_space_block_payload *payload) {
return copy_space_block_for_addr((uintptr_t) payload);
}
static inline struct copy_space_block_payload*
copy_space_block_payload(struct copy_space_block *block) {
uintptr_t addr = (uintptr_t) block;
uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
struct copy_space_slab *slab = (struct copy_space_slab*) base;
uintptr_t block_idx =
(addr / COPY_SPACE_HEADER_BYTES_PER_BLOCK) % COPY_SPACE_BLOCKS_PER_SLAB;
return &slab->blocks[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB];
}
static uint8_t
copy_space_object_region(struct gc_ref obj) {
return (gc_ref_value(obj) / COPY_SPACE_REGION_SIZE) & 1;
}
#define COPY_SPACE_PAGE_OUT_QUEUE_SIZE 4
struct copy_space_block_list {
struct copy_space_block *head;
};
struct copy_space_block_stack {
struct copy_space_block_list list;
};
enum copy_space_flags {
COPY_SPACE_ATOMIC_FORWARDING = 1,
COPY_SPACE_ALIGNED = 2,
COPY_SPACE_HAS_FIELD_LOGGING_BITS = 4,
};
struct copy_space {
pthread_mutex_t lock;
struct copy_space_block_stack empty;
struct copy_space_block_stack partly_full;
struct copy_space_block_list full ALIGNED_TO_AVOID_FALSE_SHARING;
size_t allocated_bytes;
size_t fragmentation;
struct copy_space_block_stack paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE]
ALIGNED_TO_AVOID_FALSE_SHARING;
ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING;
// The rest of these members are only changed rarely and with the heap
// lock.
uint8_t active_region ALIGNED_TO_AVOID_FALSE_SHARING;
uint8_t atomic_forward;
uint8_t in_gc;
uint32_t flags;
size_t allocated_bytes_at_last_gc;
size_t fragmentation_at_last_gc;
struct extents *extents;
struct copy_space_slab **slabs;
size_t nslabs;
};
enum copy_space_forward_result {
// We went to forward an edge, but the target was already forwarded, so we
// just updated the edge.
COPY_SPACE_FORWARD_UPDATED,
// We went to forward an edge and evacuated the referent to a new location.
COPY_SPACE_FORWARD_EVACUATED,
// We went to forward an edge but failed to acquire memory for its new
// location.
COPY_SPACE_FORWARD_FAILED,
};
struct copy_space_allocator {
uintptr_t hp;
uintptr_t limit;
struct copy_space_block *block;
};
static struct gc_lock
copy_space_lock(struct copy_space *space) {
return gc_lock_acquire(&space->lock);
}
static void
copy_space_block_list_push(struct copy_space_block_list *list,
struct copy_space_block *block) {
struct copy_space_block *next =
atomic_load_explicit(&list->head, memory_order_acquire);
do {
block->next = next;
} while (!atomic_compare_exchange_weak(&list->head, &next, block));
}
static struct copy_space_block*
copy_space_block_list_pop(struct copy_space_block_list *list) {
struct copy_space_block *head =
atomic_load_explicit(&list->head, memory_order_acquire);
struct copy_space_block *next;
do {
if (!head)
return NULL;
} while (!atomic_compare_exchange_weak(&list->head, &head, head->next));
head->next = NULL;
return head;
}
static void
copy_space_block_stack_push(struct copy_space_block_stack *stack,
struct copy_space_block *block,
const struct gc_lock *lock) {
struct copy_space_block *next = stack->list.head;
block->next = next;
stack->list.head = block;
}
static struct copy_space_block*
copy_space_block_stack_pop(struct copy_space_block_stack *stack,
const struct gc_lock *lock) {
struct copy_space_block *head = stack->list.head;
if (head) {
stack->list.head = head->next;
head->next = NULL;
}
return head;
}
static struct copy_space_block*
copy_space_pop_empty_block(struct copy_space *space,
const struct gc_lock *lock) {
struct copy_space_block *ret = copy_space_block_stack_pop(&space->empty,
lock);
if (ret) {
ret->allocated = 0;
ret->is_survivor[space->active_region] = 0;
}
return ret;
}
static void
copy_space_push_empty_block(struct copy_space *space,
struct copy_space_block *block,
const struct gc_lock *lock) {
copy_space_block_stack_push(&space->empty, block, lock);
}
static struct copy_space_block*
copy_space_pop_full_block(struct copy_space *space) {
return copy_space_block_list_pop(&space->full);
}
static void
copy_space_push_full_block(struct copy_space *space,
struct copy_space_block *block) {
if (space->in_gc)
block->is_survivor[space->active_region] = 1;
copy_space_block_list_push(&space->full, block);
}
static struct copy_space_block*
copy_space_pop_partly_full_block(struct copy_space *space,
const struct gc_lock *lock) {
return copy_space_block_stack_pop(&space->partly_full, lock);
}
static void
copy_space_push_partly_full_block(struct copy_space *space,
struct copy_space_block *block,
const struct gc_lock *lock) {
copy_space_block_stack_push(&space->partly_full, block, lock);
}
static void
copy_space_page_out_block(struct copy_space *space,
struct copy_space_block *block,
const struct gc_lock *lock) {
copy_space_block_stack_push
(block->in_core
? &space->paged_out[0]
: &space->paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE-1],
block,
lock);
}
static struct copy_space_block*
copy_space_page_in_block(struct copy_space *space,
const struct gc_lock *lock) {
for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) {
struct copy_space_block *block =
copy_space_block_stack_pop(&space->paged_out[age], lock);
if (block) return block;
}
return NULL;
}
static ssize_t
copy_space_request_release_memory(struct copy_space *space, size_t bytes) {
return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes;
}
static int
copy_space_page_out_blocks_until_memory_released(struct copy_space *space) {
ssize_t pending = atomic_load(&space->bytes_to_page_out);
struct gc_lock lock = copy_space_lock(space);
while (pending > 0) {
struct copy_space_block *block = copy_space_pop_empty_block(space, &lock);
if (!block) break;
copy_space_page_out_block(space, block, &lock);
pending = (atomic_fetch_sub(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE)
- COPY_SPACE_BLOCK_SIZE);
}
gc_lock_release(&lock);
return pending <= 0;
}
static ssize_t
copy_space_maybe_reacquire_memory(struct copy_space *space, size_t bytes) {
ssize_t pending =
atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes;
struct gc_lock lock = copy_space_lock(space);
while (pending + COPY_SPACE_BLOCK_SIZE <= 0) {
struct copy_space_block *block = copy_space_page_in_block(space, &lock);
if (!block) break;
copy_space_push_empty_block(space, block, &lock);
pending = (atomic_fetch_add(&space->bytes_to_page_out,
COPY_SPACE_BLOCK_SIZE)
+ COPY_SPACE_BLOCK_SIZE);
}
gc_lock_release(&lock);
return pending;
}
static void
copy_space_reacquire_memory(struct copy_space *space, size_t bytes) {
ssize_t pending = copy_space_maybe_reacquire_memory(space, bytes);
GC_ASSERT(pending + COPY_SPACE_BLOCK_SIZE > 0);
}
static inline int
copy_space_contains_address(struct copy_space *space, uintptr_t addr) {
return extents_contain_addr(space->extents, addr);
}
static inline int
copy_space_contains(struct copy_space *space, struct gc_ref ref) {
return copy_space_contains_address(space, gc_ref_value(ref));
}
static int
copy_space_has_field_logging_bits(struct copy_space *space) {
return space->flags & COPY_SPACE_HAS_FIELD_LOGGING_BITS;
}
static size_t
copy_space_field_logging_blocks(struct copy_space *space) {
if (!copy_space_has_field_logging_bits(space))
return 0;
size_t bytes = COPY_SPACE_SLAB_SIZE / sizeof (uintptr_t) / 8;
size_t blocks =
align_up(bytes, COPY_SPACE_BLOCK_SIZE) / COPY_SPACE_BLOCK_SIZE;
return blocks;
}
static uint8_t*
copy_space_field_logged_byte(struct gc_edge edge) {
uintptr_t addr = gc_edge_address(edge);
uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
base += offsetof(struct copy_space_slab, blocks);
uintptr_t field = (addr & (COPY_SPACE_SLAB_SIZE - 1)) / sizeof(uintptr_t);
uintptr_t byte = field / 8;
return (uint8_t*) (base + byte);
}
static uint8_t
copy_space_field_logged_bit(struct gc_edge edge) {
// Each byte has 8 bits, covering 8 fields.
size_t field = gc_edge_address(edge) / sizeof(uintptr_t);
return 1 << (field % 8);
}
static void
copy_space_clear_field_logged_bits_for_region(struct copy_space *space,
void *region_base) {
uintptr_t addr = (uintptr_t)region_base;
GC_ASSERT_EQ(addr, align_down(addr, COPY_SPACE_REGION_SIZE));
GC_ASSERT(copy_space_contains_address(space, addr));
if (copy_space_has_field_logging_bits(space))
memset(copy_space_field_logged_byte(gc_edge(region_base)),
0,
COPY_SPACE_REGION_SIZE / sizeof(uintptr_t) / 8);
}
static void
copy_space_clear_field_logged_bits_for_block(struct copy_space *space,
struct copy_space_block *block) {
struct copy_space_block_payload *payload = copy_space_block_payload(block);
copy_space_clear_field_logged_bits_for_region(space, &payload->regions[0]);
copy_space_clear_field_logged_bits_for_region(space, &payload->regions[1]);
}
static inline void
copy_space_allocator_set_block(struct copy_space_allocator *alloc,
struct copy_space_block *block,
int active_region) {
struct copy_space_block_payload *payload = copy_space_block_payload(block);
struct copy_space_region *region = &payload->regions[active_region];
alloc->block = block;
alloc->hp = (uintptr_t)&region[0];
alloc->limit = (uintptr_t)&region[1];
}
static inline int
copy_space_allocator_acquire_block(struct copy_space_allocator *alloc,
struct copy_space_block *block,
int active_region) {
if (block) {
copy_space_allocator_set_block(alloc, block, active_region);
return 1;
}
return 0;
}
static int
copy_space_allocator_acquire_empty_block(struct copy_space_allocator *alloc,
struct copy_space *space) {
struct gc_lock lock = copy_space_lock(space);
struct copy_space_block *block = copy_space_pop_empty_block(space, &lock);
gc_lock_release(&lock);
if (copy_space_allocator_acquire_block(alloc, block, space->active_region)) {
block->in_core = 1;
if (block->all_zeroes[space->active_region]) {
block->all_zeroes[space->active_region] = 0;
} else {
memset((char*)alloc->hp, 0, COPY_SPACE_REGION_SIZE);
copy_space_clear_field_logged_bits_for_region(space, (void*)alloc->hp);
}
return 1;
}
return 0;
}
static int
copy_space_allocator_acquire_partly_full_block(struct copy_space_allocator *alloc,
struct copy_space *space) {
struct gc_lock lock = copy_space_lock(space);
struct copy_space_block *block = copy_space_pop_partly_full_block(space,
&lock);
gc_lock_release(&lock);
if (copy_space_allocator_acquire_block(alloc, block, space->active_region)) {
alloc->hp += block->allocated;
return 1;
}
return 0;
}
static void
copy_space_allocator_release_full_block(struct copy_space_allocator *alloc,
struct copy_space *space) {
size_t fragmentation = alloc->limit - alloc->hp;
size_t allocated = COPY_SPACE_REGION_SIZE - alloc->block->allocated;
atomic_fetch_add_explicit(&space->allocated_bytes, allocated,
memory_order_relaxed);
if (fragmentation)
atomic_fetch_add_explicit(&space->fragmentation, fragmentation,
memory_order_relaxed);
copy_space_push_full_block(space, alloc->block);
alloc->hp = alloc->limit = 0;
alloc->block = NULL;
}
static void
copy_space_allocator_release_partly_full_block(struct copy_space_allocator *alloc,
struct copy_space *space) {
size_t allocated = alloc->hp & (COPY_SPACE_REGION_SIZE - 1);
if (allocated) {
atomic_fetch_add_explicit(&space->allocated_bytes,
allocated - alloc->block->allocated,
memory_order_relaxed);
alloc->block->allocated = allocated;
struct gc_lock lock = copy_space_lock(space);
copy_space_push_partly_full_block(space, alloc->block, &lock);
gc_lock_release(&lock);
} else {
// In this case, hp was bumped all the way to the limit, in which
// case allocated wraps to 0; the block is full.
atomic_fetch_add_explicit(&space->allocated_bytes,
COPY_SPACE_REGION_SIZE - alloc->block->allocated,
memory_order_relaxed);
copy_space_push_full_block(space, alloc->block);
}
alloc->hp = alloc->limit = 0;
alloc->block = NULL;
}
static inline struct gc_ref
copy_space_allocate(struct copy_space_allocator *alloc,
struct copy_space *space,
size_t size) {
GC_ASSERT(size > 0);
GC_ASSERT(size <= gc_allocator_large_threshold());
size = align_up(size, gc_allocator_small_granule_size());
if (alloc->hp + size <= alloc->limit)
goto done;
if (alloc->block)
copy_space_allocator_release_full_block(alloc, space);
while (copy_space_allocator_acquire_partly_full_block(alloc, space)) {
if (alloc->hp + size <= alloc->limit)
goto done;
copy_space_allocator_release_full_block(alloc, space);
}
if (!copy_space_allocator_acquire_empty_block(alloc, space))
return gc_ref_null();
// The newly acquired block is empty and is therefore large enough for
// a small allocation.
done:
struct gc_ref ret = gc_ref(alloc->hp);
alloc->hp += size;
return ret;
}
static struct copy_space_block*
copy_space_append_block_lists(struct copy_space_block *head,
struct copy_space_block *tail) {
if (!head) return tail;
if (tail) {
struct copy_space_block *walk = head;
while (walk->next)
walk = walk->next;
walk->next = tail;
}
return head;
}
static void
copy_space_flip(struct copy_space *space) {
// Mutators stopped, can access nonatomically.
struct copy_space_block* flip = space->full.head;
flip = copy_space_append_block_lists(space->partly_full.list.head, flip);
flip = copy_space_append_block_lists(space->empty.list.head, flip);
space->empty.list.head = flip;
space->partly_full.list.head = NULL;
space->full.head = NULL;
space->allocated_bytes = 0;
space->fragmentation = 0;
space->active_region ^= 1;
space->in_gc = 1;
}
static inline void
copy_space_allocator_init(struct copy_space_allocator *alloc) {
memset(alloc, 0, sizeof(*alloc));
}
static inline void
copy_space_allocator_finish(struct copy_space_allocator *alloc,
struct copy_space *space) {
if (alloc->block)
copy_space_allocator_release_partly_full_block(alloc, space);
}
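// Illustrative allocation sequence (a sketch, not in the original
// header), assuming `space` was set up with copy_space_init below:
//
//   struct copy_space_allocator alloc;
//   copy_space_allocator_init(&alloc);
//   struct gc_ref obj = copy_space_allocate(&alloc, space, 32);
//   if (gc_ref_is_null(obj))
//     ... collect or expand the space, then retry ...
//   copy_space_allocator_finish(&alloc, space);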
static void
copy_space_finish_gc(struct copy_space *space, int is_minor_gc) {
// Mutators stopped, can access nonatomically.
if (is_minor_gc) {
// Avoid mixing survivors and new objects on the same blocks.
struct copy_space_allocator alloc;
copy_space_allocator_init(&alloc);
while (copy_space_allocator_acquire_partly_full_block(&alloc, space))
copy_space_allocator_release_full_block(&alloc, space);
copy_space_allocator_finish(&alloc, space);
}
space->allocated_bytes_at_last_gc = space->allocated_bytes;
space->fragmentation_at_last_gc = space->fragmentation;
space->in_gc = 0;
}
static size_t
copy_space_can_allocate(struct copy_space *space, size_t bytes) {
// With lock!
size_t count = 0;
for (struct copy_space_block *empties = space->empty.list.head;
empties && count < bytes;
empties = empties->next) {
count += COPY_SPACE_REGION_SIZE;
}
return count;
}
static void
copy_space_add_to_allocation_counter(struct copy_space *space,
uint64_t *counter) {
*counter += space->allocated_bytes - space->allocated_bytes_at_last_gc;
}
static void
copy_space_gc_during_evacuation(void *data) {
// If space is really tight and reordering of objects during
// evacuation resulted in more end-of-block fragmentation and thus
// block use than before collection started, we can actually run out
// of memory while collecting. We should probably attempt to expand
// the heap here, at least by a single block; it's better than the
// alternatives.
fprintf(stderr, "Out of memory\n");
GC_CRASH();
}
static inline enum copy_space_forward_result
copy_space_forward_atomic(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref,
struct copy_space_allocator *alloc) {
struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref);
retry:
if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED)
gc_atomic_forward_acquire(&fwd);
switch (fwd.state) {
case GC_FORWARDING_STATE_NOT_FORWARDED:
default:
// Impossible.
GC_CRASH();
case GC_FORWARDING_STATE_ACQUIRED: {
// We claimed the object successfully; evacuating is up to us.
size_t bytes = gc_atomic_forward_object_size(&fwd);
struct gc_ref new_ref = copy_space_allocate(alloc, space, bytes);
if (gc_ref_is_null(new_ref)) {
gc_atomic_forward_abort(&fwd);
return COPY_SPACE_FORWARD_FAILED;
}
// Copy object contents before committing, as we don't know what
// part of the object (if any) will be overwritten by the
// commit.
memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), bytes);
gc_atomic_forward_commit(&fwd, new_ref);
gc_edge_update(edge, new_ref);
return COPY_SPACE_FORWARD_EVACUATED;
}
case GC_FORWARDING_STATE_BUSY:
// Someone else claimed this object first. Spin until new address
// known, or evacuation aborts.
for (size_t spin_count = 0;; spin_count++) {
if (gc_atomic_forward_retry_busy(&fwd))
goto retry;
yield_for_spin(spin_count);
}
GC_CRASH(); // Unreachable.
case GC_FORWARDING_STATE_FORWARDED:
// The object has been evacuated already. Update the edge;
// whoever forwarded the object will make sure it's eventually
// traced.
gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd)));
return COPY_SPACE_FORWARD_UPDATED;
}
}
static int
copy_space_forward_if_traced_atomic(struct copy_space *space,
struct gc_edge edge,
struct gc_ref old_ref) {
struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref);
retry:
switch (fwd.state) {
case GC_FORWARDING_STATE_NOT_FORWARDED:
return 0;
case GC_FORWARDING_STATE_BUSY:
// Someone else claimed this object first. Spin until new address
// known.
for (size_t spin_count = 0;; spin_count++) {
if (gc_atomic_forward_retry_busy(&fwd))
goto retry;
yield_for_spin(spin_count);
}
GC_CRASH(); // Unreachable.
case GC_FORWARDING_STATE_FORWARDED:
gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd)));
return 1;
default:
GC_CRASH();
}
}
static inline enum copy_space_forward_result
copy_space_forward_nonatomic(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref,
struct copy_space_allocator *alloc) {
uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
if (forwarded) {
gc_edge_update(edge, gc_ref(forwarded));
return COPY_SPACE_FORWARD_UPDATED;
} else {
size_t size;
gc_trace_object(old_ref, NULL, NULL, NULL, &size);
struct gc_ref new_ref = copy_space_allocate(alloc, space, size);
if (gc_ref_is_null(new_ref))
return COPY_SPACE_FORWARD_FAILED;
memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), size);
gc_object_forward_nonatomic(old_ref, new_ref);
gc_edge_update(edge, new_ref);
return COPY_SPACE_FORWARD_EVACUATED;
}
}
static int
copy_space_forward_if_traced_nonatomic(struct copy_space *space,
struct gc_edge edge,
struct gc_ref old_ref) {
uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
if (forwarded) {
gc_edge_update(edge, gc_ref(forwarded));
return 1;
}
return 0;
}
static inline enum copy_space_forward_result
copy_space_forward(struct copy_space *src_space, struct copy_space *dst_space,
struct gc_edge edge,
struct gc_ref old_ref,
struct copy_space_allocator *dst_alloc) {
GC_ASSERT(copy_space_contains(src_space, old_ref));
GC_ASSERT(src_space != dst_space
|| copy_space_object_region(old_ref) != src_space->active_region);
if (GC_PARALLEL && src_space->atomic_forward)
return copy_space_forward_atomic(dst_space, edge, old_ref, dst_alloc);
return copy_space_forward_nonatomic(dst_space, edge, old_ref, dst_alloc);
}
static inline int
copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
struct gc_ref old_ref) {
GC_ASSERT(copy_space_contains(space, old_ref));
GC_ASSERT(copy_space_object_region(old_ref) != space->active_region);
if (GC_PARALLEL && space->atomic_forward)
return copy_space_forward_if_traced_atomic(space, edge, old_ref);
return copy_space_forward_if_traced_nonatomic(space, edge, old_ref);
}
static int
copy_space_is_aligned(struct copy_space *space) {
return space->flags & COPY_SPACE_ALIGNED;
}
static int
copy_space_fixed_size(struct copy_space *space) {
// If the extent is aligned, it is fixed.
return copy_space_is_aligned(space);
}
static inline uintptr_t
copy_space_low_aligned_address(struct copy_space *space) {
GC_ASSERT(copy_space_is_aligned(space));
GC_ASSERT_EQ(space->extents->size, 1);
return space->extents->ranges[0].lo_addr;
}
static inline uintptr_t
copy_space_high_aligned_address(struct copy_space *space) {
GC_ASSERT(copy_space_is_aligned(space));
GC_ASSERT_EQ(space->extents->size, 1);
return space->extents->ranges[0].hi_addr;
}
static inline int
copy_space_contains_address_aligned(struct copy_space *space, uintptr_t addr) {
uintptr_t low_addr = copy_space_low_aligned_address(space);
uintptr_t high_addr = copy_space_high_aligned_address(space);
uintptr_t size = high_addr - low_addr;
return (addr - low_addr) < size;
}
static inline int
copy_space_contains_edge_aligned(struct copy_space *space,
struct gc_edge edge) {
return copy_space_contains_address_aligned(space, gc_edge_address(edge));
}
static inline int
copy_space_should_promote(struct copy_space *space, struct gc_ref ref) {
GC_ASSERT(copy_space_contains(space, ref));
uintptr_t addr = gc_ref_value(ref);
struct copy_space_block *block = copy_space_block_for_addr(gc_ref_value(ref));
GC_ASSERT_EQ(copy_space_object_region(ref), space->active_region ^ 1);
return block->is_survivor[space->active_region ^ 1];
}
static int
copy_space_contains_edge(struct copy_space *space, struct gc_edge edge) {
return copy_space_contains_address(space, gc_edge_address(edge));
}
static int
copy_space_remember_edge(struct copy_space *space, struct gc_edge edge) {
GC_ASSERT(copy_space_contains_edge(space, edge));
uint8_t* loc = copy_space_field_logged_byte(edge);
uint8_t bit = copy_space_field_logged_bit(edge);
uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
do {
if (byte & bit) return 0;
} while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit,
memory_order_acq_rel,
memory_order_acquire));
return 1;
}
static int
copy_space_forget_edge(struct copy_space *space, struct gc_edge edge) {
GC_ASSERT(copy_space_contains_edge(space, edge));
uint8_t* loc = copy_space_field_logged_byte(edge);
uint8_t bit = copy_space_field_logged_bit(edge);
uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
do {
if (!(byte & bit)) return 0;
} while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte&~bit,
memory_order_acq_rel,
memory_order_acquire));
return 1;
}
static size_t copy_space_is_power_of_two(size_t n) {
GC_ASSERT(n != 0);
return (n & (n - 1)) == 0;
}
static size_t copy_space_round_up_power_of_two(size_t n) {
if (copy_space_is_power_of_two(n))
return n;
return 1ULL << (sizeof(size_t) * 8 - __builtin_clzll(n));
}
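// Illustrative: a 192 MB reservation for an aligned space rounds up to
// 256 MB; exact powers of two are returned unchanged.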
static struct copy_space_slab*
copy_space_allocate_slabs(size_t nslabs, uint32_t flags) {
size_t size = nslabs * COPY_SPACE_SLAB_SIZE;
size_t alignment = COPY_SPACE_SLAB_SIZE;
if (flags & COPY_SPACE_ALIGNED) {
GC_ASSERT(copy_space_is_power_of_two(size));
alignment = size;
}
return gc_platform_acquire_memory(size, alignment);
}
static void
copy_space_add_slabs(struct copy_space *space, struct copy_space_slab *slabs,
size_t nslabs) {
size_t old_size = space->nslabs * sizeof(struct copy_space_slab*);
size_t additional_size = nslabs * sizeof(struct copy_space_slab*);
space->extents = extents_adjoin(space->extents, slabs,
nslabs * sizeof(struct copy_space_slab));
space->slabs = realloc(space->slabs, old_size + additional_size);
if (!space->slabs)
GC_CRASH();
while (nslabs--)
space->slabs[space->nslabs++] = slabs++;
}
static void
copy_space_shrink(struct copy_space *space, size_t bytes) {
ssize_t pending = copy_space_request_release_memory(space, bytes);
copy_space_page_out_blocks_until_memory_released(space);
// It may still be the case that we need to page out more blocks; only
// collection can help us then!
}
static size_t
copy_space_first_payload_block(struct copy_space *space) {
return copy_space_field_logging_blocks(space);
}
static void
copy_space_expand(struct copy_space *space, size_t bytes) {
GC_ASSERT(!copy_space_fixed_size(space));
ssize_t to_acquire = -copy_space_maybe_reacquire_memory(space, bytes);
if (to_acquire <= 0) return;
size_t reserved = align_up(to_acquire, COPY_SPACE_SLAB_SIZE);
size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE;
struct copy_space_slab *slabs =
copy_space_allocate_slabs(nslabs, space->flags);
copy_space_add_slabs(space, slabs, nslabs);
struct gc_lock lock = copy_space_lock(space);
for (size_t slab = 0; slab < nslabs; slab++) {
for (size_t idx = copy_space_first_payload_block(space);
idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB;
idx++) {
struct copy_space_block *block = &slabs[slab].headers[idx];
block->all_zeroes[0] = block->all_zeroes[1] = 1;
block->in_core = 0;
copy_space_page_out_block(space, block, &lock);
reserved -= COPY_SPACE_BLOCK_SIZE;
}
}
gc_lock_release(&lock);
copy_space_reacquire_memory(space, 0);
}
static void
copy_space_advance_page_out_queue(void *data) {
struct copy_space *space = data;
struct gc_lock lock = copy_space_lock(space);
for (int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) {
while (1) {
struct copy_space_block *block =
copy_space_block_stack_pop(&space->paged_out[age], &lock);
if (!block) break;
copy_space_block_stack_push(&space->paged_out[age + 1], block, &lock);
}
}
gc_lock_release(&lock);
}
static void
copy_space_page_out_blocks(void *data) {
struct copy_space *space = data;
int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 2;
struct gc_lock lock = copy_space_lock(space);
while (1) {
struct copy_space_block *block =
copy_space_block_stack_pop(&space->paged_out[age], &lock);
if (!block) break;
block->in_core = 0;
block->all_zeroes[0] = block->all_zeroes[1] = 1;
gc_platform_discard_memory(copy_space_block_payload(block),
COPY_SPACE_BLOCK_SIZE);
copy_space_clear_field_logged_bits_for_block(space, block);
copy_space_block_stack_push(&space->paged_out[age + 1], block, &lock);
}
gc_lock_release(&lock);
}
static int
copy_space_init(struct copy_space *space, size_t size, uint32_t flags,
struct gc_background_thread *thread) {
size = align_up(size, COPY_SPACE_BLOCK_SIZE);
size_t reserved = align_up(size, COPY_SPACE_SLAB_SIZE);
if (flags & COPY_SPACE_ALIGNED)
reserved = copy_space_round_up_power_of_two(reserved);
size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE;
struct copy_space_slab *slabs = copy_space_allocate_slabs(nslabs, flags);
if (!slabs)
return 0;
pthread_mutex_init(&space->lock, NULL);
space->empty.list.head = NULL;
space->partly_full.list.head = NULL;
space->full.head = NULL;
for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++)
space->paged_out[age].list.head = NULL;
space->allocated_bytes = 0;
space->fragmentation = 0;
space->bytes_to_page_out = 0;
space->active_region = 0;
space->atomic_forward = flags & COPY_SPACE_ATOMIC_FORWARDING;
space->flags = flags;
space->allocated_bytes_at_last_gc = 0;
space->fragmentation_at_last_gc = 0;
space->extents = extents_allocate((flags & COPY_SPACE_ALIGNED) ? 1 : 10);
copy_space_add_slabs(space, slabs, nslabs);
struct gc_lock lock = copy_space_lock(space);
for (size_t slab = 0; slab < nslabs; slab++) {
for (size_t idx = copy_space_first_payload_block(space);
idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB;
idx++) {
struct copy_space_block *block = &slabs[slab].headers[idx];
block->all_zeroes[0] = block->all_zeroes[1] = 1;
block->in_core = 0;
block->is_survivor[0] = block->is_survivor[1] = 0;
if (reserved > size) {
copy_space_page_out_block(space, block, &lock);
reserved -= COPY_SPACE_BLOCK_SIZE;
} else {
copy_space_push_empty_block(space, block, &lock);
}
}
}
gc_lock_release(&lock);
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START,
copy_space_advance_page_out_queue,
space);
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END,
copy_space_page_out_blocks,
space);
return 1;
}
#endif // COPY_SPACE_H

View file

@@ -0,0 +1,10 @@
#ifndef DEBUG_H
#define DEBUG_H
#ifndef NDEBUG
#define DEBUG(...) fprintf (stderr, "DEBUG: " __VA_ARGS__)
#else
#define DEBUG(...) do { } while (0)
#endif
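// Illustrative usage (not part of the original header); the expansion
// assumes <stdio.h> is visible at the call site:
//
//   DEBUG("paging out %zu blocks\n", count);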
#endif // DEBUG_H

View file

@@ -0,0 +1,88 @@
#ifndef EXTENTS_H
#define EXTENTS_H
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "gc-assert.h"
struct extent_range {
uintptr_t lo_addr;
uintptr_t hi_addr;
};
struct extents {
size_t size;
size_t capacity;
struct extent_range ranges[];
};
static inline int
extents_contain_addr(struct extents *extents, uintptr_t addr) {
size_t lo = 0;
size_t hi = extents->size;
while (lo != hi) {
size_t mid = (lo + hi) / 2;
struct extent_range range = extents->ranges[mid];
if (addr < range.lo_addr) {
hi = mid;
} else if (addr < range.hi_addr) {
return 1;
} else {
lo = mid + 1;
}
}
return 0;
}
static struct extents*
extents_allocate(size_t capacity) {
size_t byte_size =
sizeof(struct extents) + sizeof(struct extent_range) * capacity;
struct extents *ret = malloc(byte_size);
if (!ret) __builtin_trap();
memset(ret, 0, byte_size);
ret->capacity = capacity;
return ret;
}
static struct extents*
extents_insert(struct extents *old, size_t idx, struct extent_range range) {
if (old->size < old->capacity) {
size_t bytes_to_move = sizeof(struct extent_range) * (old->size - idx);
memmove(&old->ranges[idx + 1], &old->ranges[idx], bytes_to_move);
old->ranges[idx] = range;
old->size++;
return old;
} else {
struct extents *new_ = extents_allocate(old->capacity * 2 + 1);
memcpy(&new_->ranges[0], &old->ranges[0],
sizeof(struct extent_range) * idx);
memcpy(&new_->ranges[idx + 1], &old->ranges[idx],
sizeof(struct extent_range) * (old->size - idx));
new_->ranges[idx] = range;
new_->size = old->size + 1;
free(old);
return new_;
}
}
static struct extents*
extents_adjoin(struct extents *extents, void *lo_addr, size_t size) {
size_t i;
struct extent_range range = { (uintptr_t)lo_addr, (uintptr_t)lo_addr + size };
for (i = 0; i < extents->size; i++) {
if (range.hi_addr < extents->ranges[i].lo_addr) {
break;
} else if (range.hi_addr == extents->ranges[i].lo_addr) {
extents->ranges[i].lo_addr = range.lo_addr;
return extents;
} else if (range.lo_addr == extents->ranges[i].hi_addr) {
extents->ranges[i].hi_addr = range.hi_addr;
return extents;
}
}
return extents_insert(extents, i, range);
}
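// Illustrative usage (a sketch, not in the original header): adjoining
// a mapping that starts where an existing range ends coalesces the two
// into one extent, so lookups stay a single binary search.
//
//   struct extents *e = extents_allocate(4);
//   e = extents_adjoin(e, (void*)0x40000000, 1 << 20);  // [lo, lo+1M)
//   e = extents_adjoin(e, (void*)0x40100000, 1 << 20);  // coalesced
//   extents_contain_addr(e, 0x40180000);                // => 1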
#endif // EXTENTS_H

View file

@@ -0,0 +1,229 @@
#ifndef FIELD_SET_H
#define FIELD_SET_H
#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>
#include "assert.h"
#include "gc-edge.h"
#include "gc-lock.h"
#include "tracer.h"
#define GC_EDGE_BUFFER_CAPACITY 510
struct gc_edge_buffer {
struct gc_edge_buffer *next;
size_t size;
struct gc_edge edges[GC_EDGE_BUFFER_CAPACITY];
};
// Lock-free.
struct gc_edge_buffer_list {
struct gc_edge_buffer *head;
};
// With a lock.
struct gc_edge_buffer_stack {
struct gc_edge_buffer_list list;
};
struct gc_field_set {
struct gc_edge_buffer_list full;
struct gc_edge_buffer_stack partly_full;
struct gc_edge_buffer_list empty;
size_t count;
pthread_mutex_t lock;
};
struct gc_field_set_writer {
struct gc_edge_buffer *buf;
struct gc_field_set *set;
};
static void
gc_edge_buffer_list_push(struct gc_edge_buffer_list *list,
struct gc_edge_buffer *buf) {
GC_ASSERT(!buf->next);
struct gc_edge_buffer *next =
atomic_load_explicit(&list->head, memory_order_relaxed);
do {
buf->next = next;
} while (!atomic_compare_exchange_weak_explicit(&list->head, &next, buf,
memory_order_acq_rel,
memory_order_acquire));
}
static struct gc_edge_buffer*
gc_edge_buffer_list_pop(struct gc_edge_buffer_list *list) {
struct gc_edge_buffer *head =
atomic_load_explicit(&list->head, memory_order_acquire);
struct gc_edge_buffer *next;
do {
if (!head) return NULL;
next = head->next;
} while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next,
memory_order_acq_rel,
memory_order_acquire));
head->next = NULL;
return head;
}
static void
gc_edge_buffer_stack_push(struct gc_edge_buffer_stack *stack,
struct gc_edge_buffer *buf,
const struct gc_lock *lock) {
GC_ASSERT(!buf->next);
buf->next = stack->list.head;
stack->list.head = buf;
}
static struct gc_edge_buffer*
gc_edge_buffer_stack_pop(struct gc_edge_buffer_stack *stack,
const struct gc_lock *lock) {
struct gc_edge_buffer *head = stack->list.head;
if (head) {
stack->list.head = head->next;
head->next = NULL;
}
return head;
}
static void
gc_field_set_init(struct gc_field_set *set) {
memset(set, 0, sizeof(*set));
pthread_mutex_init(&set->lock, NULL);
}
static struct gc_edge_buffer*
gc_field_set_acquire_buffer(struct gc_field_set *set) {
struct gc_edge_buffer *ret;
ret = gc_edge_buffer_list_pop(&set->empty);
if (ret) return ret;
struct gc_lock lock = gc_lock_acquire(&set->lock);
ret = gc_edge_buffer_stack_pop(&set->partly_full, &lock);
gc_lock_release(&lock);
if (ret) return ret;
// atomic inc count
ret = malloc(sizeof(*ret));
if (!ret) {
perror("Failed to allocate remembered set");
GC_CRASH();
}
memset(ret, 0, sizeof(*ret));
return ret;
}
static void
gc_field_set_release_buffer(struct gc_field_set *set,
struct gc_edge_buffer *buf) {
if (buf->size == GC_EDGE_BUFFER_CAPACITY) {
gc_edge_buffer_list_push(&set->full, buf);
} else {
struct gc_lock lock = gc_lock_acquire(&set->lock);
gc_edge_buffer_stack_push(&set->partly_full, buf, &lock);
gc_lock_release(&lock);
}
}
static void
gc_field_set_add_roots(struct gc_field_set *set, struct gc_tracer *tracer) {
struct gc_edge_buffer *buf;
struct gc_lock lock = gc_lock_acquire(&set->lock);
while ((buf = gc_edge_buffer_stack_pop(&set->partly_full, &lock)))
gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
while ((buf = gc_edge_buffer_list_pop(&set->full)))
gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
gc_lock_release(&lock);
}
static void
gc_field_set_clear(struct gc_field_set *set,
void (*forget_edge)(struct gc_edge, struct gc_heap*),
struct gc_heap *heap) {
struct gc_edge_buffer *partly_full = set->partly_full.list.head;
struct gc_edge_buffer *full = set->full.head;
// Clear the full and partly full sets now so that if a collector
// wanted to it could re-add an edge to the remembered set.
set->partly_full.list.head = NULL;
set->full.head = NULL;
struct gc_edge_buffer *buf, *next;
for (buf = partly_full; buf; buf = next) {
next = buf->next;
buf->next = NULL;
if (forget_edge)
for (size_t i = 0; i < buf->size; i++)
forget_edge(buf->edges[i], heap);
buf->size = 0;
gc_edge_buffer_list_push(&set->empty, buf);
}
for (buf = full; buf; buf = next) {
next = buf->next;
buf->next = NULL;
if (forget_edge)
for (size_t i = 0; i < buf->size; i++)
forget_edge(buf->edges[i], heap);
buf->size = 0;
gc_edge_buffer_list_push(&set->empty, buf);
}
}
static inline void
gc_field_set_visit_edge_buffer(struct gc_field_set *set,
struct gc_edge_buffer *buf,
int (*visit)(struct gc_edge,
struct gc_heap*,
void *data),
struct gc_heap *heap,
void *data) GC_ALWAYS_INLINE;
static inline void
gc_field_set_visit_edge_buffer(struct gc_field_set *set,
struct gc_edge_buffer *buf,
int (*visit)(struct gc_edge,
struct gc_heap*,
void *data),
struct gc_heap *heap,
void *data) {
size_t i = 0;
while (i < buf->size) {
if (visit(buf->edges[i], heap, data))
i++;
else
buf->edges[i] = buf->edges[--buf->size];
}
gc_field_set_release_buffer(set, buf);
}
static void
gc_field_set_writer_release_buffer(struct gc_field_set_writer *writer) {
if (writer->buf) {
gc_field_set_release_buffer(writer->set, writer->buf);
writer->buf = NULL;
}
}
static void
gc_field_set_writer_init(struct gc_field_set_writer *writer,
struct gc_field_set *set) {
writer->set = set;
writer->buf = NULL;
}
static void
gc_field_set_writer_add_edge(struct gc_field_set_writer *writer,
struct gc_edge edge) {
struct gc_edge_buffer *buf = writer->buf;
if (GC_UNLIKELY(!buf))
writer->buf = buf = gc_field_set_acquire_buffer(writer->set);
GC_ASSERT(buf->size < GC_EDGE_BUFFER_CAPACITY);
buf->edges[buf->size++] = edge;
if (GC_UNLIKELY(buf->size == GC_EDGE_BUFFER_CAPACITY)) {
gc_edge_buffer_list_push(&writer->set->full, buf);
writer->buf = NULL;
}
}
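// Illustrative write-barrier flow (a sketch, not in the original
// header; `edge` and `tracer` stand in for values supplied by the
// collector): each mutator keeps a private writer so that logging a
// field is usually just an append into a thread-local buffer.
//
//   struct gc_field_set remembered; gc_field_set_init(&remembered);
//   struct gc_field_set_writer w;
//   gc_field_set_writer_init(&w, &remembered);
//   gc_field_set_writer_add_edge(&w, edge);      // on each logged store
//   gc_field_set_writer_release_buffer(&w);      // when the mutator stops
//   gc_field_set_add_roots(&remembered, tracer); // at collection time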
#endif // FIELD_SET_H

View file

@@ -0,0 +1,31 @@
#ifndef FREELIST_H
#define FREELIST_H
// A size-segregated freelist with linear-log buckets à la
// https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/.
#include "gc-assert.h"
#include "gc-histogram.h"
#include <string.h>
#define DEFINE_FREELIST(name, max_value_bits, precision, node) \
struct name { node buckets[((max_value_bits) << (precision)) + 1]; }; \
static inline size_t name##_num_size_classes(void) { \
return ((max_value_bits) << (precision)) + 1; \
} \
static inline uint64_t name##_bucket_min_val(size_t idx) { \
GC_ASSERT(idx < name##_num_size_classes()); \
return gc_histogram_bucket_min_val((precision), idx); \
} \
static inline void name##_init(struct name *f) { \
memset(f, 0, sizeof(*f)); \
} \
static inline size_t name##_size_class(uint64_t val) { \
return gc_histogram_bucket((max_value_bits), (precision), val); \
} \
static inline node* name##_bucket(struct name *f, uint64_t val) { \
return &f->buckets[name##_size_class(val)]; \
}
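// Illustrative (hypothetical) instantiation, not in the original
// header: a freelist keyed on sizes up to 2^20 bytes with 2 bits of
// sub-power-of-two precision, using an embedder-defined node type.
//
//   struct hole { struct hole *next; };
//   DEFINE_FREELIST(hole_freelist, 20, 2, struct hole*)
//
//   // (20 << 2) + 1 == 81 size classes; hole_freelist_bucket(&f, sz)
//   // returns the list head for the bucket covering sz.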
#endif // FREELIST_H

View file

@@ -0,0 +1,22 @@
#ifndef GC_ALIGN_H
#define GC_ALIGN_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include <stdint.h>
static inline uintptr_t align_down(uintptr_t addr, size_t align) {
return addr & ~(align - 1);
}
static inline uintptr_t align_up(uintptr_t addr, size_t align) {
return align_down(addr + align - 1, align);
}
// Poor man's equivalent of std::hardware_destructive_interference_size.
#define AVOID_FALSE_SHARING 128
#define ALIGNED_TO_AVOID_FALSE_SHARING \
__attribute__((aligned(AVOID_FALSE_SHARING)))
#endif // GC_ALIGN_H

View file

@@ -0,0 +1,55 @@
#ifndef GC_EPHEMERON_INTERNAL_H
#define GC_EPHEMERON_INTERNAL_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include "gc-ephemeron.h"
struct gc_pending_ephemerons;
// API implemented by collector, for use by ephemerons:
GC_INTERNAL int gc_visit_ephemeron_key(struct gc_edge edge,
struct gc_heap *heap);
GC_INTERNAL struct gc_pending_ephemerons*
gc_heap_pending_ephemerons(struct gc_heap *heap);
GC_INTERNAL unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap);
// API implemented by ephemerons, for use by collector:
GC_INTERNAL struct gc_edge gc_ephemeron_key_edge(struct gc_ephemeron *eph);
GC_INTERNAL struct gc_edge gc_ephemeron_value_edge(struct gc_ephemeron *eph);
GC_INTERNAL struct gc_pending_ephemerons*
gc_prepare_pending_ephemerons(struct gc_pending_ephemerons *state,
size_t target_size, double slop);
GC_INTERNAL void
gc_resolve_pending_ephemerons(struct gc_ref obj, struct gc_heap *heap);
GC_INTERNAL void
gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
struct gc_heap *heap, size_t shard,
size_t nshards);
GC_INTERNAL struct gc_ephemeron*
gc_pop_resolved_ephemerons(struct gc_heap *heap);
GC_INTERNAL void
gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data);
GC_INTERNAL void
gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state,
size_t shard, size_t nshards);
GC_INTERNAL void gc_ephemeron_init_internal(struct gc_heap *heap,
struct gc_ephemeron *ephemeron,
struct gc_ref key,
struct gc_ref value);
#endif // GC_EPHEMERON_INTERNAL_H

View file

@@ -0,0 +1,583 @@
#include <math.h>
#include <stdatomic.h>
#include <stdlib.h>
#define GC_IMPL 1
#include "address-hash.h"
#include "debug.h"
#include "gc-embedder-api.h"
#include "gc-ephemeron-internal.h"
// # Overview
//
// An ephemeron is a conjunction consisting of the ephemeron object
// itself, a "key" object, and a "value" object. If the ephemeron and
// the key are live, then the value is kept live and can be looked up
// given the ephemeron object.
//
// Sometimes we write this as E×K⇒V, indicating that you need both E and
// K to get V. We'll use this notation in these comments sometimes.
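// For example (an illustration, not from the original text), a
// weak-key table can represent each entry as an ephemeron E whose K is
// the table key and whose V is the associated value: the value stays
// retrievable exactly as long as both the table (which holds E) and
// the key object are otherwise live.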
//
// The key and the value of an ephemeron are never modified, except
// possibly via forwarding during GC.
//
// If the key of an ephemeron ever becomes unreachable, the ephemeron
// object will be marked as dead by the collector, and neither key nor
// value will be accessible. Users can also explicitly mark an
// ephemeron as dead.
//
// Users can build collections of ephemerons by chaining them together.
// If an ephemeron ever becomes dead, the ephemeron will be removed from
// the chain by the garbage collector.
//
// # Tracing algorithm
//
// Tracing ephemerons is somewhat complicated. Tracing the live objects
// in a heap is usually a parallelizable fan-out kind of operation,
// requiring minimal synchronization between tracing worker threads.
// However with ephemerons, each worker thread may need to check if
// there is a pending ephemeron E for an object K, marking the
// associated V for later traversal by the tracer. Doing this without
// introducing excessive global serialization points is the motivation
// for the complications that follow.
//
// From the viewpoint of the garbage collector, an ephemeron E×K⇒V has 4
// possible states:
//
// - Traced: An E that was already fully traced as of a given GC epoch.
//
// - Claimed: GC discovers E for the first time in a GC epoch
//
// - Pending: K's liveness is unknown
//
// - Resolved: K is live; V needs tracing
//
// The ephemeron state is kept in an atomic variable. The pending and
// resolved states also have associated atomic list link fields as well;
// it doesn't appear possible to coalesce them into a single field
// without introducing serialization. Finally, there is a bit to
// indicate whether a "traced" ephemeron is live or dead, and a field to
// indicate the epoch at which it was last traced.
//
// Here is a diagram of the state transitions:
//
//          ,----->Traced<-----.
//         ,        |  |        .
//        ,         v  /         .
//        |       Claimed         |
//        |   ,-----/   \-----.   |
//        |   v                v  |
//        Pending--------->Resolved
//
// Ephemerons are born in the traced state, for the current GC epoch.
//
// When the tracer sees an ephemeron E in the traced state it checks the
// epoch. If the epoch is up to date, E stays in the traced state and
// we are done.
//
// Otherwise, E transitions from traced to claimed. The thread that
// claims E is then responsible for resetting E's pending and resolved
// links, updating E's epoch, and tracing E's user-controlled chain
// link.
//
// If the claiming thread sees that E was already marked dead by a
// previous GC, or explicitly by the user, the ephemeron then
// transitions from back to traced, ready for the next epoch.
//
// If the claiming thread sees K to already be known to be live, then E
// is added to the global resolved set and E's state becomes resolved.
//
// Otherwise the claiming thread publishes K⇒E to the global pending
// ephemeron table, via the pending link, and E transitions to pending.
//
// A pending ephemeron is a link in a buckets-of-chains concurrent hash
// table. If its K is ever determined to be live, it becomes resolved,
// and is added to a global set of resolved ephemerons. At the end of
// GC, any ephemerons still pending are marked dead, transitioning their
// states to traced.
//
// Note that the claiming thread -- the one that publishes K⇒E to the
// global pending ephemeron table -- needs to re-check that K is still
// untraced after adding K⇒E to the pending table, and move to resolved
// if so.
//
// A resolved ephemeron needs its V to be traced. Incidentally its K
// also needs tracing, to relocate any forwarding pointer. The thread
// that pops an ephemeron from the resolved set is responsible for
// tracing and for moving E's state to traced.
//
// # Concurrency
//
// All operations on ephemerons are wait-free. Sometimes only one
// thread can make progress (for example for an ephemeron in the claimed
// state), but no thread will be stalled waiting on other threads to
// proceed.
//
// There is one interesting (from a concurrency point of view) data
// structure used by the implementation of ephemerons, the singly-linked
// list. Actually there are three of these; one is used as a stack and
// the other two are used as sets.
//
// The resolved set is implemented via a global `struct gc_ephemeron
// *resolved` variable. Resolving an ephemeron does an atomic push to
// this stack, via compare-and-swap (CAS); popping from the stack (also
// via CAS) yields an ephemeron for tracing. Ephemerons are added to
// the resolved set at most once per GC cycle, and the resolved set is
// empty outside of GC.
//
// The operations that are supported on atomic stacks are:
//
// push(LOC, E, OFFSET) -> void
//
// The user-visible chain link and the link for the pending ephemeron
// table are used to build atomic sets. In these you can add an
// ephemeron to the beginning of the list, traverse the list link by
// link to the end (indicated by NULL), and remove any list item.
// Removing a list node proceeds in two phases: one, you mark the node
// for removal, by changing the ephemeron's state; then, possibly on a
// subsequent traversal, any predecessor may forward its link past
// removed nodes. Because node values never change and nodes only go
// from live to dead, the live list tail can always be reached by any
// node, even from dead nodes.
//
// The operations that are supported on these atomic lists:
//
// push(LOC, E, OFFSET) -> void
// pop(LOC, OFFSET) -> ephemeron or null
// follow(LOC, OFFSET, STATE_OFFSET, LIVE_STATE) -> ephemeron or null
//
// These operations are all wait-free. The "push" operation is shared
// between stack and set use cases. "pop" is for stack-like use cases.
// The "follow" operation traverses a list, opportunistically eliding
// nodes that have been marked dead, atomically updating the location
// storing the next item.
//
// There are also accessors on ephemerons to their fields:
//
// key(E) -> value or null
// value(E) -> value or null
//
// These operations retrieve the key and value, respectively, provided
// that the ephemeron is not marked dead.
////////////////////////////////////////////////////////////////////////
// Concurrent operations on ephemeron lists
////////////////////////////////////////////////////////////////////////
static void
ephemeron_list_push(struct gc_ephemeron **loc,
struct gc_ephemeron *head,
struct gc_ephemeron** (*get_next)(struct gc_ephemeron*)) {
struct gc_ephemeron *tail = atomic_load_explicit(loc, memory_order_acquire);
while (1) {
// There must be no concurrent readers of HEAD, a precondition that
// we ensure by only publishing HEAD to LOC at most once per cycle.
// Therefore we can use a normal store for the tail pointer.
*get_next(head) = tail;
if (atomic_compare_exchange_weak(loc, &tail, head))
break;
}
}
static struct gc_ephemeron*
ephemeron_list_pop(struct gc_ephemeron **loc,
struct gc_ephemeron** (*get_next)(struct gc_ephemeron*)) {
struct gc_ephemeron *head = atomic_load_explicit(loc, memory_order_acquire);
while (head) {
// Precondition: the result of get_next on an ephemeron is never
// updated concurrently; OK to load non-atomically.
struct gc_ephemeron *tail = *get_next(head);
if (atomic_compare_exchange_weak(loc, &head, tail))
break;
}
return head;
}
static struct gc_ephemeron*
ephemeron_list_follow(struct gc_ephemeron **loc,
struct gc_ephemeron** (*get_next)(struct gc_ephemeron*),
int (*is_live)(struct gc_ephemeron*)) {
struct gc_ephemeron *head = atomic_load_explicit(loc, memory_order_acquire);
if (!head) return NULL;
while (1) {
struct gc_ephemeron *new_head = head;
// Skip past any dead nodes.
while (new_head && !is_live(new_head))
new_head = atomic_load_explicit(get_next(new_head), memory_order_acquire);
if (// If we didn't have to advance past any dead nodes, no need to
// update LOC.
(head == new_head)
// Otherwise if we succeed in updating LOC, we're done.
|| atomic_compare_exchange_strong(loc, &head, new_head)
// Someone else managed to advance LOC; that's fine too.
|| (head == new_head))
return new_head;
// Otherwise we lost a race; loop and retry.
}
}
////////////////////////////////////////////////////////////////////////
// The ephemeron object type
////////////////////////////////////////////////////////////////////////
#ifndef GC_EMBEDDER_EPHEMERON_HEADER
#error Embedder should define GC_EMBEDDER_EPHEMERON_HEADER
#endif
enum {
EPHEMERON_STATE_TRACED,
EPHEMERON_STATE_CLAIMED,
EPHEMERON_STATE_PENDING,
EPHEMERON_STATE_RESOLVED,
};
struct gc_ephemeron {
GC_EMBEDDER_EPHEMERON_HEADER
uint8_t state;
unsigned epoch;
struct gc_ephemeron *chain;
struct gc_ephemeron *pending;
struct gc_ephemeron *resolved;
struct gc_ref key;
struct gc_ref value;
};
size_t gc_ephemeron_size(void) { return sizeof(struct gc_ephemeron); }
struct gc_edge gc_ephemeron_key_edge(struct gc_ephemeron *e) {
return gc_edge(&e->key);
}
struct gc_edge gc_ephemeron_value_edge(struct gc_ephemeron *e) {
return gc_edge(&e->value);
}
////////////////////////////////////////////////////////////////////////
// Operations on the user-controlled chain field
////////////////////////////////////////////////////////////////////////
static struct gc_ephemeron** ephemeron_chain(struct gc_ephemeron *e) {
return &e->chain;
}
static int ephemeron_is_dead(struct gc_ephemeron *e) {
return !atomic_load_explicit(&e->key.value, memory_order_acquire);
}
static int ephemeron_is_not_dead(struct gc_ephemeron *e) {
return !ephemeron_is_dead(e);
}
void gc_ephemeron_chain_push(struct gc_ephemeron **loc,
struct gc_ephemeron *e) {
ephemeron_list_push(loc, e, ephemeron_chain);
}
static struct gc_ephemeron* follow_chain(struct gc_ephemeron **loc) {
return ephemeron_list_follow(loc, ephemeron_chain, ephemeron_is_not_dead);
}
struct gc_ephemeron* gc_ephemeron_chain_head(struct gc_ephemeron **loc) {
return follow_chain(loc);
}
struct gc_ephemeron* gc_ephemeron_chain_next(struct gc_ephemeron *e) {
return follow_chain(ephemeron_chain(e));
}
void gc_ephemeron_mark_dead(struct gc_ephemeron *e) {
atomic_store_explicit(&e->key.value, 0, memory_order_release);
}
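// Illustrative chain traversal (a sketch, not in the original file):
// an embedder keeping a bucket of ephemerons walks it with the chain
// accessors, which transparently skip links whose keys have died.
//
//   struct gc_ephemeron *head = ...;        // e.g. a hash-table bucket
//   gc_ephemeron_chain_push(&head, e);      // add a new association
//   for (struct gc_ephemeron *link = gc_ephemeron_chain_head(&head);
//        link;
//        link = gc_ephemeron_chain_next(link))
//     ... use gc_ephemeron_key(link) / gc_ephemeron_value(link),
//         defined below ...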
////////////////////////////////////////////////////////////////////////
// Operations on the GC-managed pending link
////////////////////////////////////////////////////////////////////////
static struct gc_ephemeron** ephemeron_pending(struct gc_ephemeron *e) {
return &e->pending;
}
static uint8_t ephemeron_state(struct gc_ephemeron *e) {
return atomic_load_explicit(&e->state, memory_order_acquire);
}
static int ephemeron_is_pending(struct gc_ephemeron *e) {
return ephemeron_state(e) == EPHEMERON_STATE_PENDING;
}
static void push_pending(struct gc_ephemeron **loc, struct gc_ephemeron *e) {
ephemeron_list_push(loc, e, ephemeron_pending);
}
static struct gc_ephemeron* follow_pending(struct gc_ephemeron **loc) {
return ephemeron_list_follow(loc, ephemeron_pending, ephemeron_is_pending);
}
////////////////////////////////////////////////////////////////////////
// Operations on the GC-managed resolved link
////////////////////////////////////////////////////////////////////////
static struct gc_ephemeron** ephemeron_resolved(struct gc_ephemeron *e) {
return &e->resolved;
}
static void push_resolved(struct gc_ephemeron **loc, struct gc_ephemeron *e) {
ephemeron_list_push(loc, e, ephemeron_resolved);
}
static struct gc_ephemeron* pop_resolved(struct gc_ephemeron **loc) {
return ephemeron_list_pop(loc, ephemeron_resolved);
}
////////////////////////////////////////////////////////////////////////
// Access to the association
////////////////////////////////////////////////////////////////////////
struct gc_ref gc_ephemeron_key(struct gc_ephemeron *e) {
return gc_ref(atomic_load_explicit(&e->key.value, memory_order_acquire));
}
struct gc_ref gc_ephemeron_value(struct gc_ephemeron *e) {
return ephemeron_is_dead(e) ? gc_ref_null() : e->value;
}
////////////////////////////////////////////////////////////////////////
// Tracing ephemerons
////////////////////////////////////////////////////////////////////////
struct gc_pending_ephemerons {
struct gc_ephemeron* resolved;
size_t nbuckets;
double scale;
struct gc_ephemeron* buckets[0];
};
static const size_t MIN_PENDING_EPHEMERONS_SIZE = 32;
static size_t pending_ephemerons_byte_size(size_t nbuckets) {
return sizeof(struct gc_pending_ephemerons) +
sizeof(struct gc_ephemeron*) * nbuckets;
}
static struct gc_pending_ephemerons*
gc_make_pending_ephemerons(size_t byte_size) {
size_t nbuckets = byte_size / sizeof(struct gc_ephemeron*);
if (nbuckets < MIN_PENDING_EPHEMERONS_SIZE)
nbuckets = MIN_PENDING_EPHEMERONS_SIZE;
struct gc_pending_ephemerons *ret =
malloc(pending_ephemerons_byte_size(nbuckets));
if (!ret)
return NULL;
ret->resolved = NULL;
ret->nbuckets = nbuckets;
ret->scale = nbuckets / pow(2.0, sizeof(uintptr_t) * 8);
for (size_t i = 0; i < nbuckets; i++)
ret->buckets[i] = NULL;
return ret;
}
struct gc_pending_ephemerons*
gc_prepare_pending_ephemerons(struct gc_pending_ephemerons *state,
size_t target_byte_size, double slop) {
size_t existing =
state ? pending_ephemerons_byte_size(state->nbuckets) : 0;
slop += 1.0;
if (existing * slop > target_byte_size && existing < target_byte_size * slop)
return state;
struct gc_pending_ephemerons *new_state =
gc_make_pending_ephemerons(target_byte_size);
if (!new_state)
return state;
free(state);
return new_state;
}
static struct gc_ephemeron**
pending_ephemeron_bucket(struct gc_pending_ephemerons *state,
struct gc_ref ref) {
uintptr_t hash = hash_address(gc_ref_value(ref));
size_t idx = hash * state->scale;
GC_ASSERT(idx < state->nbuckets);
return &state->buckets[idx];
}
static void
add_pending_ephemeron(struct gc_pending_ephemerons *state,
struct gc_ephemeron *e) {
struct gc_ephemeron **bucket = pending_ephemeron_bucket(state, e->key);
atomic_store_explicit(&e->state, EPHEMERON_STATE_PENDING,
memory_order_release);
push_pending(bucket, e);
}
static void maybe_resolve_ephemeron(struct gc_pending_ephemerons *state,
struct gc_ephemeron *e) {
uint8_t expected = EPHEMERON_STATE_PENDING;
if (atomic_compare_exchange_strong(&e->state, &expected,
EPHEMERON_STATE_RESOLVED))
push_resolved(&state->resolved, e);
}
// Precondition: OBJ has already been copied to tospace, but OBJ is a
// fromspace ref.
void gc_resolve_pending_ephemerons(struct gc_ref obj, struct gc_heap *heap) {
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
struct gc_ephemeron **bucket = pending_ephemeron_bucket(state, obj);
for (struct gc_ephemeron *link = follow_pending(bucket);
link;
link = follow_pending(&link->pending)) {
if (gc_ref_value(obj) == gc_ref_value(link->key)) {
gc_visit_ephemeron_key(gc_ephemeron_key_edge(link), heap);
// PENDING -> RESOLVED, if it was pending.
maybe_resolve_ephemeron(state, link);
}
}
}
void gc_trace_ephemeron(struct gc_ephemeron *e,
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data) {
unsigned epoch = gc_heap_ephemeron_trace_epoch(heap);
uint8_t expected = EPHEMERON_STATE_TRACED;
// TRACED[_] -> CLAIMED[_].
if (!atomic_compare_exchange_strong(&e->state, &expected,
EPHEMERON_STATE_CLAIMED))
return;
if (e->epoch == epoch) {
// CLAIMED[epoch] -> TRACED[epoch].
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
memory_order_release);
return;
}
// CLAIMED[!epoch] -> CLAIMED[epoch].
e->epoch = epoch;
e->pending = NULL;
e->resolved = NULL;
// Trace chain successors, eliding any intermediate dead links. Note
// that there is a race between trace-time evacuation of the next link
// in the chain and any mutation of that link pointer by the mutator
// (which can only be to advance the chain forward past dead links).
// Collectors using this API have to eliminate this race, for example
// by not evacuating while the mutator is running.
follow_chain(&e->chain);
visit(gc_edge(&e->chain), heap, trace_data);
// Similarly there is a race between the mutator marking an ephemeron
// as dead and here; the consequence would be that we treat an
// ephemeron as live when it's not, but only for this cycle. No big
// deal.
if (ephemeron_is_dead(e)) {
// CLAIMED[epoch] -> TRACED[epoch].
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
memory_order_release);
return;
}
// If K is live, trace V and we are done.
if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap)) {
visit(gc_ephemeron_value_edge(e), heap, trace_data);
// CLAIMED[epoch] -> TRACED[epoch].
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
memory_order_release);
return;
}
// Otherwise K is not yet traced, so we don't know if it is live.
// Publish the ephemeron to a global table.
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
// CLAIMED[epoch] -> PENDING.
add_pending_ephemeron(state, e);
// Given an ephemeron E×K⇒V, there is a race between marking K and E.
// One thread could go to mark E and see that K is unmarked, so we get
// here. Meanwhile another thread could go to mark K and not see E in
// the global table yet. Therefore after publishing E, we have to
// check the mark on K again.
if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap))
// K visited by another thread while we published E; PENDING ->
// RESOLVED, if still PENDING.
maybe_resolve_ephemeron(state, e);
}
void
gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
struct gc_heap *heap, size_t shard,
size_t nshards) {
GC_ASSERT(shard < nshards);
size_t start = state->nbuckets * 1.0 * shard / nshards;
size_t end = state->nbuckets * 1.0 * (shard + 1) / nshards;
for (size_t idx = start; idx < end; idx++) {
for (struct gc_ephemeron *e = follow_pending(&state->buckets[idx]);
e;
e = follow_pending(&e->pending)) {
if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap))
// PENDING -> RESOLVED, if PENDING.
maybe_resolve_ephemeron(state, e);
}
}
}
struct gc_ephemeron*
gc_pop_resolved_ephemerons(struct gc_heap *heap) {
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
return atomic_exchange(&state->resolved, NULL);
}
void
gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data) {
for (; resolved; resolved = resolved->resolved) {
visit(gc_ephemeron_value_edge(resolved), heap, trace_data);
// RESOLVED -> TRACED.
atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED,
memory_order_release);
}
}
void
gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state,
size_t shard, size_t nshards) {
GC_ASSERT(shard < nshards);
size_t start = state->nbuckets * 1.0 * shard / nshards;
size_t end = state->nbuckets * 1.0 * (shard + 1) / nshards;
for (size_t idx = start; idx < end; idx++) {
struct gc_ephemeron **bucket = &state->buckets[idx];
for (struct gc_ephemeron *e = follow_pending(bucket);
e;
e = follow_pending(&e->pending)) {
// PENDING -> TRACED, but dead.
atomic_store_explicit(&e->key.value, 0, memory_order_release);
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
memory_order_release);
}
atomic_store_explicit(bucket, NULL, memory_order_release);
}
}
////////////////////////////////////////////////////////////////////////
// Allocation & initialization
////////////////////////////////////////////////////////////////////////
void gc_ephemeron_init_internal(struct gc_heap *heap,
struct gc_ephemeron *ephemeron,
struct gc_ref key, struct gc_ref value) {
// Caller responsible for any write barrier, though really the
// assumption is that the ephemeron is younger than the key and the
// value.
ephemeron->state = EPHEMERON_STATE_TRACED;
ephemeron->epoch = gc_heap_ephemeron_trace_epoch(heap) - 1;
ephemeron->chain = NULL;
ephemeron->pending = NULL;
ephemeron->resolved = NULL;
ephemeron->key = key;
ephemeron->value = value;
}
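// A rough sketch of the collector-side fixpoint these functions support
// (single-shard case; trace_edge and trace_data are hypothetical
// stand-ins for collector-specific tracing machinery):
//
//   while (1) {
//     /* Drain the grey worklist, calling
//        gc_resolve_pending_ephemerons() for each newly marked object
//        and gc_trace_ephemeron() for each traced ephemeron. */
//     gc_scan_pending_ephemerons(state, heap, 0, 1);
//     struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
//     if (!resolved)
//       break;
//     gc_trace_resolved_ephemerons(resolved, trace_edge, heap, trace_data);
//   }
//   gc_sweep_pending_ephemerons(state, 0, 1);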

View file

@ -0,0 +1,65 @@
#ifndef GC_FINALIZER_INTERNAL_H
#define GC_FINALIZER_INTERNAL_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include "gc-finalizer.h"
#include "root.h"
struct gc_finalizer_state;
GC_INTERNAL
struct gc_finalizer_state* gc_make_finalizer_state(void);
GC_INTERNAL
void gc_finalizer_init_internal(struct gc_finalizer *f,
struct gc_ref object,
struct gc_ref closure);
GC_INTERNAL
void gc_finalizer_attach_internal(struct gc_finalizer_state *state,
struct gc_finalizer *f,
unsigned priority);
GC_INTERNAL
void gc_finalizer_externally_activated(struct gc_finalizer *f);
GC_INTERNAL
void gc_finalizer_externally_fired(struct gc_finalizer_state *state,
struct gc_finalizer *finalizer);
GC_INTERNAL
struct gc_finalizer* gc_finalizer_state_pop(struct gc_finalizer_state *state);
GC_INTERNAL
void gc_finalizer_fire(struct gc_finalizer **fired_list_loc,
struct gc_finalizer *finalizer);
GC_INTERNAL
void gc_finalizer_state_set_callback(struct gc_finalizer_state *state,
gc_finalizer_callback callback);
GC_INTERNAL
size_t gc_visit_finalizer_roots(struct gc_finalizer_state *state,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data);
GC_INTERNAL
size_t gc_resolve_finalizers(struct gc_finalizer_state *state,
size_t priority,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data);
GC_INTERNAL
void gc_notify_finalizers(struct gc_finalizer_state *state,
struct gc_heap *heap);
#endif // GC_FINALIZER_INTERNAL_H

View file

@ -0,0 +1,307 @@
#include <math.h>
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>
#define GC_IMPL 1
#include "debug.h"
#include "gc-embedder-api.h"
#include "gc-ephemeron-internal.h" // for gc_visit_ephemeron_key
#include "gc-finalizer-internal.h"
// # Overview
//
// See gc-finalizer.h for an overview of finalizers from the user and
// embedder point of view.
//
// ## Tracing
//
// From the perspective of the collector implementation, finalizers are
// GC-managed objects, allowing their size to be accounted for within
// the heap size. They get traced during collection, allowing for
// relocation of their object references, and allowing the finalizer
// object itself to be evacuated if appropriate.
//
// The collector holds on to outstanding finalizers in a *finalizer
// state*, which holds one *finalizer table* for each priority. We
// don't need to look up finalizers by object, so we could just hold
// them in a big list, but to facilitate parallelism we slice them
// across some number of shards, where the "next" pointer is part of the
// finalizer object.
//
// There are a number of ways you could imagine integrating finalizers
// into a system. The way Whippet does it goes like this. See
// https://wingolog.org/archives/2022/10/31/ephemerons-and-finalizers
// and
// https://wingolog.org/archives/2024/07/22/finalizers-guardians-phantom-references-et-cetera
// for some further discussion.
//
// 1. The collector should begin a cycle by adding all shards from all
// priorities to the root set. When the embedder comes across a
// finalizer (as it will, because we added them to the root set),
// it traces it via gc_trace_finalizer(), which will visit the
// finalizer's closure and its "next" pointer.
//
// 2. After the full trace, and then the fix-point on pending
// ephemerons, for each priority from 0 upwards:
//
// i. Visit each finalizable object in the table. If the object
// was as-yet unvisited, then it is unreachable and thus
// finalizable; the finalizer is added to the global "fired"
// list, and changes state from "attached" to "fired".
// Otherwise it is re-added to the finalizer table.
//
// ii. If any finalizer was added to the fired list, then those
// objects were also added to the grey worklist; run tracing
// again until the grey set is empty, including ephemerons.
//
// 3. Finally, call the finalizer callback if the list of fired finalizers is
// nonempty.
//
// ## Concurrency
//
// The finalizer table is lock-free. It keeps a count of active finalizers, and
// chooses a bucket based on the count modulo the number of buckets. Adding a
// finalizer to the table is an atomic push on a linked list. The table is
// completely rebuilt during the GC pause, redistributing survivor entries
// across the buckets, and pushing all finalizable entries onto the single
// "fired" linked list.
//
// The fired list is also lock-free. As noted above, it is built
// during the pause, and mutators pop items off of it atomically.
//
// ## Generations
//
// It would be ideal if a young generation had its own finalizer table.
// Promoting an object would require promoting its finalizer to the old
// finalizer table. Not yet implemented (but would be nice).
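// As a rough sketch (tracing machinery elided; trace_edge and
// trace_data are hypothetical stand-ins), the per-cycle driver
// described above looks something like:
//
//   gc_visit_finalizer_roots(state, trace_edge, heap, trace_data);
//   /* ... full trace, then the ephemeron fixpoint ... */
//   for (size_t prio = 0; prio < gc_finalizer_priority_count(); prio++) {
//     if (gc_resolve_finalizers(state, prio, trace_edge, heap, trace_data))
//       /* ... re-run tracing until the grey set is empty ... */;
//   }
//   gc_notify_finalizers(state, heap);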
#ifndef GC_EMBEDDER_FINALIZER_HEADER
#error Embedder should define GC_EMBEDDER_FINALIZER_HEADER
#endif
enum finalizer_state {
FINALIZER_STATE_INIT = 0, // Finalizer is newborn.
FINALIZER_STATE_ACTIVE, // Finalizer is ours and in the finalizer table.
FINALIZER_STATE_FIRED, // Finalizer is handed back to mutator.
};
struct gc_finalizer {
GC_EMBEDDER_FINALIZER_HEADER
enum finalizer_state state;
struct gc_ref object;
struct gc_ref closure;
struct gc_finalizer *next;
};
// Enough buckets to parallelize closure marking. No need to look up a
// finalizer for a given object.
#define BUCKET_COUNT 32
struct gc_finalizer_table {
size_t finalizer_count;
struct gc_finalizer* buckets[BUCKET_COUNT];
};
struct gc_finalizer_state {
gc_finalizer_callback have_finalizers;
struct gc_finalizer *fired;
size_t fired_this_cycle;
size_t table_count;
struct gc_finalizer_table tables[0];
};
// public
size_t gc_finalizer_size(void) { return sizeof(struct gc_finalizer); }
struct gc_ref gc_finalizer_object(struct gc_finalizer *f) { return f->object; }
struct gc_ref gc_finalizer_closure(struct gc_finalizer *f) { return f->closure; }
// internal
struct gc_finalizer_state* gc_make_finalizer_state(void) {
size_t ntables = gc_finalizer_priority_count();
size_t size = (sizeof(struct gc_finalizer_state) +
sizeof(struct gc_finalizer_table) * ntables);
struct gc_finalizer_state *ret = malloc(size);
if (!ret)
return NULL;
memset(ret, 0, size);
ret->table_count = ntables;
return ret;
}
static void finalizer_list_push(struct gc_finalizer **loc,
struct gc_finalizer *head) {
struct gc_finalizer *tail = atomic_load_explicit(loc, memory_order_acquire);
do {
head->next = tail;
} while (!atomic_compare_exchange_weak(loc, &tail, head));
}
static struct gc_finalizer* finalizer_list_pop(struct gc_finalizer **loc) {
struct gc_finalizer *head = atomic_load_explicit(loc, memory_order_acquire);
do {
if (!head) return NULL;
} while (!atomic_compare_exchange_weak(loc, &head, head->next));
head->next = NULL;
return head;
}
static void add_finalizer_to_table(struct gc_finalizer_table *table,
struct gc_finalizer *f) {
size_t count = atomic_fetch_add_explicit(&table->finalizer_count, 1,
memory_order_relaxed);
struct gc_finalizer **loc = &table->buckets[count % BUCKET_COUNT];
finalizer_list_push(loc, f);
}
// internal
void gc_finalizer_init_internal(struct gc_finalizer *f,
struct gc_ref object,
struct gc_ref closure) {
// Caller responsible for any write barrier, though really the
// assumption is that the finalizer is younger than the object and the
// closure.
if (f->state != FINALIZER_STATE_INIT)
GC_CRASH();
GC_ASSERT(gc_ref_is_null(f->object));
f->object = object;
f->closure = closure;
}
// internal
void gc_finalizer_attach_internal(struct gc_finalizer_state *state,
struct gc_finalizer *f,
unsigned priority) {
// Caller responsible for any write barrier, though really the
// assumption is that the finalizer is younger than the object and the
// closure.
if (f->state != FINALIZER_STATE_INIT)
GC_CRASH();
if (gc_ref_is_null(f->object))
GC_CRASH();
f->state = FINALIZER_STATE_ACTIVE;
GC_ASSERT(priority < state->table_count);
add_finalizer_to_table(&state->tables[priority], f);
}
// internal
struct gc_finalizer* gc_finalizer_state_pop(struct gc_finalizer_state *state) {
return finalizer_list_pop(&state->fired);
}
static void
add_fired_finalizer(struct gc_finalizer_state *state,
struct gc_finalizer *f) {
if (f->state != FINALIZER_STATE_ACTIVE)
GC_CRASH();
f->state = FINALIZER_STATE_FIRED;
finalizer_list_push(&state->fired, f);
}
// internal
void
gc_finalizer_externally_activated(struct gc_finalizer *f) {
if (f->state != FINALIZER_STATE_INIT)
GC_CRASH();
f->state = FINALIZER_STATE_ACTIVE;
}
// internal
void
gc_finalizer_externally_fired(struct gc_finalizer_state *state,
struct gc_finalizer *f) {
add_fired_finalizer(state, f);
}
// internal
size_t gc_visit_finalizer_roots(struct gc_finalizer_state *state,
void (*visit)(struct gc_edge,
struct gc_heap*,
void *),
struct gc_heap *heap,
void *visit_data) {
size_t count = 0;
for (size_t tidx = 0; tidx < state->table_count; tidx++) {
struct gc_finalizer_table *table = &state->tables[tidx];
if (table->finalizer_count) {
count += table->finalizer_count;
for (size_t bidx = 0; bidx < BUCKET_COUNT; bidx++)
visit(gc_edge(&table->buckets[bidx]), heap, visit_data);
}
}
visit(gc_edge(&state->fired), heap, visit_data);
return count;
}
// public
void gc_trace_finalizer(struct gc_finalizer *f,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *trace_data) {
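// While a finalizer is ACTIVE it must not keep its object alive, so the
// object edge is visited only when the finalizer is in its INIT or
// FIRED state.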
if (f->state != FINALIZER_STATE_ACTIVE)
visit(gc_edge(&f->object), heap, trace_data);
visit(gc_edge(&f->closure), heap, trace_data);
visit(gc_edge(&f->next), heap, trace_data);
}
// Sweeping is currently serial. It could run in parallel but we want to
// resolve all finalizers before shading any additional node. Perhaps we should
// relax this restriction though; if the user attaches two finalizers to the
// same object, it's probably OK to only have one finalizer fire per cycle.
// internal
size_t gc_resolve_finalizers(struct gc_finalizer_state *state,
size_t priority,
void (*visit)(struct gc_edge edge,
struct gc_heap *heap,
void *visit_data),
struct gc_heap *heap,
void *visit_data) {
GC_ASSERT(priority < state->table_count);
struct gc_finalizer_table *table = &state->tables[priority];
size_t finalizers_fired = 0;
// Visit each finalizer in the table. If its object was already visited,
// re-add the finalizer to the table. Otherwise enqueue its object edge for
// tracing and mark the finalizer as fired.
if (table->finalizer_count) {
struct gc_finalizer_table scratch = { 0, };
for (size_t bidx = 0; bidx < BUCKET_COUNT; bidx++) {
struct gc_finalizer *next;
for (struct gc_finalizer *f = table->buckets[bidx]; f; f = next) {
next = f->next;
f->next = NULL;
struct gc_edge edge = gc_edge(&f->object);
if (gc_visit_ephemeron_key(edge, heap)) {
add_finalizer_to_table(&scratch, f);
} else {
finalizers_fired++;
visit(edge, heap, visit_data);
add_fired_finalizer(state, f);
}
}
}
memcpy(table, &scratch, sizeof(*table));
}
state->fired_this_cycle += finalizers_fired;
return finalizers_fired;
}
// internal
void gc_notify_finalizers(struct gc_finalizer_state *state,
struct gc_heap *heap) {
if (state->fired_this_cycle && state->have_finalizers) {
state->have_finalizers(heap, state->fired_this_cycle);
state->fired_this_cycle = 0;
}
}
// internal
void gc_finalizer_state_set_callback(struct gc_finalizer_state *state,
gc_finalizer_callback callback) {
state->have_finalizers = callback;
}

View file

@ -0,0 +1,16 @@
#ifndef GC_INTERNAL_H
#define GC_INTERNAL_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include "gc-ephemeron-internal.h"
#include "gc-finalizer-internal.h"
#include "gc-options-internal.h"
uint64_t gc_heap_total_bytes_allocated(struct gc_heap *heap);
void gc_mutator_adjust_heap_size(struct gc_mutator *mut, uint64_t new_size);
#endif // GC_INTERNAL_H

View file

@ -0,0 +1,24 @@
#ifndef GC_LOCK_H
#define GC_LOCK_H
#include <pthread.h>
#include "gc-assert.h"
struct gc_lock {
pthread_mutex_t *lock;
};
static struct gc_lock
gc_lock_acquire(pthread_mutex_t *lock) {
pthread_mutex_lock(lock);
return (struct gc_lock){ lock };
}
static void
gc_lock_release(struct gc_lock *lock) {
GC_ASSERT(lock->lock);
pthread_mutex_unlock(lock->lock);
lock->lock = NULL;
}
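// Typical usage, given some pthread_mutex_t `mutex`:
//   struct gc_lock lock = gc_lock_acquire(&mutex);
//   /* ... critical section ... */
//   gc_lock_release(&lock);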
#endif // GC_LOCK_H

View file

@ -0,0 +1,32 @@
#ifndef GC_OPTIONS_INTERNAL_H
#define GC_OPTIONS_INTERNAL_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include "gc-options.h"
struct gc_common_options {
enum gc_heap_size_policy heap_size_policy;
size_t heap_size;
size_t maximum_heap_size;
double heap_size_multiplier;
double heap_expansiveness;
int parallelism;
};
GC_INTERNAL void gc_init_common_options(struct gc_common_options *options);
GC_INTERNAL int gc_common_option_from_string(const char *str);
GC_INTERNAL int gc_common_options_set_int(struct gc_common_options *options,
int option, int value);
GC_INTERNAL int gc_common_options_set_size(struct gc_common_options *options,
int option, size_t value);
GC_INTERNAL int gc_common_options_set_double(struct gc_common_options *options,
int option, double value);
GC_INTERNAL int gc_common_options_parse_and_set(struct gc_common_options *options,
int option, const char *value);
#endif // GC_OPTIONS_INTERNAL_H

View file

@ -0,0 +1,198 @@
#include <limits.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#define GC_IMPL 1
#include "gc-options-internal.h"
#include "gc-platform.h"
// M(UPPER, lower, repr, type, parser, default, min, max)
#define FOR_EACH_INT_GC_OPTION(M) \
M(HEAP_SIZE_POLICY, heap_size_policy, "heap-size-policy", \
int, heap_size_policy, GC_HEAP_SIZE_FIXED, GC_HEAP_SIZE_FIXED, \
GC_HEAP_SIZE_ADAPTIVE) \
M(PARALLELISM, parallelism, "parallelism", \
int, int, default_parallelism(), 1, 64)
#define FOR_EACH_SIZE_GC_OPTION(M) \
M(HEAP_SIZE, heap_size, "heap-size", \
size, size, 6 * 1024 * 1024, 0, -1) \
M(MAXIMUM_HEAP_SIZE, maximum_heap_size, "maximum-heap-size", \
size, size, 0, 0, -1)
#define FOR_EACH_DOUBLE_GC_OPTION(M) \
M(HEAP_SIZE_MULTIPLIER, heap_size_multiplier, "heap-size-multiplier", \
double, double, 1.75, 1.0, 1e6) \
M(HEAP_EXPANSIVENESS, heap_expansiveness, "heap-expansiveness", \
double, double, 1.0, 0.0, 50.0)
typedef int gc_option_int;
typedef size_t gc_option_size;
typedef double gc_option_double;
#define FOR_EACH_COMMON_GC_OPTION(M) \
FOR_EACH_INT_GC_OPTION(M) \
FOR_EACH_SIZE_GC_OPTION(M) \
FOR_EACH_DOUBLE_GC_OPTION(M)
static int clamp_int(int n, int lo, int hi) {
return n < lo ? lo : n > hi ? hi : n;
}
static size_t clamp_size(size_t n, size_t lo, size_t hi) {
return n < lo ? lo : n > hi ? hi : n;
}
static double clamp_double(double n, double lo, double hi) {
return n < lo ? lo : n > hi ? hi : n;
}
static int default_parallelism(void) {
return clamp_int(gc_platform_processor_count(), 1, 8);
}
void gc_init_common_options(struct gc_common_options *options) {
#define INIT(UPPER, lower, repr, type, parser, default, min, max) \
options->lower = default;
FOR_EACH_COMMON_GC_OPTION(INIT)
#undef INIT
}
int gc_common_option_from_string(const char *str) {
#define GET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \
if (strcmp(str, repr) == 0) return GC_OPTION_##UPPER;
FOR_EACH_COMMON_GC_OPTION(GET_OPTION)
#undef GET_OPTION
return -1;
}
#define SET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \
case GC_OPTION_##UPPER: \
if (value != clamp_##type(value, min, max)) return 0; \
options->lower = value; \
return 1;
#define DEFINE_SETTER(STEM, stem, type) \
int gc_common_options_set_##stem(struct gc_common_options *options, \
int option, type value) { \
switch (option) { \
FOR_EACH_##STEM##_GC_OPTION(SET_OPTION) \
default: return 0; \
} \
}
DEFINE_SETTER(INT, int, int)
DEFINE_SETTER(SIZE, size, size_t)
DEFINE_SETTER(DOUBLE, double, double)
#undef SET_OPTION
#undef DEFINE_SETTER
static int parse_size(const char *arg, size_t *val) {
char *end;
long i = strtol(arg, &end, 0);
if (i < 0 || i == LONG_MAX) return 0;
if (end == arg) return 0;
char delim = *end;
if (delim == 'k' || delim == 'K')
++end, i *= 1024L;
else if (delim == 'm' || delim == 'M')
++end, i *= 1024L * 1024L;
else if (delim == 'g' || delim == 'G')
++end, i *= 1024L * 1024L * 1024L;
else if (delim == 't' || delim == 'T')
++end, i *= 1024L * 1024L * 1024L * 1024L;
if (*end != '\0') return 0;
*val = i;
return 1;
}
static int parse_int(const char *arg, int *val) {
char *end;
long i = strtol(arg, &end, 0);
if (i == LONG_MIN || i == LONG_MAX || end == arg || *end)
return 0;
*val = i;
return 1;
}
static int parse_heap_size_policy(const char *arg, int *val) {
if (strcmp(arg, "fixed") == 0) {
*val = GC_HEAP_SIZE_FIXED;
return 1;
}
if (strcmp(arg, "growable") == 0) {
*val = GC_HEAP_SIZE_GROWABLE;
return 1;
}
if (strcmp(arg, "adaptive") == 0) {
*val = GC_HEAP_SIZE_ADAPTIVE;
return 1;
}
return parse_int(arg, val);
}
static int parse_double(const char *arg, double *val) {
char *end;
double d = strtod(arg, &end);
if (end == arg || *end)
return 0;
*val = d;
return 1;
}
int gc_common_options_parse_and_set(struct gc_common_options *options,
int option, const char *value) {
switch (option) {
#define SET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \
case GC_OPTION_##UPPER: { \
gc_option_##type v; \
if (!parse_##parser(value, &v)) return 0; \
return gc_common_options_set_##type(options, option, v); \
}
FOR_EACH_COMMON_GC_OPTION(SET_OPTION)
default: return 0;
}
}
static int is_lower(char c) { return 'a' <= c && c <= 'z'; }
static int is_digit(char c) { return '0' <= c && c <= '9'; }
static int is_option(char c) { return is_lower(c) || c == '-'; }
static int is_option_end(char c) { return c == '='; }
static int is_value(char c) {
return is_lower(c) || is_digit(c) || c == '-' || c == '+' || c == '.';
}
static int is_value_end(char c) { return c == '\0' || c == ','; }
static char* read_token(char *p, int (*is_tok)(char c), int (*is_end)(char c),
char *delim) {
char c;
for (c = *p; is_tok(c); c = *++p);
if (!is_end(c)) return NULL;
*delim = c;
*p = '\0';
return p + 1;
}
int gc_options_parse_and_set_many(struct gc_options *options,
const char *str) {
if (!*str) return 1;
char *copy = strdup(str);
char *cur = copy;
int ret = 0;
while (1) {
char delim;
char *next = read_token(cur, is_option, is_option_end, &delim);
if (!next) break;
int option = gc_option_from_string(cur);
if (option < 0) break;
cur = next;
next = read_token(cur, is_value, is_value_end, &delim);
if (!next) break;
if (!gc_options_parse_and_set(options, option, cur)) break;
cur = next;
if (delim == '\0') {
ret = 1;
break;
}
}
free(copy);
return ret;
}
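// For example, a hypothetical configuration string:
//   gc_options_parse_and_set_many(options,
//                                 "heap-size=1g,parallelism=4,"
//                                 "heap-size-policy=adaptive");
// Size values accept k/m/g/t suffixes. A malformed token makes the call
// return 0; options parsed before the failure remain set.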

View file

@ -0,0 +1,211 @@
// For pthread_getattr_np.
#define _GNU_SOURCE
#include <errno.h>
#include <link.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
#define GC_IMPL 1
#include "debug.h"
#include "gc-align.h"
#include "gc-assert.h"
#include "gc-inline.h"
#include "gc-platform.h"
void gc_platform_init(void) {
// Nothing to do.
}
static uintptr_t fallback_current_thread_stack_base(void) GC_NEVER_INLINE;
static uintptr_t fallback_current_thread_stack_base(void) {
// Sloppily assume that there are very few frames between us and the
// thread entry or main function, and that therefore we haven't
// consumed more than a page of stack; we can then just round up the
// stack pointer to the page boundary.
fprintf(stderr,
"Using fallback strategy to capture stack base for thread %p.\n",
(void*)pthread_self());
int local;
uintptr_t hot = (uintptr_t)&local;
size_t page_size = getpagesize();
return (hot + page_size) & ~(page_size - 1);
}
uintptr_t gc_platform_current_thread_stack_base(void) {
pthread_t me = pthread_self();
pthread_attr_t attr;
int err = pthread_getattr_np(me, &attr);
if (err) {
errno = err;
// This case can occur for the main thread when running in a
// filesystem without /proc/stat.
perror("Failed to capture stack base via pthread_getattr_np");
return fallback_current_thread_stack_base();
}
void *stack_low_addr;
size_t stack_size;
err = pthread_attr_getstack(&attr, &stack_low_addr, &stack_size);
pthread_attr_destroy(&attr);
if (err) {
// Should never occur.
errno = err;
perror("pthread_attr_getstack");
return fallback_current_thread_stack_base();
}
return (uintptr_t)stack_low_addr + stack_size;
}
struct visit_data {
void (*f)(uintptr_t start, uintptr_t end, struct gc_heap *heap, void *data);
struct gc_heap *heap;
void *data;
};
static int visit_roots(struct dl_phdr_info *info, size_t size, void *data) {
struct visit_data *visit_data = data;
uintptr_t object_addr = info->dlpi_addr;
const char *object_name = info->dlpi_name;
const ElfW(Phdr) *program_headers = info->dlpi_phdr;
size_t program_headers_count = info->dlpi_phnum;
// From the loader's perspective, an ELF image is broken up into
// "segments", each of which is described by a "program header".
// Treat all writable data segments as potential edges into the
// GC-managed heap.
//
// Note that there are some RELRO segments which are initially
// writable but then remapped read-only. BDW-GC will exclude these,
// but we just punt for the time being and treat them as roots.
for (size_t i = 0; i < program_headers_count; i++) {
const ElfW(Phdr) *p = &program_headers[i];
if (p->p_type == PT_LOAD && (p->p_flags & PF_W)) {
uintptr_t start = p->p_vaddr + object_addr;
uintptr_t end = start + p->p_memsz;
DEBUG("found roots for '%s': [%p,%p)\n", object_name,
(void*)start, (void*)end);
visit_data->f(start, end, visit_data->heap, visit_data->data);
}
}
return 0;
}
void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start,
uintptr_t end,
struct gc_heap*,
void *data),
struct gc_heap *heap,
void *data) {
struct visit_data visit_data = { f, heap, data };
dl_iterate_phdr(visit_roots, &visit_data);
}
int gc_platform_processor_count(void) {
cpu_set_t set;
if (sched_getaffinity(0, sizeof (set), &set) != 0)
return 1;
return CPU_COUNT(&set);
}
uint64_t gc_platform_monotonic_nanoseconds(void) {
struct timespec ts;
if (clock_gettime(CLOCK_MONOTONIC, &ts))
GC_CRASH();
uint64_t s = ts.tv_sec;
uint64_t ns = ts.tv_nsec;
uint64_t ns_per_sec = 1000000000;
return s * ns_per_sec + ns;
}
size_t gc_platform_page_size(void) {
return getpagesize();
}
struct gc_reservation gc_platform_reserve_memory(size_t size,
size_t alignment) {
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
GC_ASSERT_EQ(alignment & (alignment - 1), 0);
GC_ASSERT_EQ(alignment, align_down(alignment, getpagesize()));
size_t extent = size + alignment;
void *mem = mmap(NULL, extent, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
perror("failed to reserve address space");
GC_CRASH();
}
uintptr_t base = (uintptr_t) mem;
uintptr_t end = base + extent;
uintptr_t aligned_base = alignment ? align_up(base, alignment) : base;
uintptr_t aligned_end = aligned_base + size;
if (aligned_base - base)
munmap((void*)base, aligned_base - base);
if (end - aligned_end)
munmap((void*)aligned_end, end - aligned_end);
return (struct gc_reservation){aligned_base, size};
}
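// For example, reserving 64 MiB aligned to 2 MiB maps 66 MiB of
// PROT_NONE address space and then unmaps the unaligned head and tail,
// leaving exactly [aligned_base, aligned_base + 64 MiB) reserved.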
void*
gc_platform_acquire_memory_from_reservation(struct gc_reservation reservation,
size_t offset, size_t size) {
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
GC_ASSERT(size <= reservation.size);
GC_ASSERT(offset <= reservation.size - size);
void *mem = mmap((void*)(reservation.base + offset), size,
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED) {
perror("mmap failed");
return NULL;
}
return mem;
}
void
gc_platform_release_reservation(struct gc_reservation reservation) {
if (munmap((void*)reservation.base, reservation.size) != 0)
perror("failed to unmap memory");
}
void*
gc_platform_acquire_memory(size_t size, size_t alignment) {
struct gc_reservation reservation =
gc_platform_reserve_memory(size, alignment);
return gc_platform_acquire_memory_from_reservation(reservation, 0, size);
}
void gc_platform_release_memory(void *ptr, size_t size) {
GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize()));
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
if (munmap(ptr, size) != 0)
perror("failed to unmap memory");
}
int gc_platform_populate_memory(void *ptr, size_t size) {
GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize()));
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
if (madvise(ptr, size, MADV_WILLNEED) == 0)
return 1;
perror("failed to populate memory");
return 0;
}
int gc_platform_discard_memory(void *ptr, size_t size) {
GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize()));
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
if (madvise(ptr, size, MADV_DONTNEED) == 0)
return 1;
perror("failed to discard memory");
return 0;
}

View file

@ -0,0 +1,48 @@
#ifndef GC_PLATFORM_H
#define GC_PLATFORM_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include <stdint.h>
#include "gc-visibility.h"
struct gc_heap;
GC_INTERNAL void gc_platform_init(void);
GC_INTERNAL uintptr_t gc_platform_current_thread_stack_base(void);
GC_INTERNAL
void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start,
uintptr_t end,
struct gc_heap *heap,
void *data),
struct gc_heap *heap,
void *data);
GC_INTERNAL int gc_platform_processor_count(void);
GC_INTERNAL uint64_t gc_platform_monotonic_nanoseconds(void);
GC_INTERNAL size_t gc_platform_page_size(void);
struct gc_reservation {
uintptr_t base;
size_t size;
};
GC_INTERNAL
struct gc_reservation gc_platform_reserve_memory(size_t size, size_t alignment);
GC_INTERNAL
void*
gc_platform_acquire_memory_from_reservation(struct gc_reservation reservation,
size_t offset, size_t size);
GC_INTERNAL
void gc_platform_release_reservation(struct gc_reservation reservation);
GC_INTERNAL void* gc_platform_acquire_memory(size_t size, size_t alignment);
GC_INTERNAL void gc_platform_release_memory(void *base, size_t size);
GC_INTERNAL int gc_platform_populate_memory(void *addr, size_t size);
GC_INTERNAL int gc_platform_discard_memory(void *addr, size_t size);
#endif // GC_PLATFORM_H

View file

@ -0,0 +1,92 @@
// For pthread_getattr_np.
#define _GNU_SOURCE
#include <pthread.h>
#include <setjmp.h>
#include <stdio.h>
#include <unistd.h>
#define GC_IMPL 1
#include "debug.h"
#include "gc-align.h"
#include "gc-assert.h"
#include "gc-inline.h"
#include "gc-platform.h"
#include "gc-stack.h"
static uintptr_t current_thread_hot_stack_addr(void) {
#ifdef __GNUC__
return (uintptr_t)__builtin_frame_address(0);
#else
uintptr_t local;
return (uintptr_t)&local;
#endif
}
// FIXME: check platform stack growth direction.
#define HOTTER_THAN <=
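// With HOTTER_THAN defined as <=, a "hotter" address is a numerically
// lower one: this assumes a downward-growing stack, which holds on the
// gnu-linux targets this file currently supports.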
static void capture_current_thread_hot_stack_addr(struct gc_stack_addr *addr) {
addr->addr = current_thread_hot_stack_addr();
}
static void capture_current_thread_cold_stack_addr(struct gc_stack_addr *addr) {
addr->addr = gc_platform_current_thread_stack_base();
}
void gc_stack_init(struct gc_stack *stack, struct gc_stack_addr *base) {
if (base)
stack->cold = *base;
else
capture_current_thread_cold_stack_addr(&stack->cold);
stack->hot = stack->cold;
}
void gc_stack_capture_hot(struct gc_stack *stack) {
capture_current_thread_hot_stack_addr(&stack->hot);
setjmp(stack->registers);
GC_ASSERT(stack->hot.addr HOTTER_THAN stack->cold.addr);
}
static void* call_with_stack(void* (*)(struct gc_stack_addr*, void*),
struct gc_stack_addr*, void*) GC_NEVER_INLINE;
static void* call_with_stack(void* (*f)(struct gc_stack_addr *, void *),
struct gc_stack_addr *addr, void *arg) {
return f(addr, arg);
}
void* gc_call_with_stack_addr(void* (*f)(struct gc_stack_addr *base,
void *arg),
void *arg) {
struct gc_stack_addr base;
capture_current_thread_hot_stack_addr(&base);
return call_with_stack(f, &base, arg);
}
void gc_stack_visit(struct gc_stack *stack,
void (*visit)(uintptr_t low, uintptr_t high,
struct gc_heap *heap, void *data),
struct gc_heap *heap,
void *data) {
{
uintptr_t low = (uintptr_t)stack->registers;
GC_ASSERT(low == align_down(low, sizeof(uintptr_t)));
uintptr_t high = low + sizeof(jmp_buf);
DEBUG("found mutator register roots for %p: [%p,%p)\n", stack,
(void*)low, (void*)high);
visit(low, high, heap, data);
}
if (0 HOTTER_THAN 1) {
DEBUG("found mutator stack roots for %p: [%p,%p)\n", stack,
(void*)stack->hot.addr, (void*)stack->cold.addr);
visit(align_up(stack->hot.addr, sizeof(uintptr_t)),
align_down(stack->cold.addr, sizeof(uintptr_t)),
heap, data);
} else {
DEBUG("found mutator stack roots for %p: [%p,%p)\n", stack,
(void*)stack->cold.addr, (void*)stack->hot.addr);
visit(align_up(stack->cold.addr, sizeof(uintptr_t)),
align_down(stack->hot.addr, sizeof(uintptr_t)),
heap, data);
}
}

View file

@ -0,0 +1,33 @@
#ifndef GC_STACK_H
#define GC_STACK_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include "gc-inline.h"
#include <setjmp.h>
struct gc_stack_addr {
uintptr_t addr;
};
struct gc_stack {
struct gc_stack_addr cold;
struct gc_stack_addr hot;
jmp_buf registers;
};
struct gc_heap;
GC_INTERNAL void gc_stack_init(struct gc_stack *stack,
struct gc_stack_addr *base);
GC_INTERNAL void gc_stack_capture_hot(struct gc_stack *stack);
GC_INTERNAL void gc_stack_visit(struct gc_stack *stack,
void (*visit)(uintptr_t low, uintptr_t high,
struct gc_heap *heap,
void *data),
struct gc_heap *heap,
void *data);
#endif // GC_STACK_H

View file

@ -0,0 +1,56 @@
#ifndef GC_TRACE_H
#define GC_TRACE_H
#ifndef GC_IMPL
#error internal header file, not part of API
#endif
#include "gc-config.h"
#include "gc-assert.h"
#include "gc-conservative-ref.h"
#include "gc-embedder-api.h"
static inline int gc_has_mutator_conservative_roots(void) {
return GC_CONSERVATIVE_ROOTS;
}
static inline int gc_mutator_conservative_roots_may_be_interior(void) {
return 1;
}
static inline int gc_has_global_conservative_roots(void) {
return GC_CONSERVATIVE_ROOTS;
}
static inline int gc_has_conservative_intraheap_edges(void) {
return GC_CONSERVATIVE_TRACE;
}
static inline int gc_has_conservative_roots(void) {
return gc_has_mutator_conservative_roots() ||
gc_has_global_conservative_roots();
}
enum gc_trace_kind {
GC_TRACE_PRECISELY,
GC_TRACE_NONE,
GC_TRACE_CONSERVATIVELY,
GC_TRACE_EPHEMERON,
};
struct gc_trace_plan {
enum gc_trace_kind kind;
size_t size; // For conservative tracing.
};
static inline int
gc_conservative_ref_might_be_a_heap_object(struct gc_conservative_ref ref,
int possibly_interior) {
// Assume that the minimum page size is 4096, and that the first page
// will contain no heap objects.
if (gc_conservative_ref_value(ref) < 4096)
return 0;
if (possibly_interior)
return 1;
return gc_is_valid_conservative_ref_displacement
(gc_conservative_ref_value(ref) & (sizeof(uintptr_t) - 1));
}
#endif // GC_TRACE_H

View file

@ -0,0 +1,6 @@
#include <assert.h>
#ifdef GC_TRACEPOINT_LTTNG
#define LTTNG_UST_TRACEPOINT_DEFINE
#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
#include "gc-lttng.h"
#endif // GC_TRACEPOINT_LTTNG

View file

@ -0,0 +1,59 @@
#ifndef GROWABLE_HEAP_SIZER_H
#define GROWABLE_HEAP_SIZER_H
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include "assert.h"
#include "heap-sizer.h"
// This is a simple heap-sizing algorithm that will grow the heap if it is
// smaller than a given multiplier of the live data size. It does not shrink
// the heap.
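// For example, with the default multiplier of 1.75 and 100 MB of live
// data after a collection, a 128 MB heap would be grown to 175 MB,
// whereas a 256 MB heap would be left as-is.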
struct gc_growable_heap_sizer {
struct gc_heap *heap;
double multiplier;
pthread_mutex_t lock;
};
static void
gc_growable_heap_sizer_set_multiplier(struct gc_growable_heap_sizer *sizer,
double multiplier) {
pthread_mutex_lock(&sizer->lock);
sizer->multiplier = multiplier;
pthread_mutex_unlock(&sizer->lock);
}
static void
gc_growable_heap_sizer_on_gc(struct gc_growable_heap_sizer *sizer,
size_t heap_size, size_t live_bytes,
uint64_t pause_ns,
void (*set_heap_size)(struct gc_heap*, size_t)) {
pthread_mutex_lock(&sizer->lock);
size_t target_size = live_bytes * sizer->multiplier;
if (target_size > heap_size)
set_heap_size(sizer->heap, target_size);
pthread_mutex_unlock(&sizer->lock);
}
static struct gc_growable_heap_sizer*
gc_make_growable_heap_sizer(struct gc_heap *heap, double multiplier) {
struct gc_growable_heap_sizer *sizer;
sizer = malloc(sizeof(*sizer));
if (!sizer)
GC_CRASH();
memset(sizer, 0, sizeof(*sizer));
sizer->heap = heap;
sizer->multiplier = multiplier;
pthread_mutex_init(&sizer->lock, NULL);
return sizer;
}
static void
gc_destroy_growable_heap_sizer(struct gc_growable_heap_sizer *sizer) {
free(sizer);
}
#endif // GROWABLE_HEAP_SIZER_H

View file

@ -0,0 +1,74 @@
#ifndef HEAP_SIZER_H
#define HEAP_SIZER_H
#include "gc-api.h"
#include "gc-options-internal.h"
#include "growable-heap-sizer.h"
#include "adaptive-heap-sizer.h"
struct gc_heap_sizer {
enum gc_heap_size_policy policy;
union {
struct gc_growable_heap_sizer* growable;
struct gc_adaptive_heap_sizer* adaptive;
};
};
static struct gc_heap_sizer
gc_make_heap_sizer(struct gc_heap *heap,
const struct gc_common_options *options,
uint64_t (*get_allocation_counter_from_thread)(struct gc_heap*),
void (*set_heap_size_from_thread)(struct gc_heap*, size_t),
struct gc_background_thread *thread) {
struct gc_heap_sizer ret = { options->heap_size_policy, };
switch (options->heap_size_policy) {
case GC_HEAP_SIZE_FIXED:
break;
case GC_HEAP_SIZE_GROWABLE:
ret.growable =
gc_make_growable_heap_sizer(heap, options->heap_size_multiplier);
break;
case GC_HEAP_SIZE_ADAPTIVE:
ret.adaptive =
gc_make_adaptive_heap_sizer (heap, options->heap_expansiveness,
get_allocation_counter_from_thread,
set_heap_size_from_thread,
thread);
break;
default:
GC_CRASH();
}
return ret;
}
static void
gc_heap_sizer_on_gc(struct gc_heap_sizer sizer, size_t heap_size,
size_t live_bytes, size_t pause_ns,
void (*set_heap_size)(struct gc_heap*, size_t)) {
switch (sizer.policy) {
case GC_HEAP_SIZE_FIXED:
break;
case GC_HEAP_SIZE_GROWABLE:
gc_growable_heap_sizer_on_gc(sizer.growable, heap_size, live_bytes,
pause_ns, set_heap_size);
break;
case GC_HEAP_SIZE_ADAPTIVE:
if (sizer.adaptive->background_task_id < 0)
gc_adaptive_heap_sizer_background_task(sizer.adaptive);
gc_adaptive_heap_sizer_on_gc(sizer.adaptive, live_bytes, pause_ns,
set_heap_size);
break;
default:
GC_CRASH();
}
}
#endif // HEAP_SIZER_H

View file

@ -0,0 +1,525 @@
#ifndef LARGE_OBJECT_SPACE_H
#define LARGE_OBJECT_SPACE_H
#include <pthread.h>
#include <malloc.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "gc-assert.h"
#include "gc-ref.h"
#include "gc-conservative-ref.h"
#include "gc-trace.h"
#include "address-map.h"
#include "address-set.h"
#include "background-thread.h"
#include "freelist.h"
// A mark-sweep space with generational support.
struct gc_heap;
enum large_object_state {
LARGE_OBJECT_NURSERY = 0,
LARGE_OBJECT_MARKED_BIT = 1,
LARGE_OBJECT_MARK_TOGGLE_BIT = 2,
LARGE_OBJECT_MARK_0 = LARGE_OBJECT_MARKED_BIT,
LARGE_OBJECT_MARK_1 = LARGE_OBJECT_MARKED_BIT | LARGE_OBJECT_MARK_TOGGLE_BIT
};
struct large_object {
uintptr_t addr;
size_t size;
};
struct large_object_node;
struct large_object_live_data {
uint8_t mark;
enum gc_trace_kind trace;
};
struct large_object_dead_data {
uint8_t age;
struct large_object_node **prev;
struct large_object_node *next;
};
struct large_object_data {
uint8_t is_live;
union {
struct large_object_live_data live;
struct large_object_dead_data dead;
};
};
#define SPLAY_TREE_PREFIX large_object_
typedef struct large_object large_object_key_span;
typedef uintptr_t large_object_key;
typedef struct large_object_data large_object_value;
static inline int
large_object_compare(uintptr_t addr, struct large_object obj) {
if (addr < obj.addr) return -1;
if (addr - obj.addr < obj.size) return 0;
return 1;
}
static inline uintptr_t
large_object_span_start(struct large_object obj) {
return obj.addr;
}
#include "splay-tree.h"
DEFINE_FREELIST(large_object_freelist, sizeof(uintptr_t) * 8 - 1, 2,
struct large_object_node*);
struct large_object_space {
// Lock for object_map, quarantine, nursery, and marked.
pthread_mutex_t lock;
// Lock for object_tree.
pthread_mutex_t object_tree_lock;
// Lock for remembered_edges.
pthread_mutex_t remembered_edges_lock;
// Locking order: You must hold the space lock when taking
// object_tree_lock. Take no other lock while holding
// object_tree_lock. remembered_edges_lock is a leaf; take no locks
// when holding it.
// The value for a large_object_node's "mark" field indicating a
// marked object; always nonzero, and alternating between two values
// at every major GC.
uint8_t marked;
// Splay tree of objects, keyed by <addr, size> tuple. Useful when
// looking up object-for-address.
struct large_object_tree object_tree;
// Hash table of objects, where values are pointers to splay tree
// nodes. Useful when you have the object address and just want to
// check something about it (for example its size).
struct address_map object_map;
// In generational configurations, the nursery records all objects
// allocated since the last collection cycle.
struct address_map nursery;
// Size-segregated freelist of dead objects. Allocations are first
// served from the quarantine freelist before falling back to the OS
// if needed. Collected objects spend a second or two in quarantine
// before being returned to the OS. This is an optimization to avoid
// mucking about too much with the TLB and so on.
struct large_object_freelist quarantine;
// Set of edges from lospace that may reference young objects,
// possibly in other spaces.
struct address_set remembered_edges;
size_t page_size;
size_t page_size_log2;
size_t total_pages;
size_t free_pages;
size_t live_pages_at_last_collection;
size_t pages_freed_by_last_collection;
int synchronous_release;
};
static size_t
large_object_space_npages(struct large_object_space *space, size_t bytes) {
return (bytes + space->page_size - 1) >> space->page_size_log2;
}
static size_t
large_object_space_size_at_last_collection(struct large_object_space *space) {
return space->live_pages_at_last_collection << space->page_size_log2;
}
static inline int
large_object_space_contains_with_lock(struct large_object_space *space,
struct gc_ref ref) {
return address_map_contains(&space->object_map, gc_ref_value(ref));
}
static inline int
large_object_space_contains(struct large_object_space *space,
struct gc_ref ref) {
pthread_mutex_lock(&space->lock);
int ret = large_object_space_contains_with_lock(space, ref);
pthread_mutex_unlock(&space->lock);
return ret;
}
static inline struct gc_ref
large_object_space_object_containing_edge(struct large_object_space *space,
struct gc_edge edge) {
pthread_mutex_lock(&space->object_tree_lock);
struct large_object_node *node =
large_object_tree_lookup(&space->object_tree, gc_edge_address(edge));
uintptr_t addr = (node && node->value.is_live) ? node->key.addr : 0;
pthread_mutex_unlock(&space->object_tree_lock);
return gc_ref(addr);
}
static void
large_object_space_start_gc(struct large_object_space *space, int is_minor_gc) {
// Take the space lock to prevent
// large_object_space_process_quarantine from concurrently mutating
// the object map.
pthread_mutex_lock(&space->lock);
if (!is_minor_gc) {
space->marked ^= LARGE_OBJECT_MARK_TOGGLE_BIT;
space->live_pages_at_last_collection = 0;
}
}
static inline struct gc_trace_plan
large_object_space_object_trace_plan(struct large_object_space *space,
struct gc_ref ref) {
uintptr_t node_bits =
address_map_lookup(&space->object_map, gc_ref_value(ref), 0);
GC_ASSERT(node_bits);
struct large_object_node *node = (struct large_object_node*) node_bits;
switch (node->value.live.trace) {
case GC_TRACE_PRECISELY:
return (struct gc_trace_plan){ GC_TRACE_PRECISELY, };
case GC_TRACE_NONE:
return (struct gc_trace_plan){ GC_TRACE_NONE, };
#if GC_CONSERVATIVE_TRACE
case GC_TRACE_CONSERVATIVELY: {
return (struct gc_trace_plan){ GC_TRACE_CONSERVATIVELY, node->key.size };
}
// No large ephemerons.
#endif
default:
GC_CRASH();
}
}
static uint8_t*
large_object_node_mark_loc(struct large_object_node *node) {
GC_ASSERT(node->value.is_live);
return &node->value.live.mark;
}
static uint8_t
large_object_node_get_mark(struct large_object_node *node) {
return atomic_load_explicit(large_object_node_mark_loc(node),
memory_order_acquire);
}
static struct large_object_node*
large_object_space_lookup(struct large_object_space *space, struct gc_ref ref) {
return (struct large_object_node*) address_map_lookup(&space->object_map,
gc_ref_value(ref),
0);
}
static int
large_object_space_mark(struct large_object_space *space, struct gc_ref ref) {
struct large_object_node *node = large_object_space_lookup(space, ref);
if (!node)
return 0;
GC_ASSERT(node->value.is_live);
uint8_t *loc = large_object_node_mark_loc(node);
uint8_t mark = atomic_load_explicit(loc, memory_order_relaxed);
do {
if (mark == space->marked)
return 0;
} while (!atomic_compare_exchange_weak_explicit(loc, &mark, space->marked,
memory_order_acq_rel,
memory_order_acquire));
size_t pages = node->key.size >> space->page_size_log2;
atomic_fetch_add(&space->live_pages_at_last_collection, pages);
return 1;
}
static int
large_object_space_is_marked(struct large_object_space *space,
struct gc_ref ref) {
struct large_object_node *node = large_object_space_lookup(space, ref);
if (!node)
return 0;
GC_ASSERT(node->value.is_live);
return atomic_load_explicit(large_object_node_mark_loc(node),
memory_order_acquire) == space->marked;
}
static int
large_object_space_is_survivor(struct large_object_space *space,
struct gc_ref ref) {
GC_ASSERT(large_object_space_contains(space, ref));
pthread_mutex_lock(&space->lock);
int old = large_object_space_is_marked(space, ref);
pthread_mutex_unlock(&space->lock);
return old;
}
static int
large_object_space_remember_edge(struct large_object_space *space,
struct gc_ref obj,
struct gc_edge edge) {
GC_ASSERT(large_object_space_contains(space, obj));
if (!large_object_space_is_survivor(space, obj))
return 0;
uintptr_t edge_addr = gc_edge_address(edge);
int remembered = 0;
pthread_mutex_lock(&space->remembered_edges_lock);
if (!address_set_contains(&space->remembered_edges, edge_addr)) {
address_set_add(&space->remembered_edges, edge_addr);
remembered = 1;
}
pthread_mutex_unlock(&space->remembered_edges_lock);
return remembered;
}
static void
large_object_space_forget_edge(struct large_object_space *space,
struct gc_edge edge) {
uintptr_t edge_addr = gc_edge_address(edge);
pthread_mutex_lock(&space->remembered_edges_lock);
GC_ASSERT(address_set_contains(&space->remembered_edges, edge_addr));
address_set_remove(&space->remembered_edges, edge_addr);
pthread_mutex_unlock(&space->remembered_edges_lock);
}
static void
large_object_space_clear_remembered_edges(struct large_object_space *space) {
address_set_clear(&space->remembered_edges);
}
static void
large_object_space_add_to_freelist(struct large_object_space *space,
struct large_object_node *node) {
node->value.is_live = 0;
struct large_object_dead_data *data = &node->value.dead;
memset(data, 0, sizeof(*data));
data->age = 0;
struct large_object_node **bucket =
large_object_freelist_bucket(&space->quarantine, node->key.size);
data->next = *bucket;
if (data->next)
data->next->value.dead.prev = &data->next;
data->prev = bucket;
*bucket = node;
}
static void
large_object_space_remove_from_freelist(struct large_object_space *space,
struct large_object_node *node) {
GC_ASSERT(!node->value.is_live);
struct large_object_dead_data *dead = &node->value.dead;
GC_ASSERT(dead->prev);
if (dead->next)
dead->next->value.dead.prev = dead->prev;
*dead->prev = dead->next;
dead->prev = NULL;
dead->next = NULL;
}
static void
large_object_space_sweep_one(uintptr_t addr, uintptr_t node_bits,
void *data) {
struct large_object_space *space = data;
struct large_object_node *node = (struct large_object_node*) node_bits;
if (!node->value.is_live)
return;
GC_ASSERT(node->value.is_live);
uint8_t mark = atomic_load_explicit(large_object_node_mark_loc(node),
memory_order_acquire);
if (mark != space->marked)
large_object_space_add_to_freelist(space, node);
}
static void
large_object_space_process_quarantine(void *data) {
struct large_object_space *space = data;
pthread_mutex_lock(&space->lock);
pthread_mutex_lock(&space->object_tree_lock);
for (size_t idx = 0; idx < large_object_freelist_num_size_classes(); idx++) {
struct large_object_node **link = &space->quarantine.buckets[idx];
for (struct large_object_node *node = *link; node; node = *link) {
GC_ASSERT(!node->value.is_live);
if (++node->value.dead.age < 2) {
link = &node->value.dead.next;
} else {
struct large_object obj = node->key;
large_object_space_remove_from_freelist(space, node);
address_map_remove(&space->object_map, obj.addr);
large_object_tree_remove(&space->object_tree, obj.addr);
gc_platform_release_memory((void*)obj.addr, obj.size);
}
}
}
pthread_mutex_unlock(&space->object_tree_lock);
pthread_mutex_unlock(&space->lock);
}
static void
large_object_space_finish_gc(struct large_object_space *space,
int is_minor_gc) {
if (GC_GENERATIONAL) {
address_map_for_each(is_minor_gc ? &space->nursery : &space->object_map,
large_object_space_sweep_one,
space);
address_map_clear(&space->nursery);
} else {
address_map_for_each(&space->object_map,
large_object_space_sweep_one,
space);
}
size_t free_pages =
space->total_pages - space->live_pages_at_last_collection;
space->pages_freed_by_last_collection = free_pages - space->free_pages;
space->free_pages = free_pages;
pthread_mutex_unlock(&space->lock);
if (space->synchronous_release)
large_object_space_process_quarantine(space);
}
static void
large_object_space_add_to_allocation_counter(struct large_object_space *space,
uint64_t *counter) {
size_t pages = space->total_pages - space->free_pages;
pages -= space->live_pages_at_last_collection;
*counter += pages << space->page_size_log2;
}
static inline struct gc_ref
large_object_space_mark_conservative_ref(struct large_object_space *space,
struct gc_conservative_ref ref,
int possibly_interior) {
uintptr_t addr = gc_conservative_ref_value(ref);
if (!possibly_interior) {
// Addr not aligned on page boundary? Not a large object.
// Otherwise strip the displacement to obtain the true base address.
uintptr_t displacement = addr & (space->page_size - 1);
if (!gc_is_valid_conservative_ref_displacement(displacement))
return gc_ref_null();
addr -= displacement;
}
struct large_object_node *node;
if (possibly_interior) {
pthread_mutex_lock(&space->object_tree_lock);
node = large_object_tree_lookup(&space->object_tree, addr);
pthread_mutex_unlock(&space->object_tree_lock);
} else {
node = large_object_space_lookup(space, gc_ref(addr));
}
if (node && node->value.is_live &&
large_object_space_mark(space, gc_ref(node->key.addr)))
return gc_ref(node->key.addr);
return gc_ref_null();
}
static void*
large_object_space_alloc(struct large_object_space *space, size_t npages,
enum gc_trace_kind trace) {
void *ret = NULL;
pthread_mutex_lock(&space->lock);
size_t size = npages << space->page_size_log2;
for (size_t idx = large_object_freelist_size_class(size);
idx < large_object_freelist_num_size_classes();
idx++) {
struct large_object_node *node = space->quarantine.buckets[idx];
while (node && node->key.size < size)
node = node->value.dead.next;
if (node) {
// We found a suitable hole in quarantine. Unlink it from the
// freelist.
large_object_space_remove_from_freelist(space, node);
// Mark the hole as live.
node->value.is_live = 1;
memset(&node->value.live, 0, sizeof(node->value.live));
node->value.live.mark = LARGE_OBJECT_NURSERY;
node->value.live.trace = trace;
// If the hole is actually too big, trim its tail.
if (node->key.size > size) {
struct large_object tail = {node->key.addr + size, node->key.size - size};
struct large_object_data tail_value = {0,};
node->key.size = size;
pthread_mutex_lock(&space->object_tree_lock);
struct large_object_node *tail_node =
large_object_tree_insert(&space->object_tree, tail, tail_value);
pthread_mutex_unlock(&space->object_tree_lock);
uintptr_t tail_node_bits = (uintptr_t)tail_node;
address_map_add(&space->object_map, tail_node->key.addr,
tail_node_bits);
large_object_space_add_to_freelist(space, tail_node);
}
// Add the object to the nursery.
if (GC_GENERATIONAL)
address_map_add(&space->nursery, node->key.addr, (uintptr_t)node);
space->free_pages -= npages;
ret = (void*)node->key.addr;
memset(ret, 0, size);
break;
}
}
// If we didn't find anything in the quarantine, get fresh pages from the OS.
if (!ret) {
ret = gc_platform_acquire_memory(size, 0);
if (ret) {
uintptr_t addr = (uintptr_t)ret;
struct large_object k = { addr, size };
struct large_object_data v = {0,};
v.is_live = 1;
v.live.mark = LARGE_OBJECT_NURSERY;
v.live.trace = trace;
pthread_mutex_lock(&space->object_tree_lock);
struct large_object_node *node =
large_object_tree_insert(&space->object_tree, k, v);
uintptr_t node_bits = (uintptr_t)node;
address_map_add(&space->object_map, addr, node_bits);
space->total_pages += npages;
pthread_mutex_unlock(&space->object_tree_lock);
}
}
pthread_mutex_unlock(&space->lock);
return ret;
}
static int
large_object_space_init(struct large_object_space *space,
struct gc_heap *heap,
struct gc_background_thread *thread) {
memset(space, 0, sizeof(*space));
pthread_mutex_init(&space->lock, NULL);
pthread_mutex_init(&space->object_tree_lock, NULL);
pthread_mutex_init(&space->remembered_edges_lock, NULL);
space->page_size = getpagesize();
space->page_size_log2 = __builtin_ctz(space->page_size);
space->marked = LARGE_OBJECT_MARK_0;
large_object_tree_init(&space->object_tree);
address_map_init(&space->object_map);
address_map_init(&space->nursery);
large_object_freelist_init(&space->quarantine);
address_set_init(&space->remembered_edges);
if (thread)
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START,
large_object_space_process_quarantine,
space);
else
space->synchronous_release = 1;
return 1;
}
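// A rough sketch of a major collection over this space (the rest of the
// heap and error handling elided). The space lock is held from start_gc
// until finish_gc, so queries during the pause use the _with_lock
// variants:
//
//   large_object_space_start_gc(space, /*is_minor_gc=*/0);
//   /* for each traced reference `ref` that may point into this space: */
//   if (large_object_space_contains_with_lock(space, ref))
//     large_object_space_mark(space, ref);
//   /* once tracing has finished: */
//   large_object_space_finish_gc(space, /*is_minor_gc=*/0);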
#endif // LARGE_OBJECT_SPACE_H

View file

@ -0,0 +1,59 @@
#ifndef LOCAL_WORKLIST_H
#define LOCAL_WORKLIST_H
#include "assert.h"
#define LOCAL_WORKLIST_SIZE 1024
#define LOCAL_WORKLIST_MASK (LOCAL_WORKLIST_SIZE - 1)
#define LOCAL_WORKLIST_SHARE_AMOUNT (LOCAL_WORKLIST_SIZE * 3 / 4)
struct local_worklist {
size_t read;
size_t write;
struct gc_ref data[LOCAL_WORKLIST_SIZE];
};
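// The read and write cursors only ever increase; they are reduced
// modulo LOCAL_WORKLIST_SIZE when indexing `data`, so
// local_worklist_size() stays correct across wrap-around as long as at
// most LOCAL_WORKLIST_SIZE entries are in flight.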
static inline void
local_worklist_init(struct local_worklist *q) {
q->read = q->write = 0;
}
static inline void
local_worklist_poison(struct local_worklist *q) {
q->read = 0; q->write = LOCAL_WORKLIST_SIZE;
}
static inline size_t
local_worklist_size(struct local_worklist *q) {
return q->write - q->read;
}
static inline int
local_worklist_empty(struct local_worklist *q) {
return local_worklist_size(q) == 0;
}
static inline int
local_worklist_full(struct local_worklist *q) {
return local_worklist_size(q) >= LOCAL_WORKLIST_SIZE;
}
static inline void
local_worklist_push(struct local_worklist *q, struct gc_ref v) {
ASSERT(!local_worklist_full(q));
q->data[q->write++ & LOCAL_WORKLIST_MASK] = v;
}
static inline struct gc_ref
local_worklist_pop(struct local_worklist *q) {
ASSERT(!local_worklist_empty(q));
return q->data[q->read++ & LOCAL_WORKLIST_MASK];
}
static inline size_t
local_worklist_pop_many(struct local_worklist *q, struct gc_ref **objv,
size_t limit) {
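  // Hand out a pointer directly into the ring instead of copying; the run
  // is capped at the wrap-around point so it stays contiguous.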
size_t avail = local_worklist_size(q);
size_t read = q->read & LOCAL_WORKLIST_MASK;
size_t contig = LOCAL_WORKLIST_SIZE - read;
if (contig < avail) avail = contig;
if (limit < avail) avail = limit;
*objv = q->data + read;
q->read += avail;
return avail;
}
#endif // LOCAL_WORKLIST_H
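The local worklist is a fixed-size, power-of-two ring buffer: the read and write counters only ever grow, and masking them with LOCAL_WORKLIST_MASK maps them into the buffer, so size, emptiness, and fullness all fall out of a single subtraction. The following standalone sketch shows the same indexing scheme with int payloads in place of struct gc_ref and a small RING_SIZE so it can be compiled and run on its own; none of these names are Whippet's.

/* Sketch of the power-of-two ring-buffer indexing used by local_worklist. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define RING_SIZE 8
#define RING_MASK (RING_SIZE - 1)

struct ring { size_t read, write; int data[RING_SIZE]; };

static size_t ring_size(struct ring *q) { return q->write - q->read; }
static void ring_push(struct ring *q, int v) {
  assert(ring_size(q) < RING_SIZE);
  q->data[q->write++ & RING_MASK] = v;
}
static int ring_pop(struct ring *q) {
  assert(ring_size(q));
  return q->data[q->read++ & RING_MASK];
}
/* Like local_worklist_pop_many: return a pointer to a contiguous run of up
 * to LIMIT entries, bounded by the wrap-around point. */
static size_t ring_pop_many(struct ring *q, int **objv, size_t limit) {
  size_t avail = ring_size(q);
  size_t read = q->read & RING_MASK;
  size_t contig = RING_SIZE - read;
  if (contig < avail) avail = contig;
  if (limit < avail) avail = limit;
  *objv = q->data + read;
  q->read += avail;
  return avail;
}

int main(void) {
  struct ring q = { 0, 0, { 0 } };
  for (int i = 0; i < 6; i++) ring_push(&q, i);
  int *chunk;
  size_t n = ring_pop_many(&q, &chunk, 4);
  printf("popped %zu: first=%d last=%d; then %d\n",
         n, chunk[0], chunk[n - 1], ring_pop(&q));
  return 0;
}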

1266
libguile/whippet/src/mmc.c Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,433 @@
#ifndef PARALLEL_TRACER_H
#define PARALLEL_TRACER_H
#include <pthread.h>
#include <stdatomic.h>
#include <sys/mman.h>
#include <unistd.h>
#include "assert.h"
#include "debug.h"
#include "gc-inline.h"
#include "gc-tracepoint.h"
#include "local-worklist.h"
#include "root-worklist.h"
#include "shared-worklist.h"
#include "spin.h"
#include "tracer.h"
#ifdef VERBOSE_LOGGING
#define LOG(...) fprintf (stderr, "LOG: " __VA_ARGS__)
#else
#define LOG(...) do { } while (0)
#endif
enum trace_worker_state {
TRACE_WORKER_STOPPED,
TRACE_WORKER_IDLE,
TRACE_WORKER_TRACING,
TRACE_WORKER_STOPPING,
TRACE_WORKER_DEAD
};
struct gc_heap;
struct gc_trace_worker {
struct gc_heap *heap;
struct gc_tracer *tracer;
size_t id;
size_t steal_id;
pthread_t thread;
enum trace_worker_state state;
pthread_mutex_t lock;
struct shared_worklist shared;
struct local_worklist local;
struct gc_trace_worker_data *data;
};
static inline struct gc_trace_worker_data*
gc_trace_worker_data(struct gc_trace_worker *worker) {
return worker->data;
}
#define TRACE_WORKERS_MAX_COUNT 8
struct gc_tracer {
struct gc_heap *heap;
atomic_size_t active_tracers;
size_t worker_count;
long epoch;
pthread_mutex_t lock;
pthread_cond_t cond;
int trace_roots_only;
struct root_worklist roots;
struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT];
};
static int
trace_worker_init(struct gc_trace_worker *worker, struct gc_heap *heap,
struct gc_tracer *tracer, size_t id) {
worker->heap = heap;
worker->tracer = tracer;
worker->id = id;
worker->steal_id = 0;
worker->thread = 0;
worker->state = TRACE_WORKER_STOPPED;
pthread_mutex_init(&worker->lock, NULL);
worker->data = NULL;
local_worklist_init(&worker->local);
return shared_worklist_init(&worker->shared);
}
static void trace_worker_trace(struct gc_trace_worker *worker);
static void*
trace_worker_thread(void *data) {
struct gc_trace_worker *worker = data;
struct gc_tracer *tracer = worker->tracer;
long trace_epoch = 0;
pthread_mutex_lock(&worker->lock);
while (1) {
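    // Trace once per observed epoch bump, then park on the condition
    // variable; spurious wakeups are harmless because the epoch is
    // re-checked before tracing again.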
long epoch = atomic_load_explicit(&tracer->epoch, memory_order_acquire);
if (trace_epoch != epoch) {
trace_epoch = epoch;
trace_worker_trace(worker);
}
pthread_cond_wait(&tracer->cond, &worker->lock);
}
return NULL;
}
static int
trace_worker_spawn(struct gc_trace_worker *worker) {
if (pthread_create(&worker->thread, NULL, trace_worker_thread, worker)) {
perror("spawning tracer thread failed");
return 0;
}
return 1;
}
static int
gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
size_t parallelism) {
tracer->heap = heap;
atomic_init(&tracer->active_tracers, 0);
tracer->epoch = 0;
tracer->trace_roots_only = 0;
pthread_mutex_init(&tracer->lock, NULL);
pthread_cond_init(&tracer->cond, NULL);
root_worklist_init(&tracer->roots);
size_t desired_worker_count = parallelism;
ASSERT(desired_worker_count);
if (desired_worker_count > TRACE_WORKERS_MAX_COUNT)
desired_worker_count = TRACE_WORKERS_MAX_COUNT;
if (!trace_worker_init(&tracer->workers[0], heap, tracer, 0))
return 0;
tracer->worker_count++;
for (size_t i = 1; i < desired_worker_count; i++) {
if (!trace_worker_init(&tracer->workers[i], heap, tracer, i))
break;
pthread_mutex_lock(&tracer->workers[i].lock);
if (trace_worker_spawn(&tracer->workers[i]))
tracer->worker_count++;
else
break;
}
return 1;
}
static void gc_tracer_prepare(struct gc_tracer *tracer) {
for (size_t i = 0; i < tracer->worker_count; i++)
tracer->workers[i].steal_id = (i + 1) % tracer->worker_count;
}
static void gc_tracer_release(struct gc_tracer *tracer) {
for (size_t i = 0; i < tracer->worker_count; i++)
shared_worklist_release(&tracer->workers[i].shared);
}
static inline void
gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
root_worklist_push(&tracer->roots, root);
}
static inline void
tracer_unpark_all_workers(struct gc_tracer *tracer) {
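  // Publish a new epoch and wake every parked worker; workers that are
  // already tracing pick up the new epoch the next time they check.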
long old_epoch =
atomic_fetch_add_explicit(&tracer->epoch, 1, memory_order_acq_rel);
long epoch = old_epoch + 1;
DEBUG("starting trace; %zu workers; epoch=%ld\n", tracer->worker_count,
epoch);
GC_TRACEPOINT(trace_unpark_all);
pthread_cond_broadcast(&tracer->cond);
}
static inline void
tracer_maybe_unpark_workers(struct gc_tracer *tracer) {
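  // Called after publishing work to a shared deque: if not all workers are
  // actively tracing, wake them all so the new work can be stolen.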
size_t active =
atomic_load_explicit(&tracer->active_tracers, memory_order_acquire);
if (active < tracer->worker_count)
tracer_unpark_all_workers(tracer);
}
static inline void
tracer_share(struct gc_trace_worker *worker) {
LOG("tracer #%zu: sharing\n", worker->id);
GC_TRACEPOINT(trace_share);
size_t to_share = LOCAL_WORKLIST_SHARE_AMOUNT;
while (to_share) {
struct gc_ref *objv;
size_t count = local_worklist_pop_many(&worker->local, &objv, to_share);
shared_worklist_push_many(&worker->shared, objv, count);
to_share -= count;
}
tracer_maybe_unpark_workers(worker->tracer);
}
static inline void
gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
ASSERT(gc_ref_is_heap_object(ref));
if (local_worklist_full(&worker->local))
tracer_share(worker);
local_worklist_push(&worker->local, ref);
}
static struct gc_ref
tracer_steal_from_worker(struct gc_tracer *tracer, size_t id) {
ASSERT(id < tracer->worker_count);
return shared_worklist_steal(&tracer->workers[id].shared);
}
static int
tracer_can_steal_from_worker(struct gc_tracer *tracer, size_t id) {
ASSERT(id < tracer->worker_count);
return shared_worklist_can_steal(&tracer->workers[id].shared);
}
static struct gc_ref
trace_worker_steal_from_any(struct gc_trace_worker *worker,
struct gc_tracer *tracer) {
for (size_t i = 0; i < tracer->worker_count; i++) {
LOG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id);
struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id);
if (!gc_ref_is_null(obj)) {
LOG("tracer #%zu: stealing got %p\n", worker->id,
gc_ref_heap_object(obj));
return obj;
}
worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
}
LOG("tracer #%zu: failed to steal\n", worker->id);
return gc_ref_null();
}
static int
trace_worker_can_steal_from_any(struct gc_trace_worker *worker,
struct gc_tracer *tracer) {
LOG("tracer #%zu: checking if any worker has tasks\n", worker->id);
for (size_t i = 0; i < tracer->worker_count; i++) {
int res = tracer_can_steal_from_worker(tracer, worker->steal_id);
if (res) {
LOG("tracer #%zu: worker #%zu has tasks!\n", worker->id,
worker->steal_id);
return 1;
}
worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
}
LOG("tracer #%zu: nothing to steal\n", worker->id);
return 0;
}
static size_t
trace_worker_should_continue(struct gc_trace_worker *worker, size_t spin_count) {
// Helper workers should park themselves immediately if they have no work.
if (worker->id != 0)
return 0;
struct gc_tracer *tracer = worker->tracer;
if (atomic_load_explicit(&tracer->active_tracers, memory_order_acquire) != 1) {
LOG("checking for termination: tracers active, spinning #%zu\n", spin_count);
yield_for_spin(spin_count);
return 1;
}
// All trace workers have exited except us, the main worker. We are
// probably done, but we need to synchronize to be sure that there is no
// work pending, for example if a worker had a spurious wakeup. Skip
// worker 0 (the main worker).
GC_TRACEPOINT(trace_check_termination_begin);
size_t locked = 1;
while (locked < tracer->worker_count) {
if (pthread_mutex_trylock(&tracer->workers[locked].lock) == 0)
locked++;
else
break;
}
int done = (locked == tracer->worker_count) &&
!trace_worker_can_steal_from_any(worker, tracer);
GC_TRACEPOINT(trace_check_termination_end);
if (done)
return 0;
while (locked > 1)
pthread_mutex_unlock(&tracer->workers[--locked].lock);
LOG("checking for termination: failed to lock, spinning #%zu\n", spin_count);
yield_for_spin(spin_count);
return 1;
}
static struct gc_ref
trace_worker_steal(struct gc_trace_worker *worker) {
struct gc_tracer *tracer = worker->tracer;
// It could be that the worker's local trace queue has simply
// overflowed. In that case avoid contention by trying to pop
// something from the worker's own queue.
{
LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
struct gc_ref obj = shared_worklist_try_pop(&worker->shared);
if (!gc_ref_is_null(obj))
return obj;
}
GC_TRACEPOINT(trace_steal);
LOG("tracer #%zu: trying to steal\n", worker->id);
struct gc_ref obj = trace_worker_steal_from_any(worker, tracer);
if (!gc_ref_is_null(obj))
return obj;
return gc_ref_null();
}
static void
trace_with_data(struct gc_tracer *tracer,
struct gc_heap *heap,
struct gc_trace_worker *worker,
struct gc_trace_worker_data *data) {
atomic_fetch_add_explicit(&tracer->active_tracers, 1, memory_order_acq_rel);
worker->data = data;
LOG("tracer #%zu: running trace loop\n", worker->id);
{
LOG("tracer #%zu: tracing roots\n", worker->id);
size_t n = 0;
do {
struct gc_root root = root_worklist_pop(&tracer->roots);
if (root.kind == GC_ROOT_KIND_NONE)
break;
trace_root(root, heap, worker);
n++;
} while (1);
LOG("tracer #%zu: done tracing roots, %zu roots traced\n", worker->id, n);
}
if (tracer->trace_roots_only) {
// Unlike the full trace where work is generated during the trace, a
// roots-only trace consumes work monotonically; any object enqueued as a
// result of marking roots isn't ours to deal with. However we do need to
// synchronize with remote workers to ensure they have completed their
// work items.
if (worker->id == 0) {
for (size_t i = 1; i < tracer->worker_count; i++)
pthread_mutex_lock(&tracer->workers[i].lock);
}
} else {
LOG("tracer #%zu: tracing objects\n", worker->id);
GC_TRACEPOINT(trace_objects_begin);
size_t n = 0;
size_t spin_count = 0;
do {
while (1) {
struct gc_ref ref;
if (!local_worklist_empty(&worker->local)) {
ref = local_worklist_pop(&worker->local);
} else {
ref = trace_worker_steal(worker);
if (gc_ref_is_null(ref))
break;
}
trace_one(ref, heap, worker);
n++;
}
} while (trace_worker_should_continue(worker, spin_count++));
GC_TRACEPOINT(trace_objects_end);
LOG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n);
}
worker->data = NULL;
atomic_fetch_sub_explicit(&tracer->active_tracers, 1, memory_order_acq_rel);
}
static void
trace_worker_trace(struct gc_trace_worker *worker) {
GC_TRACEPOINT(trace_worker_begin);
gc_trace_worker_call_with_data(trace_with_data, worker->tracer,
worker->heap, worker);
GC_TRACEPOINT(trace_worker_end);
}
static inline int
gc_tracer_should_parallelize(struct gc_tracer *tracer) {
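  // Heuristic: wake the helpers only if there is enough work to share --
  // more than one pending root, more than one nonempty shared worklist, or
  // a single worklist holding at least the parallel threshold of objects.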
if (root_worklist_size(&tracer->roots) > 1)
return 1;
if (tracer->trace_roots_only)
return 0;
size_t nonempty_worklists = 0;
ssize_t parallel_threshold =
LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
for (size_t i = 0; i < tracer->worker_count; i++) {
ssize_t size = shared_worklist_size(&tracer->workers[i].shared);
if (!size)
continue;
nonempty_worklists++;
if (nonempty_worklists > 1)
return 1;
if (size >= parallel_threshold)
return 1;
}
return 0;
}
static inline void
gc_tracer_trace(struct gc_tracer *tracer) {
LOG("starting trace; %zu workers\n", tracer->worker_count);
for (int i = 1; i < tracer->worker_count; i++)
pthread_mutex_unlock(&tracer->workers[i].lock);
if (gc_tracer_should_parallelize(tracer)) {
LOG("waking workers\n");
tracer_unpark_all_workers(tracer);
} else {
LOG("starting in local-only mode\n");
}
trace_worker_trace(&tracer->workers[0]);
root_worklist_reset(&tracer->roots);
LOG("trace finished\n");
}
static inline void
gc_tracer_trace_roots(struct gc_tracer *tracer) {
LOG("starting roots-only trace\n");
GC_TRACEPOINT(trace_roots_begin);
tracer->trace_roots_only = 1;
gc_tracer_trace(tracer);
tracer->trace_roots_only = 0;
GC_TRACEPOINT(trace_roots_end);
GC_ASSERT_EQ(atomic_load(&tracer->active_tracers), 0);
LOG("roots-only trace finished\n");
}
#endif // PARALLEL_TRACER_H
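Worker wakeup in the tracer is epoch-driven: gc_tracer_trace bumps an epoch counter and broadcasts a condition variable, and each parked worker re-runs its trace loop whenever it observes a new epoch, so spurious wakeups cost nothing. The following is a hedged, standalone sketch of that protocol, simplified to one shared lock (the real tracer gives each worker its own lock, which also feeds the termination check); do_work and the other names are illustrative only.

/* Sketch of the epoch/condvar wakeup used by tracer_unpark_all_workers and
 * trace_worker_thread, collapsed to a single lock and a single worker. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_long epoch;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done;

static void do_work(long e) { printf("worker: tracing for epoch %ld\n", e); }

static void* worker(void *arg) {
  long seen = 0;
  pthread_mutex_lock(&lock);
  while (!done) {
    long e = atomic_load_explicit(&epoch, memory_order_acquire);
    if (e != seen) {
      seen = e;
      do_work(e);
    } else {
      pthread_cond_wait(&cond, &lock);
    }
  }
  pthread_mutex_unlock(&lock);
  return NULL;
}

static void unpark_all(void) {
  pthread_mutex_lock(&lock);
  atomic_fetch_add_explicit(&epoch, 1, memory_order_acq_rel);
  pthread_cond_broadcast(&cond);
  pthread_mutex_unlock(&lock);
}

int main(void) {
  pthread_t t;
  pthread_create(&t, NULL, worker, NULL);
  for (int i = 0; i < 3; i++) {
    unpark_all();
    usleep(1000);
  }
  pthread_mutex_lock(&lock);
  done = 1;
  pthread_cond_broadcast(&cond);
  pthread_mutex_unlock(&lock);
  pthread_join(t, NULL);
  return 0;
}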

1340
libguile/whippet/src/pcc.c Normal file

File diff suppressed because it is too large


@ -0,0 +1,76 @@
#ifndef ROOT_WORKLIST_H
#define ROOT_WORKLIST_H
#include <stdatomic.h>
#include <sys/mman.h>
#include <unistd.h>
#include "assert.h"
#include "debug.h"
#include "gc-inline.h"
#include "gc-ref.h"
#include "root.h"
// A single-producer, multiple-consumer worklist that has two phases:
// one in which roots are added by the producer, then one in which roots
// are consumed from the worklist. Roots are never added once the
// consumer phase starts.
struct root_worklist {
size_t size;
size_t read;
size_t write;
struct gc_root *buf;
};
void
root_worklist_alloc(struct root_worklist *q) {
q->buf = realloc(q->buf, q->size * sizeof(struct gc_root));
if (!q->buf) {
perror("Failed to grow root worklist");
GC_CRASH();
}
}
static void
root_worklist_init(struct root_worklist *q) {
q->size = 16;
q->read = 0;
q->write = 0;
q->buf = NULL;
root_worklist_alloc(q);
}
static inline void
root_worklist_push(struct root_worklist *q, struct gc_root root) {
if (UNLIKELY(q->write == q->size)) {
q->size *= 2;
root_worklist_alloc(q);
}
q->buf[q->write++] = root;
}
// Not atomic.
static inline size_t
root_worklist_size(struct root_worklist *q) {
return q->write - q->read;
}
static inline struct gc_root
root_worklist_pop(struct root_worklist *q) {
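  // Each concurrent consumer claims a distinct index; an index at or past
  // the write pointer means the worklist is drained.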
size_t idx = atomic_fetch_add(&q->read, 1);
if (idx < q->write)
return q->buf[idx];
return (struct gc_root){ GC_ROOT_KIND_NONE, };
}
static void
root_worklist_reset(struct root_worklist *q) {
q->read = q->write = 0;
}
static void
root_worklist_destroy(struct root_worklist *q) {
free(q->buf);
}
#endif // ROOT_WORKLIST_H
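Draining the root worklist is lock-free on the consumer side: each consumer claims the next index with an atomic fetch-and-add and stops once it reads past the write pointer, which never moves during the consumer phase. The standalone sketch below shows the same claim protocol with plain integers instead of struct gc_root; the item type, thread count, and names are illustrative, not Whippet's.

/* Sketch of the two-phase protocol above: one thread pushes all items,
 * then several threads claim items by fetch-adding the read index. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define N_ITEMS 100
#define N_THREADS 4

static int items[N_ITEMS];
static size_t n_items;            /* producer phase: single writer */
static atomic_size_t next_item;   /* consumer phase: shared read index */

static void push(int v) { items[n_items++] = v; }

static void* drain(void *arg) {
  intptr_t sum = 0;
  for (;;) {
    size_t idx = atomic_fetch_add(&next_item, 1);
    if (idx >= n_items)           /* past the write pointer: nothing left */
      break;
    sum += items[idx];
  }
  return (void*)sum;
}

int main(void) {
  for (int i = 1; i <= N_ITEMS; i++)
    push(i);
  pthread_t threads[N_THREADS];
  for (int i = 0; i < N_THREADS; i++)
    pthread_create(&threads[i], NULL, drain, NULL);
  intptr_t total = 0;
  for (int i = 0; i < N_THREADS; i++) {
    void *ret;
    pthread_join(threads[i], &ret);
    total += (intptr_t)ret;
  }
  printf("sum = %ld (expect %d)\n", (long)total, N_ITEMS * (N_ITEMS + 1) / 2);
  return 0;
}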


@ -0,0 +1,81 @@
#ifndef ROOT_H
#define ROOT_H
#include "gc-edge.h"
#include "extents.h"
struct gc_ephemeron;
struct gc_heap;
struct gc_mutator;
struct gc_edge_buffer;
enum gc_root_kind {
GC_ROOT_KIND_NONE,
GC_ROOT_KIND_HEAP,
GC_ROOT_KIND_MUTATOR,
GC_ROOT_KIND_CONSERVATIVE_EDGES,
GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES,
GC_ROOT_KIND_RESOLVED_EPHEMERONS,
GC_ROOT_KIND_EDGE,
GC_ROOT_KIND_EDGE_BUFFER,
};
struct gc_root {
enum gc_root_kind kind;
union {
struct gc_heap *heap;
struct gc_mutator *mutator;
struct gc_ephemeron *resolved_ephemerons;
struct extent_range range;
struct gc_edge edge;
struct gc_edge_buffer *edge_buffer;
};
};
static inline struct gc_root
gc_root_heap(struct gc_heap* heap) {
struct gc_root ret = { GC_ROOT_KIND_HEAP };
ret.heap = heap;
return ret;
}
static inline struct gc_root
gc_root_mutator(struct gc_mutator* mutator) {
struct gc_root ret = { GC_ROOT_KIND_MUTATOR };
ret.mutator = mutator;
return ret;
}
static inline struct gc_root
gc_root_conservative_edges(uintptr_t lo_addr, uintptr_t hi_addr,
int possibly_interior) {
enum gc_root_kind kind = possibly_interior
? GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES
: GC_ROOT_KIND_CONSERVATIVE_EDGES;
struct gc_root ret = { kind };
ret.range = (struct extent_range) {lo_addr, hi_addr};
return ret;
}
static inline struct gc_root
gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) {
struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS };
ret.resolved_ephemerons = resolved;
return ret;
}
static inline struct gc_root
gc_root_edge(struct gc_edge edge) {
struct gc_root ret = { GC_ROOT_KIND_EDGE };
ret.edge = edge;
return ret;
}
static inline struct gc_root
gc_root_edge_buffer(struct gc_edge_buffer *buf) {
struct gc_root ret = { GC_ROOT_KIND_EDGE_BUFFER };
ret.edge_buffer = buf;
return ret;
}
#endif // ROOT_H
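A gc_root is a tagged union: each constructor sets the kind and fills in the matching union member, and consumers (such as the trace_root hook invoked from parallel-tracer.h) dispatch on the kind. The sketch below shows the same construct-and-dispatch pattern in a self-contained form; the root kinds, handle_root, and the payload types here are invented for illustration and are not Whippet's.

/* Sketch of the tagged-union root pattern: constructors tag, consumers
 * switch on the tag. Requires C11 for the anonymous union. */
#include <stdio.h>

enum root_kind { ROOT_NONE, ROOT_HEAP, ROOT_EDGE };
struct heap;
struct edge { void **loc; };

struct root {
  enum root_kind kind;
  union { struct heap *heap; struct edge edge; };
};

static struct root root_heap(struct heap *h) {
  struct root r = { ROOT_HEAP }; r.heap = h; return r;
}
static struct root root_edge(struct edge e) {
  struct root r = { ROOT_EDGE }; r.edge = e; return r;
}

static void handle_root(struct root r) {
  switch (r.kind) {
  case ROOT_HEAP: printf("visit global heap roots\n"); break;
  case ROOT_EDGE: printf("visit single edge at %p\n", (void*)r.edge.loc); break;
  default: printf("no root\n"); break;
  }
}

int main(void) {
  void *slot = NULL;
  handle_root(root_heap(NULL));
  handle_root(root_edge((struct edge){ &slot }));
  handle_root((struct root){ ROOT_NONE });
  return 0;
}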

Some files were not shown because too many files have changed in this diff.