diff --git a/libguile/whippet/.gitignore b/libguile/whippet/.gitignore new file mode 100644 index 000000000..507595694 --- /dev/null +++ b/libguile/whippet/.gitignore @@ -0,0 +1,16 @@ +/*.o +/*.bdw +/*.semi +/*.mmc +/*.generational-mmc +/*.parallel-mmc +/*.parallel-generational-mmc +/*.stack-conservative-mmc +/*.stack-conservative-generational-mmc +/*.stack-conservative-parallel-mmc +/*.stack-conservative-parallel-generational-mmc +/*.heap-conservative-mmc +/*.heap-conservative-generational-mmc +/*.heap-conservative-parallel-mmc +/*.heap-conservative-parallel-generational-mmc +/.deps/ diff --git a/libguile/whippet/Makefile b/libguile/whippet/Makefile new file mode 100644 index 000000000..c5c91ae3e --- /dev/null +++ b/libguile/whippet/Makefile @@ -0,0 +1,141 @@ +TESTS = quads mt-gcbench ephemerons finalizers +COLLECTORS = \ + bdw \ + semi \ + \ + pcc \ + generational-pcc \ + \ + mmc \ + stack-conservative-mmc \ + heap-conservative-mmc \ + \ + parallel-mmc \ + stack-conservative-parallel-mmc \ + heap-conservative-parallel-mmc \ + \ + generational-mmc \ + stack-conservative-generational-mmc \ + heap-conservative-generational-mmc \ + \ + parallel-generational-mmc \ + stack-conservative-parallel-generational-mmc \ + heap-conservative-parallel-generational-mmc + +DEFAULT_BUILD := opt + +BUILD_CFLAGS_opt = -O2 -g -DNDEBUG +BUILD_CFLAGS_optdebug = -Og -g -DGC_DEBUG=1 +BUILD_CFLAGS_debug = -O0 -g -DGC_DEBUG=1 + +BUILD_CFLAGS = $(BUILD_CFLAGS_$(or $(BUILD),$(DEFAULT_BUILD))) + +USE_LTTNG_0 := +USE_LTTNG_1 := 1 +USE_LTTNG := $(shell pkg-config --exists lttng-ust && echo 1 || echo 0) +LTTNG_CPPFLAGS := $(if $(USE_LTTNG_$(USE_LTTNG)), $(shell pkg-config --cflags lttng-ust),) +LTTNG_LIBS := $(if $(USE_LTTNG_$(USE_LTTNG)), $(shell pkg-config --libs lttng-ust),) +TRACEPOINT_CPPFLAGS = $(if $(USE_LTTNG_$(USE_LTTNG)),$(LTTNG_CPPFLAGS) -DGC_TRACEPOINT_LTTNG=1,) +TRACEPOINT_LIBS = $(LTTNG_LIBS) + +CC = gcc +CFLAGS = -Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(BUILD_CFLAGS) +CPPFLAGS = -Iapi $(TRACEPOINT_CPPFLAGS) +LDFLAGS = -lpthread -flto=auto $(TRACEPOINT_LIBS) +DEPFLAGS = -MMD -MP -MF $(@:obj/%.o=.deps/%.d) +COMPILE = $(CC) $(CFLAGS) $(CPPFLAGS) $(DEPFLAGS) -o $@ +LINK = $(CC) $(LDFLAGS) -o $@ +PLATFORM = gnu-linux + +ALL_TESTS = $(foreach COLLECTOR,$(COLLECTORS),$(addsuffix .$(COLLECTOR),$(TESTS))) + +all: $(ALL_TESTS:%=bin/%) +.deps obj bin: ; mkdir -p $@ + +include $(wildcard .deps/*) + +obj/gc-platform.o: src/gc-platform-$(PLATFORM).c | .deps obj + $(COMPILE) -c $< +obj/gc-stack.o: src/gc-stack.c | .deps obj + $(COMPILE) -c $< +obj/gc-options.o: src/gc-options.c | .deps obj + $(COMPILE) -c $< +obj/gc-tracepoint.o: src/gc-tracepoint.c | .deps obj + $(COMPILE) -c $< +obj/%.gc-ephemeron.o: src/gc-ephemeron.c | .deps obj + $(COMPILE) -include benchmarks/$*-embedder.h -c $< +obj/%.gc-finalizer.o: src/gc-finalizer.c | .deps obj + $(COMPILE) -include benchmarks/$*-embedder.h -c $< + +GC_STEM_bdw = bdw +GC_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 +GC_IMPL_CFLAGS_bdw = `pkg-config --cflags bdw-gc` +GC_LIBS_bdw = `pkg-config --libs bdw-gc` + +GC_STEM_semi = semi +GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1 +GC_LIBS_semi = -lm + +GC_STEM_pcc = pcc +GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1 +GC_LIBS_pcc = -lm + +GC_STEM_generational_pcc = $(GC_STEM_pcc) +GC_CFLAGS_generational_pcc = $(GC_CFLAGS_pcc) -DGC_GENERATIONAL=1 +GC_LIBS_generational_pcc = $(GC_LIBS_pcc) + +define mmc_variant +GC_STEM_$(1) = mmc +GC_CFLAGS_$(1) = $(2) +GC_LIBS_$(1) = -lm +endef + +define 
generational_mmc_variants +$(call mmc_variant,$(1)mmc,$(2)) +$(call mmc_variant,$(1)generational_mmc,$(2) -DGC_GENERATIONAL=1) +endef + +define parallel_mmc_variants +$(call generational_mmc_variants,$(1),$(2)) +$(call generational_mmc_variants,$(1)parallel_,$(2) -DGC_PARALLEL=1) +endef + +define trace_mmc_variants +$(call parallel_mmc_variants,,-DGC_PRECISE_ROOTS=1) +$(call parallel_mmc_variants,stack_conservative_,-DGC_CONSERVATIVE_ROOTS=1) +$(call parallel_mmc_variants,heap_conservative_,-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1) +endef + +$(eval $(call trace_mmc_variants)) + +# $(1) is the benchmark, $(2) is the collector configuration +make_gc_var = $$($(1)$(subst -,_,$(2))) +gc_impl = $(call make_gc_var,GC_STEM_,$(1)).c +gc_attrs = $(call make_gc_var,GC_STEM_,$(1))-attrs.h +gc_cflags = $(call make_gc_var,GC_CFLAGS_,$(1)) +gc_impl_cflags = $(call make_gc_var,GC_IMPL_CFLAGS_,$(1)) +gc_libs = $(call make_gc_var,GC_LIBS_,$(1)) +define benchmark_template +obj/$(1).$(2).gc.o: src/$(call gc_impl,$(2)) | .deps obj + $$(COMPILE) $(call gc_cflags,$(2)) $(call gc_impl_cflags,$(2)) -include benchmarks/$(1)-embedder.h -c $$< +obj/$(1).$(2).o: benchmarks/$(1).c | .deps obj + $$(COMPILE) $(call gc_cflags,$(2)) -include api/$(call gc_attrs,$(2)) -c $$< +bin/$(1).$(2): obj/$(1).$(2).gc.o obj/$(1).$(2).o obj/gc-stack.o obj/gc-options.o obj/gc-platform.o obj/gc-tracepoint.o obj/$(1).gc-ephemeron.o obj/$(1).gc-finalizer.o | bin + $$(LINK) $$^ $(call gc_libs,$(2)) +endef + +$(foreach BENCHMARK,$(TESTS),\ + $(foreach COLLECTOR,$(COLLECTORS),\ + $(eval $(call benchmark_template,$(BENCHMARK),$(COLLECTOR))))) + +.PRECIOUS: $(ALL_TESTS) $(OBJS) + +clean: + rm -f $(ALL_TESTS) + rm -rf .deps obj bin + +# Clear some of the default rules. +.SUFFIXES: +.SECONDARY: +%.c:; +Makefile:; diff --git a/libguile/whippet/README.md b/libguile/whippet/README.md new file mode 100644 index 000000000..9ef9e3cc9 --- /dev/null +++ b/libguile/whippet/README.md @@ -0,0 +1,91 @@ +# Whippet Garbage Collector + +This repository is for development of Whippet, a new garbage collector +implementation, eventually for use in [Guile +Scheme](https://gnu.org/s/guile). + +Whippet is an embed-only C library, designed to be copied into a +program's source tree. It exposes an abstract C API for managed memory +allocation, and provides a number of implementations of that API. + +## Documentation + +See the [documentation](./doc/README.md). + +## Features + + - Per-object pinning (with `mmc` collectors) + - Finalization (supporting resuscitation) + - Ephemerons (except on `bdw`, which has a polyfill) + - Conservative roots (optionally with `mmc` or always with `bdw`) + - Precise roots (optionally with `mmc` or always with `semi` / `pcc`) + - Precise embedder-parameterized heap tracing (except with `bdw`) + - Conservative heap tracing (optionally with `mmc`, always with `bdw`) + - Parallel tracing (except `semi`) + - Parallel mutators (except `semi`) + - Inline allocation / write barrier fast paths (supporting JIT) + - One unified API with no-overhead abstraction: switch collectors when + you like + - Three policies for sizing heaps: fixed, proportional to live size, and + [MemBalancer](http://marisa.moe/balancer.html) + +## Source repository structure + + * [api/](./api/): The user-facing API. Also, the "embedder API"; see + the [manual](./doc/manual.md) for more. + * [doc/](./doc/): Documentation, such as it is. + * [src/](./src/): The actual GC implementation, containing a number of + collector implementations. 
The embedder chooses which collector to + use at compile-time. See the [documentation](./doc/collectors.md) + for more on the different collectors (`semi`, `bdw`, `pcc`, and the + different flavors of `mmc`). + * [benchmarks/](./benchmarks/): Benchmarks. A work in progress. + * [test/](./test/): A dusty attic of minimal testing. + +## Status and roadmap + +As of January 2025, Whippet is good to go! Of course there will surely +be new features to build as Whippet gets integrated into language +run-times, but the basics are there. + +The next phase on the roadmap is support for tracing, and +some performance noodling. + +Once that is done, the big task is integrating Whippet into the [Guile +Scheme](https://gnu.org/s/guile) language run-time, replacing BDW-GC. +Fingers crossed! + +## About the name + +It sounds better than WIP (work-in-progress) garbage collector, doesn't +it? Also apparently a whippet is a kind of dog that is fast for its +size. It would be nice if the Whippet collectors turn out to have this +property. + +## License + +``` +Copyright (c) 2022-2024 Andy Wingo + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +``` + +Note that some benchmarks have other licenses; see +[`benchmarks/README.md`](./benchmarks/README.md) for more.
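For orientation, the user-facing API that this patch adds under `api/` is driven roughly as in the sketch below. This is illustrative only and not part of the patch: it assumes the embedder also supplies the `gc-embedder-api.h` hooks, and that the translation unit is compiled with `-include` of one collector's `-attrs.h` header and linked against that collector, as the Makefile above does for the benchmarks.

```c
/* Illustrative sketch, not part of this patch: exercises the public API
   declared in api/gc-api.h, api/gc-options.h and api/gc-basic-stats.h.
   Assumes an embedder-API implementation and one collector are linked in. */
#include <stdio.h>

#include "gc-api.h"
#include "gc-basic-stats.h"

static void* run(struct gc_stack_addr *stack_base, void *data) {
  struct gc_heap *heap;
  struct gc_mutator *mut;
  struct gc_basic_stats stats;
  struct gc_options *options = gc_allocate_options();

  /* Create a heap and a mutator for the current thread, reporting GC
     events to the gc_basic_stats listener. */
  if (!gc_init(options, stack_base, &heap, &mut, GC_BASIC_STATS, &stats))
    return NULL;

  /* Allocate a small object; with a precise collector its fields are
     traced via the embedder's gc_trace_object. */
  void *obj = gc_allocate(mut, 32, GC_ALLOCATION_TAGGED);

  gc_basic_stats_finish(&stats);
  gc_basic_stats_print(&stats, stdout);
  return obj;
}

int main(void) {
  /* Capture the stack base so collectors with conservative roots can
     scan the calling thread's stack. */
  gc_call_with_stack_addr(run, NULL);
  return 0;
}
```

Relinking the same program against a different collector from the Makefile's `COLLECTORS` list needs no source changes, which is the "one unified API" property claimed in the feature list.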
diff --git a/libguile/whippet/api/bdw-attrs.h b/libguile/whippet/api/bdw-attrs.h new file mode 100644 index 000000000..7f8000b3f --- /dev/null +++ b/libguile/whippet/api/bdw-attrs.h @@ -0,0 +1,91 @@ +#ifndef BDW_ATTRS_H +#define BDW_ATTRS_H + +#include "gc-attrs.h" +#include "gc-assert.h" + +static inline enum gc_allocator_kind gc_allocator_kind(void) { + return GC_ALLOCATOR_INLINE_FREELIST; +} +static inline size_t gc_allocator_small_granule_size(void) { + return 2 * sizeof(void *); +} +static inline size_t gc_allocator_large_threshold(void) { + return 256; +} + +static inline size_t gc_allocator_allocation_pointer_offset(void) { + GC_CRASH(); +} +static inline size_t gc_allocator_allocation_limit_offset(void) { + GC_CRASH(); +} + +static inline size_t gc_allocator_freelist_offset(size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size); + size_t base; + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + base = 0; + break; + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + case GC_ALLOCATION_TAGGED_POINTERLESS: + base = (sizeof(void*) * gc_allocator_large_threshold() / + gc_allocator_small_granule_size()); + break; + } + size_t bucket = (size - 1) / gc_allocator_small_granule_size(); + return base + sizeof(void*) * bucket; +} + +static inline size_t gc_allocator_alloc_table_alignment(void) { + return 0; +} +static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind) { + GC_CRASH(); +} +static inline uint8_t gc_allocator_alloc_table_end_pattern(void) { + GC_CRASH(); +} + +static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t) { + return GC_OLD_GENERATION_CHECK_NONE; +} +static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) { + GC_CRASH(); +} +static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) { + GC_CRASH(); +} + +static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t) { + return GC_WRITE_BARRIER_NONE; +} +static inline size_t gc_write_barrier_field_table_alignment(void) { + GC_CRASH(); +} +static inline ptrdiff_t gc_write_barrier_field_table_offset(void) { + GC_CRASH(); +} +static inline size_t gc_write_barrier_field_fields_per_byte(void) { + GC_CRASH(); +} +static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) { + GC_CRASH(); +} + +static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) { + return GC_SAFEPOINT_MECHANISM_SIGNAL; +} + +static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) { + return GC_COOPERATIVE_SAFEPOINT_NONE; +} + +static inline int gc_can_pin_objects(void) { + return 1; +} + +#endif // BDW_ATTRS_H diff --git a/libguile/whippet/api/gc-allocation-kind.h b/libguile/whippet/api/gc-allocation-kind.h new file mode 100644 index 000000000..72de3b6be --- /dev/null +++ b/libguile/whippet/api/gc-allocation-kind.h @@ -0,0 +1,19 @@ +#ifndef GC_ALLOCATION_KIND_H +#define GC_ALLOCATION_KIND_H + +enum gc_allocation_kind { + // An object whose type can be inspected at run-time based on its contents, + // and whose fields be traced via the gc_trace_object procedure. + GC_ALLOCATION_TAGGED, + // Like GC_ALLOCATION_TAGGED, but not containing any fields that reference + // GC-managed objects. The GC may choose to handle these specially. + GC_ALLOCATION_TAGGED_POINTERLESS, + // A raw allocation whose type cannot be inspected at trace-time, and whose + // fields should be traced conservatively. 
+ GC_ALLOCATION_UNTAGGED_CONSERVATIVE, + // A raw allocation whose type cannot be inspected at trace-time, but + // containing no fields that reference GC-managed objects. + GC_ALLOCATION_UNTAGGED_POINTERLESS +}; + +#endif // GC_ALLOCATION_KIND_H diff --git a/libguile/whippet/api/gc-api.h b/libguile/whippet/api/gc-api.h new file mode 100644 index 000000000..78d8b2bdb --- /dev/null +++ b/libguile/whippet/api/gc-api.h @@ -0,0 +1,301 @@ +#ifndef GC_API_H_ +#define GC_API_H_ + +#include "gc-config.h" +#include "gc-allocation-kind.h" +#include "gc-assert.h" +#include "gc-attrs.h" +#include "gc-collection-kind.h" +#include "gc-edge.h" +#include "gc-event-listener.h" +#include "gc-inline.h" +#include "gc-options.h" +#include "gc-ref.h" +#include "gc-visibility.h" + +#include +#include +#include + +struct gc_heap; +struct gc_mutator; + +struct gc_stack_addr; +GC_API_ void* gc_call_with_stack_addr(void* (*f)(struct gc_stack_addr *, + void *), + void *data) GC_NEVER_INLINE; + +GC_API_ int gc_init(const struct gc_options *options, + struct gc_stack_addr *base, struct gc_heap **heap, + struct gc_mutator **mutator, + struct gc_event_listener event_listener, + void *event_listener_data); + +GC_API_ uint64_t gc_allocation_counter(struct gc_heap *heap); + +GC_API_ struct gc_heap* gc_mutator_heap(struct gc_mutator *mut); + +GC_API_ uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap); +GC_API_ uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap); + +struct gc_mutator_roots; +GC_API_ void gc_mutator_set_roots(struct gc_mutator *mut, + struct gc_mutator_roots *roots); + +struct gc_heap_roots; +GC_API_ void gc_heap_set_roots(struct gc_heap *heap, + struct gc_heap_roots *roots); + +struct gc_extern_space; +GC_API_ void gc_heap_set_extern_space(struct gc_heap *heap, + struct gc_extern_space *space); + +GC_API_ struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *base, + struct gc_heap *heap); +GC_API_ void gc_finish_for_thread(struct gc_mutator *mut); +GC_API_ void* gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*), + void *data) GC_NEVER_INLINE; + +GC_API_ void gc_collect(struct gc_mutator *mut, + enum gc_collection_kind requested_kind); + +static inline void gc_update_alloc_table(struct gc_ref obj, size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline void gc_update_alloc_table(struct gc_ref obj, size_t size, + enum gc_allocation_kind kind) { + size_t alignment = gc_allocator_alloc_table_alignment(); + if (!alignment) return; + + uintptr_t addr = gc_ref_value(obj); + uintptr_t base = addr & ~(alignment - 1); + size_t granule_size = gc_allocator_small_granule_size(); + uintptr_t granule = (addr & (alignment - 1)) / granule_size; + uint8_t *alloc = (uint8_t*)(base + granule); + + uint8_t begin_pattern = gc_allocator_alloc_table_begin_pattern(kind); + uint8_t end_pattern = gc_allocator_alloc_table_end_pattern(); + if (end_pattern) { + size_t granules = size / granule_size; + if (granules == 1) { + alloc[0] = begin_pattern | end_pattern; + } else { + alloc[0] = begin_pattern; + if (granules > 2) + memset(alloc + 1, 0, granules - 2); + alloc[granules - 1] = end_pattern; + } + } else { + alloc[0] = begin_pattern; + } +} + +GC_API_ void* gc_allocate_slow(struct gc_mutator *mut, size_t bytes, + enum gc_allocation_kind kind) GC_NEVER_INLINE; + +static inline void* +gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline void* 
gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut, + size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size <= gc_allocator_large_threshold()); + + size_t granule_size = gc_allocator_small_granule_size(); + size_t hp_offset = gc_allocator_allocation_pointer_offset(); + size_t limit_offset = gc_allocator_allocation_limit_offset(); + + uintptr_t base_addr = (uintptr_t)mut; + uintptr_t *hp_loc = (uintptr_t*)(base_addr + hp_offset); + uintptr_t *limit_loc = (uintptr_t*)(base_addr + limit_offset); + + size = (size + granule_size - 1) & ~(granule_size - 1); + uintptr_t hp = *hp_loc; + uintptr_t limit = *limit_loc; + uintptr_t new_hp = hp + size; + + if (GC_UNLIKELY (new_hp > limit)) + return NULL; + + *hp_loc = new_hp; + + gc_update_alloc_table(gc_ref(hp), size, kind); + + return (void*)hp; +} + +static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut, + size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut, + size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size <= gc_allocator_large_threshold()); + + size_t freelist_offset = gc_allocator_freelist_offset(size, kind); + uintptr_t base_addr = (uintptr_t)mut; + void **freelist_loc = (void**)(base_addr + freelist_offset); + + void *head = *freelist_loc; + if (GC_UNLIKELY(!head)) + return NULL; + + *freelist_loc = *(void**)head; + + gc_update_alloc_table(gc_ref_from_heap_object(head), size, kind); + + return head; +} + +static inline void* gc_allocate_small_fast(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline void* gc_allocate_small_fast(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size != 0); + GC_ASSERT(size <= gc_allocator_large_threshold()); + + switch (gc_allocator_kind()) { + case GC_ALLOCATOR_INLINE_BUMP_POINTER: + return gc_allocate_small_fast_bump_pointer(mut, size, kind); + case GC_ALLOCATOR_INLINE_FREELIST: + return gc_allocate_small_fast_freelist(mut, size, kind); + case GC_ALLOCATOR_INLINE_NONE: + return NULL; + default: + GC_CRASH(); + } +} + +static inline void* gc_allocate_fast(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline void* gc_allocate_fast(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size != 0); + if (size > gc_allocator_large_threshold()) + return NULL; + + return gc_allocate_small_fast(mut, size, kind); +} + +static inline void* gc_allocate(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline void* gc_allocate(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + void *ret = gc_allocate_fast(mut, size, kind); + if (GC_LIKELY(ret != NULL)) + return ret; + + return gc_allocate_slow(mut, size, kind); +} + +GC_API_ int gc_object_is_old_generation_slow(struct gc_mutator *mut, + struct gc_ref obj) GC_NEVER_INLINE; + +static inline int gc_object_is_old_generation(struct gc_mutator *mut, + struct gc_ref obj, + size_t obj_size) GC_ALWAYS_INLINE; +static inline int gc_object_is_old_generation(struct gc_mutator *mut, + struct gc_ref obj, + size_t obj_size) { + switch (gc_old_generation_check_kind(obj_size)) { + case GC_OLD_GENERATION_CHECK_ALLOC_TABLE: { + size_t alignment = gc_allocator_alloc_table_alignment(); + GC_ASSERT(alignment); + uintptr_t addr = gc_ref_value(obj); + uintptr_t base = addr & ~(alignment - 1); + size_t granule_size = 
gc_allocator_small_granule_size(); + uintptr_t granule = (addr & (alignment - 1)) / granule_size; + uint8_t *byte_loc = (uint8_t*)(base + granule); + uint8_t byte = atomic_load_explicit(byte_loc, memory_order_relaxed); + uint8_t mask = gc_old_generation_check_alloc_table_tag_mask(); + uint8_t young = gc_old_generation_check_alloc_table_young_tag(); + return (byte & mask) != young; + } + case GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY: { + struct gc_heap *heap = gc_mutator_heap(mut); + // Note that these addresses are fixed and that the embedder might + // want to store them somewhere or inline them into the output of + // JIT-generated code. They may also be power-of-two aligned. + uintptr_t low_addr = gc_small_object_nursery_low_address(heap); + uintptr_t high_addr = gc_small_object_nursery_high_address(heap); + uintptr_t size = high_addr - low_addr; + uintptr_t addr = gc_ref_value(obj); + return addr - low_addr >= size; + } + case GC_OLD_GENERATION_CHECK_SLOW: + return gc_object_is_old_generation_slow(mut, obj); + default: + GC_CRASH(); + } +} + +GC_API_ void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) GC_NEVER_INLINE; + +static inline int gc_write_barrier_fast(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) GC_ALWAYS_INLINE; +static inline int gc_write_barrier_fast(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) { + switch (gc_write_barrier_kind(obj_size)) { + case GC_WRITE_BARRIER_NONE: + return 0; + case GC_WRITE_BARRIER_FIELD: { + if (!gc_object_is_old_generation(mut, obj, obj_size)) + return 0; + + size_t field_table_alignment = gc_write_barrier_field_table_alignment(); + size_t fields_per_byte = gc_write_barrier_field_fields_per_byte(); + uint8_t first_bit_pattern = gc_write_barrier_field_first_bit_pattern(); + ssize_t table_offset = gc_write_barrier_field_table_offset(); + + uintptr_t addr = gc_edge_address(edge); + uintptr_t base = addr & ~(field_table_alignment - 1); + uintptr_t field = (addr & (field_table_alignment - 1)) / sizeof(uintptr_t); + uintptr_t log_byte = field / fields_per_byte; + uint8_t log_bit = first_bit_pattern << (field % fields_per_byte); + uint8_t *byte_loc = (uint8_t*)(base + table_offset + log_byte); + uint8_t byte = atomic_load_explicit(byte_loc, memory_order_relaxed); + return !(byte & log_bit); + } + case GC_WRITE_BARRIER_SLOW: + return 1; + default: + GC_CRASH(); + } +} + +static inline void gc_write_barrier(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) GC_ALWAYS_INLINE; +static inline void gc_write_barrier(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) { + if (GC_UNLIKELY(gc_write_barrier_fast(mut, obj, obj_size, edge, new_val))) + gc_write_barrier_slow(mut, obj, obj_size, edge, new_val); +} + +GC_API_ void gc_pin_object(struct gc_mutator *mut, struct gc_ref obj); + +GC_API_ void gc_safepoint_slow(struct gc_mutator *mut) GC_NEVER_INLINE; +GC_API_ int* gc_safepoint_flag_loc(struct gc_mutator *mut); +static inline int gc_should_stop_for_safepoint(struct gc_mutator *mut) { + switch (gc_cooperative_safepoint_kind()) { + case GC_COOPERATIVE_SAFEPOINT_NONE: + return 0; + case GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG: + case GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG: { + return atomic_load_explicit(gc_safepoint_flag_loc(mut), + 
memory_order_relaxed); + } + default: + GC_CRASH(); + } +} +static inline void gc_safepoint(struct gc_mutator *mut) { + if (GC_UNLIKELY(gc_should_stop_for_safepoint(mut))) + gc_safepoint_slow(mut); +} + +#endif // GC_API_H_ diff --git a/libguile/whippet/api/gc-assert.h b/libguile/whippet/api/gc-assert.h new file mode 100644 index 000000000..c3fa6b749 --- /dev/null +++ b/libguile/whippet/api/gc-assert.h @@ -0,0 +1,21 @@ +#ifndef GC_ASSERT_H +#define GC_ASSERT_H + +#include "gc-config.h" + +#define GC_UNLIKELY(e) __builtin_expect(e, 0) +#define GC_LIKELY(e) __builtin_expect(e, 1) + +#define GC_CRASH() __builtin_trap() + +#if GC_DEBUG +#define GC_ASSERT(x) do { if (GC_UNLIKELY(!(x))) GC_CRASH(); } while (0) +#define GC_UNREACHABLE() GC_CRASH() +#else +#define GC_ASSERT(x) do { } while (0) +#define GC_UNREACHABLE() __builtin_unreachable() +#endif + +#define GC_ASSERT_EQ(a, b) GC_ASSERT((a) == (b)) + +#endif // GC_ASSERT_H diff --git a/libguile/whippet/api/gc-attrs.h b/libguile/whippet/api/gc-attrs.h new file mode 100644 index 000000000..44d5d47e6 --- /dev/null +++ b/libguile/whippet/api/gc-attrs.h @@ -0,0 +1,69 @@ +#ifndef GC_ATTRS_H +#define GC_ATTRS_H + +#include "gc-inline.h" +#include "gc-allocation-kind.h" + +#include +#include + +enum gc_allocator_kind { + GC_ALLOCATOR_INLINE_BUMP_POINTER, + GC_ALLOCATOR_INLINE_FREELIST, + GC_ALLOCATOR_INLINE_NONE +}; + +static inline enum gc_allocator_kind gc_allocator_kind(void) GC_ALWAYS_INLINE; +static inline size_t gc_allocator_large_threshold(void) GC_ALWAYS_INLINE; +static inline size_t gc_allocator_small_granule_size(void) GC_ALWAYS_INLINE; + +static inline size_t gc_allocator_allocation_pointer_offset(void) GC_ALWAYS_INLINE; +static inline size_t gc_allocator_allocation_limit_offset(void) GC_ALWAYS_INLINE; + +static inline size_t gc_allocator_freelist_offset(size_t size, + enum gc_allocation_kind kind) GC_ALWAYS_INLINE; + +static inline size_t gc_allocator_alloc_table_alignment(void) GC_ALWAYS_INLINE; +static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) GC_ALWAYS_INLINE; +static inline uint8_t gc_allocator_alloc_table_end_pattern(void) GC_ALWAYS_INLINE; + +enum gc_old_generation_check_kind { + GC_OLD_GENERATION_CHECK_NONE, + GC_OLD_GENERATION_CHECK_ALLOC_TABLE, + GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY, + GC_OLD_GENERATION_CHECK_SLOW +}; + +static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) GC_ALWAYS_INLINE; + +static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) GC_ALWAYS_INLINE; +static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) GC_ALWAYS_INLINE; + +enum gc_write_barrier_kind { + GC_WRITE_BARRIER_NONE, + GC_WRITE_BARRIER_FIELD, + GC_WRITE_BARRIER_SLOW +}; + +static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) GC_ALWAYS_INLINE; +static inline size_t gc_write_barrier_field_table_alignment(void) GC_ALWAYS_INLINE; +static inline ptrdiff_t gc_write_barrier_field_table_offset(void) GC_ALWAYS_INLINE; +static inline size_t gc_write_barrier_field_fields_per_byte(void) GC_ALWAYS_INLINE; +static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) GC_ALWAYS_INLINE; + +enum gc_safepoint_mechanism { + GC_SAFEPOINT_MECHANISM_COOPERATIVE, + GC_SAFEPOINT_MECHANISM_SIGNAL, +}; +static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) GC_ALWAYS_INLINE; + +enum gc_cooperative_safepoint_kind { + GC_COOPERATIVE_SAFEPOINT_NONE, + GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG, + 
GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG, +}; +static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) GC_ALWAYS_INLINE; + +static inline int gc_can_pin_objects(void) GC_ALWAYS_INLINE; + +#endif // GC_ATTRS_H diff --git a/libguile/whippet/api/gc-basic-stats.h b/libguile/whippet/api/gc-basic-stats.h new file mode 100644 index 000000000..055340817 --- /dev/null +++ b/libguile/whippet/api/gc-basic-stats.h @@ -0,0 +1,177 @@ +#ifndef GC_BASIC_STATS_H +#define GC_BASIC_STATS_H + +#include "gc-event-listener.h" +#include "gc-histogram.h" + +#include +#include +#include +#include +#include +#include + +GC_DEFINE_HISTOGRAM(gc_latency, 25, 4); + +struct gc_basic_stats { + uint64_t major_collection_count; + uint64_t minor_collection_count; + uint64_t last_time_usec; + uint64_t last_cpu_time_usec; + uint64_t elapsed_mutator_usec; + uint64_t elapsed_collector_usec; + uint64_t cpu_mutator_usec; + uint64_t cpu_collector_usec; + size_t heap_size; + size_t max_heap_size; + size_t max_live_data_size; + struct gc_latency pause_times; +}; + +static inline uint64_t gc_basic_stats_now(void) { + struct timeval tv; + if (gettimeofday(&tv, NULL) != 0) GC_CRASH(); + uint64_t ret = tv.tv_sec; + ret *= 1000 * 1000; + ret += tv.tv_usec; + return ret; +} + +static inline uint64_t gc_basic_stats_cpu_time(void) { + struct timespec ts; + clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts); + uint64_t ret = ts.tv_sec; + ret *= 1000 * 1000; + ret += ts.tv_nsec / 1000; + return ret; +} + +static inline void gc_basic_stats_init(void *data, size_t heap_size) { + struct gc_basic_stats *stats = data; + memset(stats, 0, sizeof(*stats)); + stats->last_time_usec = gc_basic_stats_now(); + stats->last_cpu_time_usec = gc_basic_stats_cpu_time(); + stats->heap_size = stats->max_heap_size = heap_size; +} + +static inline void gc_basic_stats_requesting_stop(void *data) { + struct gc_basic_stats *stats = data; + uint64_t now = gc_basic_stats_now(); + uint64_t cpu_time = gc_basic_stats_cpu_time(); + stats->elapsed_mutator_usec += now - stats->last_time_usec; + stats->cpu_mutator_usec += cpu_time - stats->last_cpu_time_usec; + stats->last_time_usec = now; + stats->last_cpu_time_usec = cpu_time; +} +static inline void gc_basic_stats_waiting_for_stop(void *data) {} +static inline void gc_basic_stats_mutators_stopped(void *data) {} + +static inline void gc_basic_stats_prepare_gc(void *data, + enum gc_collection_kind kind) { + struct gc_basic_stats *stats = data; + if (kind == GC_COLLECTION_MINOR) + stats->minor_collection_count++; + else + stats->major_collection_count++; +} + +static inline void gc_basic_stats_roots_traced(void *data) {} +static inline void gc_basic_stats_heap_traced(void *data) {} +static inline void gc_basic_stats_ephemerons_traced(void *data) {} +static inline void gc_basic_stats_finalizers_traced(void *data) {} + +static inline void gc_basic_stats_restarting_mutators(void *data) { + struct gc_basic_stats *stats = data; + uint64_t now = gc_basic_stats_now(); + uint64_t cpu_time = gc_basic_stats_cpu_time(); + uint64_t pause_time = now - stats->last_time_usec; + uint64_t pause_cpu_time = cpu_time - stats->last_cpu_time_usec; + stats->elapsed_collector_usec += pause_time; + stats->cpu_collector_usec += pause_cpu_time; + gc_latency_record(&stats->pause_times, pause_time); + stats->last_time_usec = now; + stats->last_cpu_time_usec = cpu_time; +} + +static inline void* gc_basic_stats_mutator_added(void *data) { + return NULL; +} +static inline void gc_basic_stats_mutator_cause_gc(void *mutator_data) {} +static 
inline void gc_basic_stats_mutator_stopping(void *mutator_data) {} +static inline void gc_basic_stats_mutator_stopped(void *mutator_data) {} +static inline void gc_basic_stats_mutator_restarted(void *mutator_data) {} +static inline void gc_basic_stats_mutator_removed(void *mutator_data) {} + +static inline void gc_basic_stats_heap_resized(void *data, size_t size) { + struct gc_basic_stats *stats = data; + stats->heap_size = size; + if (size > stats->max_heap_size) + stats->max_heap_size = size; +} + +static inline void gc_basic_stats_live_data_size(void *data, size_t size) { + struct gc_basic_stats *stats = data; + if (size > stats->max_live_data_size) + stats->max_live_data_size = size; +} + +#define GC_BASIC_STATS \ + ((struct gc_event_listener) { \ + gc_basic_stats_init, \ + gc_basic_stats_requesting_stop, \ + gc_basic_stats_waiting_for_stop, \ + gc_basic_stats_mutators_stopped, \ + gc_basic_stats_prepare_gc, \ + gc_basic_stats_roots_traced, \ + gc_basic_stats_heap_traced, \ + gc_basic_stats_ephemerons_traced, \ + gc_basic_stats_finalizers_traced, \ + gc_basic_stats_restarting_mutators, \ + gc_basic_stats_mutator_added, \ + gc_basic_stats_mutator_cause_gc, \ + gc_basic_stats_mutator_stopping, \ + gc_basic_stats_mutator_stopped, \ + gc_basic_stats_mutator_restarted, \ + gc_basic_stats_mutator_removed, \ + gc_basic_stats_heap_resized, \ + gc_basic_stats_live_data_size, \ + }) + +static inline void gc_basic_stats_finish(struct gc_basic_stats *stats) { + uint64_t now = gc_basic_stats_now(); + uint64_t cpu_time = gc_basic_stats_cpu_time(); + stats->elapsed_mutator_usec += now - stats->last_time_usec; + stats->cpu_mutator_usec += cpu_time - stats->last_cpu_time_usec; + stats->last_time_usec = now; + stats->last_cpu_time_usec = cpu_time; +} + +static inline void gc_basic_stats_print(struct gc_basic_stats *stats, FILE *f) { + fprintf(f, "Completed %" PRIu64 " major collections (%" PRIu64 " minor).\n", + stats->major_collection_count, stats->minor_collection_count); + uint64_t stopped = stats->elapsed_collector_usec; + uint64_t elapsed = stats->elapsed_mutator_usec + stopped; + uint64_t cpu_stopped = stats->cpu_collector_usec; + uint64_t cpu_total = stats->cpu_mutator_usec + cpu_stopped; + uint64_t ms = 1000; // per usec + fprintf(f, "%" PRIu64 ".%.3" PRIu64 " ms total time " + "(%" PRIu64 ".%.3" PRIu64 " stopped); " + "%" PRIu64 ".%.3" PRIu64 " ms CPU time " + "(%" PRIu64 ".%.3" PRIu64 " stopped).\n", + elapsed / ms, elapsed % ms, stopped / ms, stopped % ms, + cpu_total / ms, cpu_total % ms, cpu_stopped / ms, cpu_stopped % ms); + uint64_t pause_median = gc_latency_median(&stats->pause_times); + uint64_t pause_p95 = gc_latency_percentile(&stats->pause_times, 0.95); + uint64_t pause_max = gc_latency_max(&stats->pause_times); + fprintf(f, "%" PRIu64 ".%.3" PRIu64 " ms median pause time, " + "%" PRIu64 ".%.3" PRIu64 " p95, " + "%" PRIu64 ".%.3" PRIu64 " max.\n", + pause_median / ms, pause_median % ms, pause_p95 / ms, pause_p95 % ms, + pause_max / ms, pause_max % ms); + double MB = 1e6; + fprintf(f, "Heap size is %.3f MB (max %.3f MB); peak live data %.3f MB.\n", + stats->heap_size / MB, stats->max_heap_size / MB, + stats->max_live_data_size / MB); +} + +#endif // GC_BASIC_STATS_H_ diff --git a/libguile/whippet/api/gc-collection-kind.h b/libguile/whippet/api/gc-collection-kind.h new file mode 100644 index 000000000..11cfc276a --- /dev/null +++ b/libguile/whippet/api/gc-collection-kind.h @@ -0,0 +1,11 @@ +#ifndef GC_COLLECTION_KIND_H +#define GC_COLLECTION_KIND_H + +enum gc_collection_kind { + 
GC_COLLECTION_ANY, + GC_COLLECTION_MINOR, + GC_COLLECTION_MAJOR, + GC_COLLECTION_COMPACTING, +}; + +#endif // GC_COLLECTION_KIND_H diff --git a/libguile/whippet/api/gc-config.h b/libguile/whippet/api/gc-config.h new file mode 100644 index 000000000..867af63d2 --- /dev/null +++ b/libguile/whippet/api/gc-config.h @@ -0,0 +1,40 @@ +#ifndef GC_CONFIG_H +#define GC_CONFIG_H + +#ifndef GC_DEBUG +#define GC_DEBUG 0 +#endif + +#ifndef GC_HAS_IMMEDIATES +#define GC_HAS_IMMEDIATES 1 +#endif + +#ifndef GC_PARALLEL +#define GC_PARALLEL 0 +#endif + +#ifndef GC_GENERATIONAL +#define GC_GENERATIONAL 0 +#endif + +// Though you normally wouldn't configure things this way, it's possible +// to have both precise and conservative roots. However we have to +// either have precise or conservative tracing; not a mix. + +#ifndef GC_PRECISE_ROOTS +#define GC_PRECISE_ROOTS 0 +#endif + +#ifndef GC_CONSERVATIVE_ROOTS +#define GC_CONSERVATIVE_ROOTS 0 +#endif + +#ifndef GC_CONSERVATIVE_TRACE +#define GC_CONSERVATIVE_TRACE 0 +#endif + +#ifndef GC_CONCURRENT_TRACE +#define GC_CONCURRENT_TRACE 0 +#endif + +#endif // GC_CONFIG_H diff --git a/libguile/whippet/api/gc-conservative-ref.h b/libguile/whippet/api/gc-conservative-ref.h new file mode 100644 index 000000000..a2b260384 --- /dev/null +++ b/libguile/whippet/api/gc-conservative-ref.h @@ -0,0 +1,17 @@ +#ifndef GC_CONSERVATIVE_REF_H +#define GC_CONSERVATIVE_REF_H + +#include + +struct gc_conservative_ref { + uintptr_t value; +}; + +static inline struct gc_conservative_ref gc_conservative_ref(uintptr_t value) { + return (struct gc_conservative_ref){value}; +} +static inline uintptr_t gc_conservative_ref_value(struct gc_conservative_ref ref) { + return ref.value; +} + +#endif // GC_CONSERVATIVE_REF_H diff --git a/libguile/whippet/api/gc-edge.h b/libguile/whippet/api/gc-edge.h new file mode 100644 index 000000000..ec487df9d --- /dev/null +++ b/libguile/whippet/api/gc-edge.h @@ -0,0 +1,26 @@ +#ifndef GC_EDGE_H +#define GC_EDGE_H + +#include "gc-ref.h" + +struct gc_edge { + struct gc_ref *dst; +}; + +static inline struct gc_edge gc_edge(void* addr) { + return (struct gc_edge){addr}; +} +static inline struct gc_ref gc_edge_ref(struct gc_edge edge) { + return *edge.dst; +} +static inline struct gc_ref* gc_edge_loc(struct gc_edge edge) { + return edge.dst; +} +static inline uintptr_t gc_edge_address(struct gc_edge edge) { + return (uintptr_t)gc_edge_loc(edge); +} +static inline void gc_edge_update(struct gc_edge edge, struct gc_ref ref) { + *edge.dst = ref; +} + +#endif // GC_EDGE_H diff --git a/libguile/whippet/api/gc-embedder-api.h b/libguile/whippet/api/gc-embedder-api.h new file mode 100644 index 000000000..c1b272a51 --- /dev/null +++ b/libguile/whippet/api/gc-embedder-api.h @@ -0,0 +1,67 @@ +#ifndef GC_EMBEDDER_API_H +#define GC_EMBEDDER_API_H + +#include + +#include "gc-config.h" +#include "gc-edge.h" +#include "gc-inline.h" +#include "gc-forwarding.h" + +#ifndef GC_EMBEDDER_API +#define GC_EMBEDDER_API static +#endif + +struct gc_mutator_roots; +struct gc_heap_roots; +struct gc_atomic_forward; +struct gc_heap; +struct gc_extern_space; + +GC_EMBEDDER_API inline int gc_is_valid_conservative_ref_displacement(uintptr_t displacement); +GC_EMBEDDER_API inline size_t gc_finalizer_priority_count(void); + +GC_EMBEDDER_API inline int gc_extern_space_visit(struct gc_extern_space *space, + struct gc_edge edge, + struct gc_ref ref) GC_ALWAYS_INLINE; +GC_EMBEDDER_API inline void gc_extern_space_start_gc(struct gc_extern_space *space, + int is_minor_gc); +GC_EMBEDDER_API inline void 
gc_extern_space_finish_gc(struct gc_extern_space *space, + int is_minor_gc); + +GC_EMBEDDER_API inline void gc_trace_object(struct gc_ref ref, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data, + size_t *size) GC_ALWAYS_INLINE; + +GC_EMBEDDER_API inline void gc_trace_mutator_roots(struct gc_mutator_roots *roots, + void (*trace_edge)(struct gc_edge edge, + struct gc_heap *heap, + void *trace_data), + struct gc_heap *heap, + void *trace_data); +GC_EMBEDDER_API inline void gc_trace_heap_roots(struct gc_heap_roots *roots, + void (*trace_edge)(struct gc_edge edge, + struct gc_heap *heap, + void *trace_data), + struct gc_heap *heap, + void *trace_data); + +GC_EMBEDDER_API inline uintptr_t gc_object_forwarded_nonatomic(struct gc_ref ref); +GC_EMBEDDER_API inline void gc_object_forward_nonatomic(struct gc_ref ref, + struct gc_ref new_ref); + +GC_EMBEDDER_API inline struct gc_atomic_forward gc_atomic_forward_begin(struct gc_ref ref); +GC_EMBEDDER_API inline void gc_atomic_forward_acquire(struct gc_atomic_forward *); +GC_EMBEDDER_API inline int gc_atomic_forward_retry_busy(struct gc_atomic_forward *); +GC_EMBEDDER_API inline void gc_atomic_forward_abort(struct gc_atomic_forward *); +GC_EMBEDDER_API inline size_t gc_atomic_forward_object_size(struct gc_atomic_forward *); +GC_EMBEDDER_API inline void gc_atomic_forward_commit(struct gc_atomic_forward *, + struct gc_ref new_ref); +GC_EMBEDDER_API inline uintptr_t gc_atomic_forward_address(struct gc_atomic_forward *); + + +#endif // GC_EMBEDDER_API_H diff --git a/libguile/whippet/api/gc-ephemeron.h b/libguile/whippet/api/gc-ephemeron.h new file mode 100644 index 000000000..1d9e59b55 --- /dev/null +++ b/libguile/whippet/api/gc-ephemeron.h @@ -0,0 +1,42 @@ +#ifndef GC_EPHEMERON_H_ +#define GC_EPHEMERON_H_ + +#include "gc-edge.h" +#include "gc-ref.h" +#include "gc-visibility.h" + +// Ephemerons establish an association between a "key" object and a +// "value" object. If the ephemeron and the key are live, then the +// value is live, and can be retrieved from the ephemeron. Ephemerons +// can be chained together, which allows them to function as links in a +// buckets-and-chains hash table. +// +// This file defines the user-facing API for ephemerons. 
+ +struct gc_heap; +struct gc_mutator; +struct gc_ephemeron; + +GC_API_ size_t gc_ephemeron_size(void); +GC_API_ struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut); +GC_API_ void gc_ephemeron_init(struct gc_mutator *mut, + struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value); + +GC_API_ struct gc_ref gc_ephemeron_key(struct gc_ephemeron *ephemeron); +GC_API_ struct gc_ref gc_ephemeron_value(struct gc_ephemeron *ephemeron); + +GC_API_ struct gc_ephemeron* gc_ephemeron_chain_head(struct gc_ephemeron **loc); +GC_API_ void gc_ephemeron_chain_push(struct gc_ephemeron **loc, + struct gc_ephemeron *ephemeron); +GC_API_ struct gc_ephemeron* gc_ephemeron_chain_next(struct gc_ephemeron *ephemeron); +GC_API_ void gc_ephemeron_mark_dead(struct gc_ephemeron *ephemeron); + +GC_API_ void gc_trace_ephemeron(struct gc_ephemeron *ephemeron, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data); + +#endif // GC_EPHEMERON_H_ diff --git a/libguile/whippet/api/gc-event-listener-chain.h b/libguile/whippet/api/gc-event-listener-chain.h new file mode 100644 index 000000000..27b56d5c6 --- /dev/null +++ b/libguile/whippet/api/gc-event-listener-chain.h @@ -0,0 +1,145 @@ +#ifndef GC_EVENT_LISTENER_CHAIN_H +#define GC_EVENT_LISTENER_CHAIN_H + +#include "gc-event-listener.h" + +struct gc_event_listener_chain { + struct gc_event_listener head; void *head_data; + struct gc_event_listener tail; void *tail_data; +}; + +struct gc_event_listener_chain_mutator { + struct gc_event_listener_chain *chain; + void *head_mutator_data; + void *tail_mutator_data; +}; + +static inline void gc_event_listener_chain_init(void *data, size_t heap_size) { + struct gc_event_listener_chain *chain = data; + chain->head.init(chain->head_data, heap_size); + chain->tail.init(chain->tail_data, heap_size); +} + +static inline void gc_event_listener_chain_requesting_stop(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.requesting_stop(chain->head_data); + chain->tail.requesting_stop(chain->tail_data); +} +static inline void gc_event_listener_chain_waiting_for_stop(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.waiting_for_stop(chain->head_data); + chain->tail.waiting_for_stop(chain->tail_data); +} +static inline void gc_event_listener_chain_mutators_stopped(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.mutators_stopped(chain->head_data); + chain->tail.mutators_stopped(chain->tail_data); +} +static inline void +gc_event_listener_chain_prepare_gc(void *data, enum gc_collection_kind kind) { + struct gc_event_listener_chain *chain = data; + chain->head.prepare_gc(chain->head_data, kind); + chain->tail.prepare_gc(chain->tail_data, kind); +} +static inline void gc_event_listener_chain_roots_traced(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.roots_traced(chain->head_data); + chain->tail.roots_traced(chain->tail_data); +} +static inline void gc_event_listener_chain_heap_traced(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.heap_traced(chain->head_data); + chain->tail.heap_traced(chain->tail_data); +} +static inline void gc_event_listener_chain_ephemerons_traced(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.ephemerons_traced(chain->head_data); + chain->tail.ephemerons_traced(chain->tail_data); +} +static inline void gc_event_listener_chain_finalizers_traced(void *data) { + 
struct gc_event_listener_chain *chain = data; + chain->head.finalizers_traced(chain->head_data); + chain->tail.finalizers_traced(chain->tail_data); +} + +static inline void gc_event_listener_chain_restarting_mutators(void *data) { + struct gc_event_listener_chain *chain = data; + chain->head.restarting_mutators(chain->head_data); + chain->tail.restarting_mutators(chain->tail_data); +} + +static inline void* gc_event_listener_chain_mutator_added(void *data) { + struct gc_event_listener_chain *chain = data; + struct gc_event_listener_chain_mutator *mutator = malloc(sizeof(*mutator));; + if (!mutator) abort(); + mutator->chain = chain; + mutator->head_mutator_data = chain->head.mutator_added(chain->head_data); + mutator->tail_mutator_data = chain->tail.mutator_added(chain->tail_data); + return mutator; +} + +static inline void gc_event_listener_chain_mutator_cause_gc(void *mutator_data) { + struct gc_event_listener_chain_mutator *mutator = mutator_data; + mutator->chain->head.restarting_mutators(mutator->head_data); + mutator->chain->tail.restarting_mutators(mutator->tail_data); +} +static inline void gc_event_listener_chain_mutator_stopping(void *mutator_data) { + struct gc_event_listener_chain_mutator *mutator = mutator_data; + mutator->chain->head.mutator_stopping(mutator->head_data); + mutator->chain->tail.mutator_stopping(mutator->tail_data); +} +static inline void gc_event_listener_chain_mutator_stopped(void *mutator_data) { + struct gc_event_listener_chain_mutator *mutator = mutator_data; + mutator->chain->head.mutator_stopped(mutator->head_data); + mutator->chain->tail.mutator_stopped(mutator->tail_data); +} +static inline void gc_event_listener_chain_mutator_restarted(void *mutator_data) { + struct gc_event_listener_chain_mutator *mutator = mutator_data; + mutator->chain->head.mutator_restarted(mutator->head_data); + mutator->chain->tail.mutator_restarted(mutator->tail_data); +} +static inline void gc_event_listener_chain_mutator_removed(void *mutator_data) { + struct gc_event_listener_chain_mutator *mutator = mutator_data; + mutator->chain->head.mutator_removed(mutator->head_data); + mutator->chain->tail.mutator_removed(mutator->tail_data); + free(mutator); +} + +static inline void gc_event_listener_chain_heap_resized(void *data, size_t size) { + struct gc_event_listener_chain *chain = data; + chain->head.heap_resized(chain->head_data, size); + chain->tail.heap_resized(chain->tail_data, size); +} + +static inline void gc_event_listener_chain_live_data_size(void *data, size_t size) { + struct gc_event_listener_chain *chain = data; + chain->head.live_data_size(chain->head_data, size); + chain->tail.live_data_size(chain->tail_data, size); +} + +#define GC_EVENT_LISTENER_CHAIN \ + ((struct gc_event_listener) { \ + gc_event_listener_chain_init, \ + gc_event_listener_chain_requesting_stop, \ + gc_event_listener_chain_waiting_for_stop, \ + gc_event_listener_chain_mutators_stopped, \ + gc_event_listener_chain_prepare_gc, \ + gc_event_listener_chain_roots_traced, \ + gc_event_listener_chain_heap_traced, \ + gc_event_listener_chain_ephemerons_traced, \ + gc_event_listener_chain_finalizers_traced, \ + gc_event_listener_chain_restarting_mutators, \ + gc_event_listener_chain_mutator_added, \ + gc_event_listener_chain_mutator_cause_gc, \ + gc_event_listener_chain_mutator_stopping, \ + gc_event_listener_chain_mutator_stopped, \ + gc_event_listener_chain_mutator_restarted, \ + gc_event_listener_chain_mutator_removed, \ + gc_event_listener_chain_heap_resized, \ + 
gc_event_listener_chain_live_data_size, \ + }) + +#define GC_EVENT_LISTENER_CHAIN_DATA(head, head_data, tail, tail_data) \ + ((struct gc_event_listener_chain_data){head, head_data, tail, tail_data}) + +#endif // GC_EVENT_LISTENER_CHAIN_H diff --git a/libguile/whippet/api/gc-event-listener.h b/libguile/whippet/api/gc-event-listener.h new file mode 100644 index 000000000..66801a52c --- /dev/null +++ b/libguile/whippet/api/gc-event-listener.h @@ -0,0 +1,29 @@ +#ifndef GC_EVENT_LISTENER_H +#define GC_EVENT_LISTENER_H + +#include "gc-collection-kind.h" + +struct gc_event_listener { + void (*init)(void *data, size_t heap_size); + void (*requesting_stop)(void *data); + void (*waiting_for_stop)(void *data); + void (*mutators_stopped)(void *data); + void (*prepare_gc)(void *data, enum gc_collection_kind kind); + void (*roots_traced)(void *data); + void (*heap_traced)(void *data); + void (*ephemerons_traced)(void *data); + void (*finalizers_traced)(void *data); + void (*restarting_mutators)(void *data); + + void* (*mutator_added)(void *data); + void (*mutator_cause_gc)(void *mutator_data); + void (*mutator_stopping)(void *mutator_data); + void (*mutator_stopped)(void *mutator_data); + void (*mutator_restarted)(void *mutator_data); + void (*mutator_removed)(void *mutator_data); + + void (*heap_resized)(void *data, size_t size); + void (*live_data_size)(void *data, size_t size); +}; + +#endif // GC_EVENT_LISTENER_H diff --git a/libguile/whippet/api/gc-finalizer.h b/libguile/whippet/api/gc-finalizer.h new file mode 100644 index 000000000..1dcb0fb2f --- /dev/null +++ b/libguile/whippet/api/gc-finalizer.h @@ -0,0 +1,81 @@ +#ifndef GC_FINALIZER_H_ +#define GC_FINALIZER_H_ + +#include "gc-edge.h" +#include "gc-ref.h" +#include "gc-visibility.h" + +// A finalizer allows the embedder to be notified when an object becomes +// unreachable. +// +// A finalizer has a priority. When the heap is created, the embedder +// should declare how many priorities there are. Lower-numbered +// priorities take precedence; if an object has a priority-0 finalizer +// outstanding, that will prevent any finalizer at level 1 (or 2, ...) +// from firing until no priority-0 finalizer remains. +// +// Call gc_attach_finalizer to attach a finalizer to an object. +// +// A finalizer also references an associated GC-managed closure object. +// A finalizer's reference to the closure object is strong: if a +// finalizer's closure closure references its finalizable object, +// directly or indirectly, the finalizer will never fire. +// +// When an object with a finalizer becomes unreachable, it is added to a +// queue. The embedder can call gc_pop_finalizable to get the next +// finalizable object and its associated closure. At that point the +// embedder can do anything with the object, including keeping it alive. +// Ephemeron associations will still be present while the finalizable +// object is live. Note however that any objects referenced by the +// finalizable object may themselves be already finalized; finalizers +// are enqueued for objects when they become unreachable, which can +// concern whole subgraphs of objects at once. +// +// The usual way for an embedder to know when the queue of finalizable +// object is non-empty is to call gc_set_finalizer_callback to +// provide a function that will be invoked when there are pending +// finalizers. +// +// Arranging to call gc_pop_finalizable and doing something with the +// finalizable object and closure is the responsibility of the embedder. 
+// The embedder's finalization action can end up invoking arbitrary +// code, so unless the embedder imposes some kind of restriction on what +// finalizers can do, generally speaking finalizers should be run in a +// dedicated thread instead of recursively from within whatever mutator +// thread caused GC. Setting up such a thread is the responsibility of +// the mutator. gc_pop_finalizable is thread-safe, allowing multiple +// finalization threads if that is appropriate. +// +// gc_allocate_finalizer returns a finalizer, which is a fresh +// GC-managed heap object. The mutator should then directly attach it +// to an object using gc_finalizer_attach. When the finalizer is fired, +// it becomes available to the mutator via gc_pop_finalizable. + +struct gc_heap; +struct gc_mutator; +struct gc_finalizer; + +GC_API_ size_t gc_finalizer_size(void); +GC_API_ struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut); +GC_API_ void gc_finalizer_attach(struct gc_mutator *mut, + struct gc_finalizer *finalizer, + unsigned priority, + struct gc_ref object, struct gc_ref closure); + +GC_API_ struct gc_ref gc_finalizer_object(struct gc_finalizer *finalizer); +GC_API_ struct gc_ref gc_finalizer_closure(struct gc_finalizer *finalizer); + +GC_API_ struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut); + +typedef void (*gc_finalizer_callback)(struct gc_heap *heap, size_t count); +GC_API_ void gc_set_finalizer_callback(struct gc_heap *heap, + gc_finalizer_callback callback); + +GC_API_ void gc_trace_finalizer(struct gc_finalizer *finalizer, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data); + +#endif // GC_FINALIZER_H_ diff --git a/libguile/whippet/api/gc-forwarding.h b/libguile/whippet/api/gc-forwarding.h new file mode 100644 index 000000000..25aca3011 --- /dev/null +++ b/libguile/whippet/api/gc-forwarding.h @@ -0,0 +1,20 @@ +#ifndef GC_FORWARDING_H +#define GC_FORWARDING_H + +#include +#include "gc-ref.h" + +enum gc_forwarding_state { + GC_FORWARDING_STATE_FORWARDED, + GC_FORWARDING_STATE_BUSY, + GC_FORWARDING_STATE_ACQUIRED, + GC_FORWARDING_STATE_NOT_FORWARDED +}; + +struct gc_atomic_forward { + struct gc_ref ref; + uintptr_t data; + enum gc_forwarding_state state; +}; + +#endif // GC_FORWARDING_H diff --git a/libguile/whippet/api/gc-histogram.h b/libguile/whippet/api/gc-histogram.h new file mode 100644 index 000000000..0761a630f --- /dev/null +++ b/libguile/whippet/api/gc-histogram.h @@ -0,0 +1,82 @@ +#ifndef GC_HISTOGRAM_H +#define GC_HISTOGRAM_H + +#include "gc-assert.h" + +#include + +static inline size_t gc_histogram_bucket(uint64_t max_value_bits, + uint64_t precision, + uint64_t val) { + uint64_t major = val < (1ULL << precision) + ? 0ULL + : 64ULL - __builtin_clzl(val) - precision; + uint64_t minor = val < (1 << precision) + ? val + : (val >> (major - 1ULL)) & ((1ULL << precision) - 1ULL); + uint64_t idx = (major << precision) | minor; + if (idx >= (max_value_bits << precision)) + idx = max_value_bits << precision; + return idx; +} + +static inline uint64_t gc_histogram_bucket_min_val(uint64_t precision, + size_t idx) { + uint64_t major = idx >> precision; + uint64_t minor = idx & ((1ULL << precision) - 1ULL); + uint64_t min_val = major + ? 
((1ULL << precision) | minor) << (major - 1ULL) + : minor; + return min_val; +} + +#define GC_DEFINE_HISTOGRAM(name, max_value_bits, precision) \ + struct name { uint32_t buckets[((max_value_bits) << (precision)) + 1]; }; \ + static inline size_t name##_size(void) { \ + return ((max_value_bits) << (precision)) + 1; \ + } \ + static inline uint64_t name##_bucket_min_val(size_t idx) { \ + GC_ASSERT(idx < name##_size()); \ + return gc_histogram_bucket_min_val((precision), idx); \ + } \ + static inline struct name make_##name(void) { \ + return (struct name) { { 0, }}; \ + } \ + static inline void name##_record(struct name *h, uint64_t val) { \ + h->buckets[gc_histogram_bucket((max_value_bits), (precision), val)]++; \ + } \ + static inline uint64_t name##_ref(struct name *h, size_t idx) { \ + GC_ASSERT(idx < name##_size()); \ + return h->buckets[idx]; \ + } \ + static inline uint64_t name##_min(struct name *h) { \ + for (size_t bucket = 0; bucket < name##_size(); bucket++) \ + if (h->buckets[bucket]) return name##_bucket_min_val(bucket); \ + return -1; \ + } \ + static inline uint64_t name##_max(struct name *h) { \ + if (h->buckets[name##_size()-1]) return -1LL; \ + for (ssize_t bucket = name##_size() - 1; bucket >= 0; bucket--) \ + if (h->buckets[bucket]) return name##_bucket_min_val(bucket+1); \ + return 0; \ + } \ + static inline uint64_t name##_count(struct name *h) { \ + uint64_t sum = 0; \ + for (size_t bucket = 0; bucket < name##_size(); bucket++) \ + sum += h->buckets[bucket]; \ + return sum; \ + } \ + static inline uint64_t name##_percentile(struct name *h, double p) { \ + uint64_t n = name##_count(h) * p; \ + uint64_t sum = 0; \ + for (size_t bucket = 0; bucket + 1 < name##_size(); bucket++) { \ + sum += h->buckets[bucket]; \ + if (sum >= n) return name##_bucket_min_val(bucket+1); \ + } \ + return -1ULL; \ + } \ + static inline uint64_t name##_median(struct name *h) { \ + return name##_percentile(h, 0.5); \ + } + +#endif // GC_HISTOGRAM_H diff --git a/libguile/whippet/api/gc-inline.h b/libguile/whippet/api/gc-inline.h new file mode 100644 index 000000000..30eac54f3 --- /dev/null +++ b/libguile/whippet/api/gc-inline.h @@ -0,0 +1,7 @@ +#ifndef GC_INLINE_H_ +#define GC_INLINE_H_ + +#define GC_ALWAYS_INLINE __attribute__((always_inline)) +#define GC_NEVER_INLINE __attribute__((noinline)) + +#endif // GC_INLINE_H_ diff --git a/libguile/whippet/api/gc-lttng.h b/libguile/whippet/api/gc-lttng.h new file mode 100644 index 000000000..d192be4ed --- /dev/null +++ b/libguile/whippet/api/gc-lttng.h @@ -0,0 +1,100 @@ +#define LTTNG_UST_TRACEPOINT_PROVIDER whippet + +#undef LTTNG_UST_TRACEPOINT_INCLUDE +#define LTTNG_UST_TRACEPOINT_INCLUDE "gc-lttng.h" + +#if !defined(_TP_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ) +#define _TP_H + +#include + +LTTNG_UST_TRACEPOINT_ENUM( + whippet, gc_kind, + LTTNG_UST_TP_ENUM_VALUES + (lttng_ust_field_enum_value("MINOR", 1) + lttng_ust_field_enum_value("MAJOR", 2) + lttng_ust_field_enum_value("COMPACTING", 3))) + +LTTNG_UST_TRACEPOINT_EVENT_CLASS( + whippet, tracepoint, + LTTNG_UST_TP_ARGS(), + LTTNG_UST_TP_FIELDS()) + +LTTNG_UST_TRACEPOINT_EVENT_CLASS( + whippet, size_tracepoint, + LTTNG_UST_TP_ARGS(size_t, size), + LTTNG_UST_TP_FIELDS(lttng_ust_field_integer(size_t, size, size))) + + +/* The tracepoint instances */ +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, size_tracepoint, whippet, init, + LTTNG_UST_TP_ARGS(size_t, size)) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, size_tracepoint, whippet, heap_resized, + LTTNG_UST_TP_ARGS(size_t, size)) 
+LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, size_tracepoint, whippet, live_data_size, + LTTNG_UST_TP_ARGS(size_t, size)) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, requesting_stop, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, waiting_for_stop, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutators_stopped, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT( + whippet, prepare_gc, + LTTNG_UST_TP_ARGS(int, gc_kind), + LTTNG_UST_TP_FIELDS( + lttng_ust_field_enum(whippet, gc_kind, int, gc_kind, gc_kind))) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, roots_traced, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, heap_traced, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, ephemerons_traced, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, finalizers_traced, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, restarting_mutators, LTTNG_UST_TP_ARGS()) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutator_added, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutator_cause_gc, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutator_stopping, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutator_stopped, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutator_restarted, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, mutator_removed, LTTNG_UST_TP_ARGS()) + +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_unpark_all, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_share, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_check_termination_begin, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_check_termination_end, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_steal, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_roots_begin, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_roots_end, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_objects_begin, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_objects_end, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_worker_begin, LTTNG_UST_TP_ARGS()) +LTTNG_UST_TRACEPOINT_EVENT_INSTANCE( + whippet, tracepoint, whippet, trace_worker_end, LTTNG_UST_TP_ARGS()) + +#endif /* _TP_H */ + +#include diff --git a/libguile/whippet/api/gc-null-event-listener.h b/libguile/whippet/api/gc-null-event-listener.h new file mode 100644 index 000000000..9c032ffc2 --- /dev/null +++ b/libguile/whippet/api/gc-null-event-listener.h @@ -0,0 +1,50 @@ +#ifndef GC_NULL_EVENT_LISTENER_H +#define GC_NULL_EVENT_LISTENER_H + +#include "gc-event-listener.h" + +static inline void gc_null_event_listener_init(void *data, size_t size) {} +static inline void 
gc_null_event_listener_requesting_stop(void *data) {} +static inline void gc_null_event_listener_waiting_for_stop(void *data) {} +static inline void gc_null_event_listener_mutators_stopped(void *data) {} +static inline void gc_null_event_listener_prepare_gc(void *data, + enum gc_collection_kind) {} +static inline void gc_null_event_listener_roots_traced(void *data) {} +static inline void gc_null_event_listener_heap_traced(void *data) {} +static inline void gc_null_event_listener_ephemerons_traced(void *data) {} +static inline void gc_null_event_listener_finalizers_traced(void *data) {} +static inline void gc_null_event_listener_restarting_mutators(void *data) {} + +static inline void* gc_null_event_listener_mutator_added(void *data) {} +static inline void gc_null_event_listener_mutator_cause_gc(void *mutator_data) {} +static inline void gc_null_event_listener_mutator_stopping(void *mutator_data) {} +static inline void gc_null_event_listener_mutator_stopped(void *mutator_data) {} +static inline void gc_null_event_listener_mutator_restarted(void *mutator_data) {} +static inline void gc_null_event_listener_mutator_removed(void *mutator_data) {} + +static inline void gc_null_event_listener_heap_resized(void *, size_t) {} +static inline void gc_null_event_listener_live_data_size(void *, size_t) {} + +#define GC_NULL_EVENT_LISTENER \ + ((struct gc_event_listener) { \ + gc_null_event_listener_init, \ + gc_null_event_listener_requesting_stop, \ + gc_null_event_listener_waiting_for_stop, \ + gc_null_event_listener_mutators_stopped, \ + gc_null_event_listener_prepare_gc, \ + gc_null_event_listener_roots_traced, \ + gc_null_event_listener_heap_traced, \ + gc_null_event_listener_ephemerons_traced, \ + gc_null_event_listener_finalizers_traced, \ + gc_null_event_listener_restarting_mutators, \ + gc_null_event_listener_mutator_added, \ + gc_null_event_listener_mutator_cause_gc, \ + gc_null_event_listener_mutator_stopping, \ + gc_null_event_listener_mutator_stopped, \ + gc_null_event_listener_mutator_restarted, \ + gc_null_event_listener_mutator_removed, \ + gc_null_event_listener_heap_resized, \ + gc_null_event_listener_live_data_size, \ + }) + +#endif // GC_NULL_EVENT_LISTENER_H_ diff --git a/libguile/whippet/api/gc-options.h b/libguile/whippet/api/gc-options.h new file mode 100644 index 000000000..2f3f7f792 --- /dev/null +++ b/libguile/whippet/api/gc-options.h @@ -0,0 +1,39 @@ +#ifndef GC_OPTIONS_H +#define GC_OPTIONS_H + +#include "gc-visibility.h" + +enum gc_heap_size_policy { + GC_HEAP_SIZE_FIXED, + GC_HEAP_SIZE_GROWABLE, + GC_HEAP_SIZE_ADAPTIVE, +}; + +enum { + GC_OPTION_HEAP_SIZE_POLICY, + GC_OPTION_HEAP_SIZE, + GC_OPTION_MAXIMUM_HEAP_SIZE, + GC_OPTION_HEAP_SIZE_MULTIPLIER, + GC_OPTION_HEAP_EXPANSIVENESS, + GC_OPTION_PARALLELISM +}; + +struct gc_options; + +GC_API_ int gc_option_from_string(const char *str); + +GC_API_ struct gc_options* gc_allocate_options(void); + +GC_API_ int gc_options_set_int(struct gc_options *options, int option, + int value); +GC_API_ int gc_options_set_size(struct gc_options *options, int option, + size_t value); +GC_API_ int gc_options_set_double(struct gc_options *options, int option, + double value); + +GC_API_ int gc_options_parse_and_set(struct gc_options *options, + int option, const char *value); +GC_API_ int gc_options_parse_and_set_many(struct gc_options *options, + const char *str); + +#endif // GC_OPTIONS_H diff --git a/libguile/whippet/api/gc-ref.h b/libguile/whippet/api/gc-ref.h new file mode 100644 index 000000000..29e1a3853 --- /dev/null +++ 
b/libguile/whippet/api/gc-ref.h @@ -0,0 +1,50 @@ +#ifndef GC_REF_H +#define GC_REF_H + +#include "gc-assert.h" +#include "gc-config.h" + +#include + +struct gc_ref { + uintptr_t value; +}; + +static inline struct gc_ref gc_ref(uintptr_t value) { + return (struct gc_ref){value}; +} +static inline uintptr_t gc_ref_value(struct gc_ref ref) { + return ref.value; +} + +static inline struct gc_ref gc_ref_null(void) { + return gc_ref(0); +} +static inline int gc_ref_is_null(struct gc_ref ref) { + return ref.value == 0; +} +static inline int gc_ref_is_immediate(struct gc_ref ref) { + GC_ASSERT(!gc_ref_is_null(ref)); + return GC_HAS_IMMEDIATES && (ref.value & (sizeof(void*) - 1)); +} +static inline struct gc_ref gc_ref_immediate(uintptr_t val) { + GC_ASSERT(val & (sizeof(void*) - 1)); + GC_ASSERT(GC_HAS_IMMEDIATES); + return gc_ref(val); +} +static inline int gc_ref_is_heap_object(struct gc_ref ref) { + return !gc_ref_is_immediate(ref); +} +static inline struct gc_ref gc_ref_from_heap_object_or_null(void *obj) { + return gc_ref((uintptr_t) obj); +} +static inline struct gc_ref gc_ref_from_heap_object(void *obj) { + GC_ASSERT(obj); + return gc_ref_from_heap_object_or_null(obj); +} +static inline void* gc_ref_heap_object(struct gc_ref ref) { + GC_ASSERT(gc_ref_is_heap_object(ref)); + return (void *) gc_ref_value(ref); +} + +#endif // GC_REF_H diff --git a/libguile/whippet/api/gc-tracepoint.h b/libguile/whippet/api/gc-tracepoint.h new file mode 100644 index 000000000..598d0bc44 --- /dev/null +++ b/libguile/whippet/api/gc-tracepoint.h @@ -0,0 +1,17 @@ +#ifndef GC_TRACEPOINT_H +#define GC_TRACEPOINT_H + +#ifdef GC_TRACEPOINT_LTTNG + +#include "gc-lttng.h" + +#define GC_TRACEPOINT(...) \ + lttng_ust_tracepoint(whippet, __VA_ARGS__) + +#else // GC_TRACEPOINT_LTTNG + +#define GC_TRACEPOINT(...) do {} while (0) + +#endif // GC_TRACEPOINT_LTTNG + +#endif // GC_TRACEPOINT_H diff --git a/libguile/whippet/api/gc-visibility.h b/libguile/whippet/api/gc-visibility.h new file mode 100644 index 000000000..b7e1995df --- /dev/null +++ b/libguile/whippet/api/gc-visibility.h @@ -0,0 +1,12 @@ +#ifndef GC_VISIBILITY_H_ +#define GC_VISIBILITY_H_ + +#define GC_INTERNAL __attribute__((visibility("hidden"))) +#define GC_PUBLIC __attribute__((visibility("default"))) + +// FIXME: Conflict with bdw-gc GC_API. Switch prefix? 
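
gc-ref.h above encodes a reference as a bare `uintptr_t`: pointer-aligned values are heap objects, while values with low tag bits set count as immediates when `GC_HAS_IMMEDIATES` is enabled in gc-config.h (not shown in this patch). A small sketch of branching on that encoding; the one-bit fixnum tagging used here is purely illustrative and assumes immediates are enabled.

```c
#include <stdio.h>
#include <stdint.h>
#include "gc-ref.h"

// Illustrative only: encode a small integer as an immediate by setting
// the low bit, which gc_ref_is_immediate detects via the alignment mask.
// Requires GC_HAS_IMMEDIATES to be nonzero.
static struct gc_ref make_fixnum(intptr_t n) {
  return gc_ref_immediate(((uintptr_t)n << 1) | 1);
}

static void describe(struct gc_ref ref) {
  if (gc_ref_is_null(ref))
    printf("null reference\n");
  else if (gc_ref_is_immediate(ref))
    printf("immediate: %ld\n", (long)((intptr_t)gc_ref_value(ref) >> 1));
  else
    printf("heap object at %p\n", gc_ref_heap_object(ref));
}
```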
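
gc-tracepoint.h above makes `GC_TRACEPOINT` a thin wrapper: with `GC_TRACEPOINT_LTTNG` defined (see `TRACEPOINT_CPPFLAGS` in the Makefile) it forwards to `lttng_ust_tracepoint` under the `whippet` provider declared in gc-lttng.h, and otherwise it compiles to a no-op. A sketch of what call sites look like for two of the events declared above; the helper function names are illustrative.

```c
#include "gc-tracepoint.h"

// Emits the whippet heap_resized size event in an LTTNG-enabled build;
// otherwise this statement compiles away entirely.
static void note_heap_resized(size_t new_size) {
  GC_TRACEPOINT(heap_resized, new_size);
}

// Argument-less events use the plain `tracepoint` event class.
static void note_mutators_stopped(void) {
  GC_TRACEPOINT(mutators_stopped);
}
```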
+#ifndef GC_API_ +#define GC_API_ GC_INTERNAL +#endif + +#endif // GC_VISIBILITY_H diff --git a/libguile/whippet/api/mmc-attrs.h b/libguile/whippet/api/mmc-attrs.h new file mode 100644 index 000000000..9371f8abe --- /dev/null +++ b/libguile/whippet/api/mmc-attrs.h @@ -0,0 +1,121 @@ +#ifndef MMC_ATTRS_H +#define MMC_ATTRS_H + +#include "gc-config.h" +#include "gc-assert.h" +#include "gc-attrs.h" + +static inline enum gc_allocator_kind gc_allocator_kind(void) { + return GC_ALLOCATOR_INLINE_BUMP_POINTER; +} +static inline size_t gc_allocator_small_granule_size(void) { + return 16; +} +static inline size_t gc_allocator_large_threshold(void) { + return 8192; +} + +static inline size_t gc_allocator_allocation_pointer_offset(void) { + return sizeof(uintptr_t) * 0; +} +static inline size_t gc_allocator_allocation_limit_offset(void) { + return sizeof(uintptr_t) * 1; +} + +static inline size_t gc_allocator_freelist_offset(size_t size, + enum gc_allocation_kind kind) { + GC_CRASH(); +} + +static inline size_t gc_allocator_alloc_table_alignment(void) { + return 4 * 1024 * 1024; +} +static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) { + uint8_t young = 1; + uint8_t trace_precisely = 0; + uint8_t trace_none = 8; + uint8_t trace_conservatively = 16; + uint8_t pinned = 16; + if (GC_CONSERVATIVE_TRACE) { + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + return young | trace_conservatively; + case GC_ALLOCATION_TAGGED_POINTERLESS: + return young | trace_none; + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return young | trace_none; + default: + GC_CRASH(); + }; + } else { + switch (kind) { + case GC_ALLOCATION_TAGGED: + return young | trace_precisely; + case GC_ALLOCATION_TAGGED_POINTERLESS: + return young | trace_none; + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return young | trace_none | pinned; + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + default: + GC_CRASH(); + }; + } +} +static inline uint8_t gc_allocator_alloc_table_end_pattern(void) { + return 32; +} + +static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) { + if (GC_GENERATIONAL) { + if (obj_size <= gc_allocator_large_threshold()) + return GC_OLD_GENERATION_CHECK_ALLOC_TABLE; + return GC_OLD_GENERATION_CHECK_SLOW; + } + return GC_OLD_GENERATION_CHECK_NONE; +} +static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) { + return 7; +} +static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) { + return 1; +} + +static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) { + if (GC_GENERATIONAL) { + if (obj_size <= gc_allocator_large_threshold()) + return GC_WRITE_BARRIER_FIELD; + return GC_WRITE_BARRIER_SLOW; + } + return GC_WRITE_BARRIER_NONE; +} +static inline size_t gc_write_barrier_field_table_alignment(void) { + GC_ASSERT(GC_GENERATIONAL); + return gc_allocator_alloc_table_alignment(); +} +static inline ptrdiff_t gc_write_barrier_field_table_offset(void) { + GC_ASSERT(GC_GENERATIONAL); + return 0; +} +static inline size_t gc_write_barrier_field_fields_per_byte(void) { + GC_ASSERT(GC_GENERATIONAL); + return 2; +} +static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) { + GC_ASSERT(GC_GENERATIONAL); + return 64; // NOFL_METADATA_BYTE_LOGGED_0 +} + +static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) { + return GC_SAFEPOINT_MECHANISM_COOPERATIVE; +} + +static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) { + 
return GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG; +} + +static inline int gc_can_pin_objects(void) { + return 1; +} + +#endif // MMC_ATTRS_H diff --git a/libguile/whippet/api/pcc-attrs.h b/libguile/whippet/api/pcc-attrs.h new file mode 100644 index 000000000..12a555a5d --- /dev/null +++ b/libguile/whippet/api/pcc-attrs.h @@ -0,0 +1,92 @@ +#ifndef PCC_ATTRS_H +#define PCC_ATTRS_H + +#include "gc-config.h" +#include "gc-assert.h" +#include "gc-attrs.h" + +static const uintptr_t GC_ALIGNMENT = 8; +static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192; + +static inline enum gc_allocator_kind gc_allocator_kind(void) { + return GC_ALLOCATOR_INLINE_BUMP_POINTER; +} +static inline size_t gc_allocator_small_granule_size(void) { + return GC_ALIGNMENT; +} +static inline size_t gc_allocator_large_threshold(void) { + return GC_LARGE_OBJECT_THRESHOLD; +} + +static inline size_t gc_allocator_allocation_pointer_offset(void) { + return sizeof(uintptr_t) * 0; +} +static inline size_t gc_allocator_allocation_limit_offset(void) { + return sizeof(uintptr_t) * 1; +} + +static inline size_t gc_allocator_freelist_offset(size_t size, enum gc_allocation_kind kind) { + GC_CRASH(); +} + +static inline size_t gc_allocator_alloc_table_alignment(void) { + return 0; +} +static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) { + GC_CRASH(); +} +static inline uint8_t gc_allocator_alloc_table_end_pattern(void) { + GC_CRASH(); +} + +static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t size) { + if (!GC_GENERATIONAL) + return GC_OLD_GENERATION_CHECK_NONE; + if (size <= gc_allocator_large_threshold()) + return GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY; + return GC_OLD_GENERATION_CHECK_SLOW; +} +static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) { + GC_CRASH(); +} +static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) { + GC_CRASH(); +} + +static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) { + if (!GC_GENERATIONAL) + return GC_WRITE_BARRIER_NONE; + if (obj_size <= gc_allocator_large_threshold()) + return GC_WRITE_BARRIER_FIELD; + return GC_WRITE_BARRIER_SLOW; +} +static inline size_t gc_write_barrier_field_table_alignment(void) { + GC_ASSERT(GC_GENERATIONAL); + return 64 * 1024 * 1024; +} +static inline ptrdiff_t gc_write_barrier_field_table_offset(void) { + GC_ASSERT(GC_GENERATIONAL); + return 128 * 1024; +} +static inline size_t gc_write_barrier_field_fields_per_byte(void) { + GC_ASSERT(GC_GENERATIONAL); + return 8; +} +static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) { + GC_ASSERT(GC_GENERATIONAL); + return 1; +} + +static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) { + return GC_SAFEPOINT_MECHANISM_COOPERATIVE; +} + +static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) { + return GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG; +} + +static inline int gc_can_pin_objects(void) { + return 0; +} + +#endif // PCC_ATTRS_H diff --git a/libguile/whippet/api/semi-attrs.h b/libguile/whippet/api/semi-attrs.h new file mode 100644 index 000000000..f2efbd831 --- /dev/null +++ b/libguile/whippet/api/semi-attrs.h @@ -0,0 +1,80 @@ +#ifndef SEMI_ATTRS_H +#define SEMI_ATTRS_H + +#include "gc-attrs.h" +#include "gc-assert.h" + +static const uintptr_t GC_ALIGNMENT = 8; +static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192; + +static inline enum gc_allocator_kind gc_allocator_kind(void) { + return GC_ALLOCATOR_INLINE_BUMP_POINTER; +} +static inline size_t 
gc_allocator_small_granule_size(void) { + return GC_ALIGNMENT; +} +static inline size_t gc_allocator_large_threshold(void) { + return GC_LARGE_OBJECT_THRESHOLD; +} + +static inline size_t gc_allocator_allocation_pointer_offset(void) { + return sizeof(uintptr_t) * 0; +} +static inline size_t gc_allocator_allocation_limit_offset(void) { + return sizeof(uintptr_t) * 1; +} + +static inline size_t gc_allocator_freelist_offset(size_t size, + enum gc_allocation_kind kind) { + GC_CRASH(); +} + +static inline size_t gc_allocator_alloc_table_alignment(void) { + return 0; +} +static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) { + GC_CRASH(); +} +static inline uint8_t gc_allocator_alloc_table_end_pattern(void) { + GC_CRASH(); +} + +static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t) { + return GC_OLD_GENERATION_CHECK_NONE; +} +static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) { + GC_CRASH(); +} +static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) { + GC_CRASH(); +} + +static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t) { + return GC_WRITE_BARRIER_NONE; +} +static inline size_t gc_write_barrier_field_table_alignment(void) { + GC_CRASH(); +} +static inline ptrdiff_t gc_write_barrier_field_table_offset(void) { + GC_CRASH(); +} +static inline size_t gc_write_barrier_field_fields_per_byte(void) { + GC_CRASH(); +} +static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) { + GC_CRASH(); +} + +static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) { + return GC_SAFEPOINT_MECHANISM_COOPERATIVE; +} + +static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) { + return GC_COOPERATIVE_SAFEPOINT_NONE; +} + +static inline int gc_can_pin_objects(void) { + return 0; +} + +#endif // SEMI_ATTRS_H diff --git a/libguile/whippet/benchmarks/README.md b/libguile/whippet/benchmarks/README.md new file mode 100644 index 000000000..00ec1f731 --- /dev/null +++ b/libguile/whippet/benchmarks/README.md @@ -0,0 +1,35 @@ +# Benchmarks + + - [`mt-gcbench.c`](./mt-gcbench.c): The multi-threaded [GCBench + benchmark](https://hboehm.info/gc/gc_bench.html). An old but + standard benchmark that allocates different sizes of binary trees. + As parameters it takes a heap multiplier and a number of mutator + threads. We analytically compute the peak amount of live data and + then size the GC heap as a multiplier of that size. It has a peak + heap consumption of 10 MB or so per mutator thread: not very large. + At a 2x heap multiplier, it causes about 30 collections for the `mmc` + collector, and runs somewhere around 200-400 milliseconds in + single-threaded mode, on the machines I have in 2022. For low thread + counts, the GCBench benchmark is small; but then again many Guile + processes also are quite short-lived, so perhaps it is useful to + ensure that small heaps remain lightweight. + + To stress `mmc`'s handling of fragmentation, we modified this + benchmark to intersperse pseudorandomly-sized holes between tree + nodes. + + - [`quads.c`](./quads.c): A synthetic benchmark that allocates quad + trees. The mutator begins by allocating one long-lived tree of depth + N, and then allocates 13% of the heap in depth-3 trees, 20 times, + simulating a fixed working set and otherwise an allocation-heavy + workload. 
By observing the times to allocate 13% of the heap in + garbage we can infer mutator overheads, and also note the variance + for the cycles in which GC hits. + +## License + +mt-gcbench.c was originally from https://hboehm.info/gc/gc_bench/, which +has a somewhat unclear license. I have modified GCBench significantly +so that I can slot in different GC implementations. Other files are +distributed under the Whippet license; see the top-level +[README.md](../README.md) for more. diff --git a/libguile/whippet/benchmarks/ephemerons-embedder.h b/libguile/whippet/benchmarks/ephemerons-embedder.h new file mode 100644 index 000000000..5b17178cd --- /dev/null +++ b/libguile/whippet/benchmarks/ephemerons-embedder.h @@ -0,0 +1,54 @@ +#ifndef EPHEMERONS_EMBEDDER_H +#define EPHEMERONS_EMBEDDER_H + +#include + +#include "ephemerons-types.h" +#include "gc-ephemeron.h" + +struct gc_heap; + +#define DEFINE_METHODS(name, Name, NAME) \ + static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \ + static inline void visit_##name##_fields(Name *obj,\ + void (*visit)(struct gc_edge edge, \ + struct gc_heap *heap, \ + void *visit_data), \ + struct gc_heap *heap, \ + void *visit_data) GC_ALWAYS_INLINE; +FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS) +#undef DEFINE_METHODS + +static inline size_t small_object_size(SmallObject *obj) { return sizeof(*obj); } +static inline size_t ephemeron_size(Ephemeron *obj) { return gc_ephemeron_size(); } +static inline size_t box_size(Box *obj) { return sizeof(*obj); } + +static inline void +visit_small_object_fields(SmallObject *obj, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data) {} + +static inline void +visit_ephemeron_fields(Ephemeron *ephemeron, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + + struct gc_heap *heap, + void *visit_data) { + gc_trace_ephemeron((struct gc_ephemeron*)ephemeron, visit, heap, visit_data); +} + +static inline void +visit_box_fields(Box *box, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data) { + visit(gc_edge(&box->obj), heap, visit_data); +} + +#include "simple-gc-embedder.h" + +#endif // EPHEMERONS_EMBEDDER_H diff --git a/libguile/whippet/benchmarks/ephemerons-types.h b/libguile/whippet/benchmarks/ephemerons-types.h new file mode 100644 index 000000000..d2a4b9a5b --- /dev/null +++ b/libguile/whippet/benchmarks/ephemerons-types.h @@ -0,0 +1,21 @@ +#ifndef EPHEMERONS_TYPES_H +#define EPHEMERONS_TYPES_H + +#define FOR_EACH_HEAP_OBJECT_KIND(M) \ + M(box, Box, BOX) \ + M(ephemeron, Ephemeron, EPHEMERON) \ + M(small_object, SmallObject, SMALL_OBJECT) + +#include "heap-objects.h" +#include "simple-tagging-scheme.h" + +struct SmallObject { + struct gc_header header; +}; + +struct Box { + struct gc_header header; + void *obj; +}; + +#endif // EPHEMERONS_TYPES_H diff --git a/libguile/whippet/benchmarks/ephemerons.c b/libguile/whippet/benchmarks/ephemerons.c new file mode 100644 index 000000000..2262bd5c9 --- /dev/null +++ b/libguile/whippet/benchmarks/ephemerons.c @@ -0,0 +1,272 @@ +#include +#include +#include +#include +#include +#include + +#include "assert.h" +#include "gc-api.h" +#include "gc-basic-stats.h" +#include "gc-ephemeron.h" +#include "simple-roots-api.h" +#include "ephemerons-types.h" +#include "simple-allocator.h" + +typedef HANDLE_TO(SmallObject) SmallObjectHandle; +typedef HANDLE_TO(struct gc_ephemeron) EphemeronHandle; +typedef HANDLE_TO(Box) BoxHandle; 
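
For the quads benchmark described in the README above, heap sizing follows from the node count of a complete quad tree (quads.c computes this in its `tree_size` helper further down): a depth-d tree holds (4^(d+1) - 1) / 3 nodes, so the depth-3 trees mentioned there hold 85 nodes each. A standalone sketch of the same computation, for reference only:

```c
#include <stdio.h>

// Node count of a complete quad tree of the given depth:
// 1 + 4 + 4^2 + ... + 4^depth = (4^(depth+1) - 1) / 3.
static size_t quad_tree_nodes(size_t depth) {
  size_t nodes = 0, level = 1;
  for (size_t i = 0; i <= depth; i++) {
    nodes += level;
    level *= 4;
  }
  return nodes;
}

int main(void) {
  printf("depth 3: %zu nodes\n", quad_tree_nodes(3));   // 85
  printf("depth 10: %zu nodes\n", quad_tree_nodes(10)); // 1398101
  return 0;
}
```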
+ +static SmallObject* allocate_small_object(struct gc_mutator *mut) { + return gc_allocate_with_kind(mut, ALLOC_KIND_SMALL_OBJECT, sizeof(SmallObject)); +} + +static Box* allocate_box(struct gc_mutator *mut) { + return gc_allocate_with_kind(mut, ALLOC_KIND_BOX, sizeof(Box)); +} + +static struct gc_ephemeron* allocate_ephemeron(struct gc_mutator *mut) { + struct gc_ephemeron *ret = gc_allocate_ephemeron(mut); + *tag_word(gc_ref_from_heap_object(ret)) = tag_live(ALLOC_KIND_EPHEMERON); + return ret; +} + +/* Get the current time in microseconds */ +static unsigned long current_time(void) +{ + struct timeval t; + if (gettimeofday(&t, NULL) == -1) + return 0; + return t.tv_sec * 1000 * 1000 + t.tv_usec; +} + +struct thread { + struct gc_mutator *mut; + struct gc_mutator_roots roots; +}; + +static void print_elapsed(const char *what, unsigned long start) { + unsigned long end = current_time(); + unsigned long msec = (end - start) / 1000; + unsigned long usec = (end - start) % 1000; + printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec); +} + +struct call_with_gc_data { + void* (*f)(struct thread *); + struct gc_heap *heap; +}; +static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) { + struct call_with_gc_data *data = arg; + struct gc_mutator *mut = gc_init_for_thread(addr, data->heap); + struct thread t = { mut, }; + gc_mutator_set_roots(mut, &t.roots); + void *ret = data->f(&t); + gc_finish_for_thread(mut); + return ret; +} +static void* call_with_gc(void* (*f)(struct thread *), + struct gc_heap *heap) { + struct call_with_gc_data data = { f, heap }; + return gc_call_with_stack_addr(call_with_gc_inner, &data); +} + +#define CHECK(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "%s:%d: check failed: %s\n", __FILE__, __LINE__, #x); \ + exit(1); \ + } \ + } while (0) + +#define CHECK_EQ(x, y) CHECK((x) == (y)) +#define CHECK_NE(x, y) CHECK((x) != (y)) +#define CHECK_NULL(x) CHECK_EQ(x, NULL) +#define CHECK_NOT_NULL(x) CHECK_NE(x, NULL) + +static size_t ephemeron_chain_length(struct gc_ephemeron **loc, + SmallObject *key) { + struct gc_ephemeron *head = gc_ephemeron_chain_head(loc); + size_t len = 0; + while (head) { + CHECK_EQ(key, (SmallObject*)gc_ref_value(gc_ephemeron_key(head))); + Box *value = gc_ref_heap_object(gc_ephemeron_value(head)); + CHECK_NOT_NULL(value); + key = value->obj; + CHECK_NOT_NULL(key); + head = gc_ephemeron_chain_next(head); + len++; + } + return len; +} + +static double heap_size; +static double heap_multiplier; +static size_t nthreads; + +static void cause_gc(struct gc_mutator *mut) { + // Doing a full collection lets us reason precisely about liveness. 
+ gc_collect(mut, GC_COLLECTION_MAJOR); +} + +static void make_ephemeron_chain(struct thread *t, EphemeronHandle *head, + SmallObjectHandle *head_key, size_t length) { + BoxHandle tail_box = { NULL }; + PUSH_HANDLE(t, tail_box); + + CHECK_NULL(HANDLE_REF(*head_key)); + HANDLE_SET(*head_key, allocate_small_object(t->mut)); + + for (size_t i = 0; i < length; i++) { + HANDLE_SET(tail_box, allocate_box(t->mut)); + HANDLE_REF(tail_box)->obj = HANDLE_REF(*head_key); + HANDLE_SET(*head_key, allocate_small_object(t->mut)); + struct gc_ephemeron *ephemeron = allocate_ephemeron(t->mut); + gc_ephemeron_init(t->mut, ephemeron, + gc_ref_from_heap_object(HANDLE_REF(*head_key)), + gc_ref_from_heap_object(HANDLE_REF(tail_box))); + gc_ephemeron_chain_push(HANDLE_LOC(*head), ephemeron); + } + + POP_HANDLE(t); +} + +static void* run_one_test(struct thread *t) { + size_t unit_size = gc_ephemeron_size() + sizeof(Box); + size_t list_length = heap_size / nthreads / heap_multiplier / unit_size; + + printf("Allocating ephemeron list %zu nodes long. Total size %.3fGB.\n", + list_length, list_length * unit_size / 1e9); + + unsigned long thread_start = current_time(); + + SmallObjectHandle head_key = { NULL }; + EphemeronHandle head = { NULL }; + + PUSH_HANDLE(t, head_key); + PUSH_HANDLE(t, head); + + make_ephemeron_chain(t, &head, &head_key, list_length); + + size_t measured_length = ephemeron_chain_length(HANDLE_LOC(head), + HANDLE_REF(head_key)); + CHECK_EQ(measured_length, list_length); + + cause_gc(t->mut); + measured_length = ephemeron_chain_length(HANDLE_LOC(head), + HANDLE_REF(head_key)); + CHECK_EQ(measured_length, list_length); + + if (!GC_CONSERVATIVE_ROOTS) { + HANDLE_SET(head_key, NULL); + cause_gc(t->mut); + measured_length = ephemeron_chain_length(HANDLE_LOC(head), + HANDLE_REF(head_key)); + CHECK_EQ(measured_length, 0); + } + + // swap head_key for a key halfway in, cause gc + // check length is expected half-length; warn, or error if precise + // clear and return + + print_elapsed("thread", thread_start); + + POP_HANDLE(t); + POP_HANDLE(t); + + return NULL; +} + +static void* run_one_test_in_thread(void *arg) { + struct gc_heap *heap = arg; + return call_with_gc(run_one_test, heap); +} + +struct join_data { int status; pthread_t thread; }; +static void *join_thread(void *data) { + struct join_data *join_data = data; + void *ret; + join_data->status = pthread_join(join_data->thread, &ret); + return ret; +} + +#define MAX_THREAD_COUNT 256 + +int main(int argc, char *argv[]) { + if (argc < 4 || 5 < argc) { + fprintf(stderr, "usage: %s HEAP_SIZE MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]); + return 1; + } + + heap_size = atof(argv[1]); + heap_multiplier = atof(argv[2]); + nthreads = atol(argv[3]); + + if (heap_size < 8192) { + fprintf(stderr, + "Heap size should probably be at least 8192, right? 
'%s'\n", + argv[1]); + return 1; + } + if (!(1.0 < heap_multiplier && heap_multiplier < 100)) { + fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]); + return 1; + } + if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) { + fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n", + (int)MAX_THREAD_COUNT, argv[2]); + return 1; + } + + printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n", + heap_size / 1e9, heap_multiplier); + + struct gc_options *options = gc_allocate_options(); + gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED); + gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size); + if (argc == 5) { + if (!gc_options_parse_and_set_many(options, argv[4])) { + fprintf(stderr, "Failed to set GC options: '%s'\n", argv[4]); + return 1; + } + } + + struct gc_heap *heap; + struct gc_mutator *mut; + struct gc_basic_stats stats; + if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) { + fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n", + (size_t)heap_size); + return 1; + } + struct thread main_thread = { mut, }; + gc_mutator_set_roots(mut, &main_thread.roots); + + pthread_t threads[MAX_THREAD_COUNT]; + // Run one of the threads in the main thread. + for (size_t i = 1; i < nthreads; i++) { + int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap); + if (status) { + errno = status; + perror("Failed to create thread"); + return 1; + } + } + run_one_test(&main_thread); + for (size_t i = 1; i < nthreads; i++) { + struct join_data data = { 0, threads[i] }; + gc_call_without_gc(mut, join_thread, &data); + if (data.status) { + errno = data.status; + perror("Failed to join thread"); + return 1; + } + } + + gc_basic_stats_finish(&stats); + fputs("\n", stdout); + gc_basic_stats_print(&stats, stdout); + + return 0; +} + diff --git a/libguile/whippet/benchmarks/finalizers-embedder.h b/libguile/whippet/benchmarks/finalizers-embedder.h new file mode 100644 index 000000000..0dde1ae29 --- /dev/null +++ b/libguile/whippet/benchmarks/finalizers-embedder.h @@ -0,0 +1,55 @@ +#ifndef FINALIZERS_EMBEDDER_H +#define FINALIZERS_EMBEDDER_H + +#include + +#include "finalizers-types.h" +#include "gc-finalizer.h" + +struct gc_heap; + +#define DEFINE_METHODS(name, Name, NAME) \ + static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \ + static inline void visit_##name##_fields(Name *obj,\ + void (*visit)(struct gc_edge edge, \ + struct gc_heap *heap, \ + void *visit_data), \ + struct gc_heap *heap, \ + void *visit_data) GC_ALWAYS_INLINE; +FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS) +#undef DEFINE_METHODS + +static inline size_t small_object_size(SmallObject *obj) { return sizeof(*obj); } +static inline size_t finalizer_size(Finalizer *obj) { return gc_finalizer_size(); } +static inline size_t pair_size(Pair *obj) { return sizeof(*obj); } + +static inline void +visit_small_object_fields(SmallObject *obj, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data) {} + +static inline void +visit_finalizer_fields(Finalizer *finalizer, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + + struct gc_heap *heap, + void *visit_data) { + gc_trace_finalizer((struct gc_finalizer*)finalizer, visit, heap, visit_data); +} + +static inline void +visit_pair_fields(Pair *pair, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data) { + 
visit(gc_edge(&pair->car), heap, visit_data); + visit(gc_edge(&pair->cdr), heap, visit_data); +} + +#include "simple-gc-embedder.h" + +#endif // FINALIZERS_EMBEDDER_H diff --git a/libguile/whippet/benchmarks/finalizers-types.h b/libguile/whippet/benchmarks/finalizers-types.h new file mode 100644 index 000000000..3597ad5d7 --- /dev/null +++ b/libguile/whippet/benchmarks/finalizers-types.h @@ -0,0 +1,22 @@ +#ifndef FINALIZERS_TYPES_H +#define FINALIZERS_TYPES_H + +#define FOR_EACH_HEAP_OBJECT_KIND(M) \ + M(pair, Pair, PAIR) \ + M(finalizer, Finalizer, FINALIZER) \ + M(small_object, SmallObject, SMALL_OBJECT) + +#include "heap-objects.h" +#include "simple-tagging-scheme.h" + +struct SmallObject { + struct gc_header header; +}; + +struct Pair { + struct gc_header header; + void *car; + void *cdr; +}; + +#endif // FINALIZERS_TYPES_H diff --git a/libguile/whippet/benchmarks/finalizers.c b/libguile/whippet/benchmarks/finalizers.c new file mode 100644 index 000000000..537307118 --- /dev/null +++ b/libguile/whippet/benchmarks/finalizers.c @@ -0,0 +1,284 @@ +#include +#include +#include +#include +#include +#include + +#include "assert.h" +#include "gc-api.h" +#include "gc-basic-stats.h" +#include "gc-finalizer.h" +#include "simple-roots-api.h" +#include "finalizers-types.h" +#include "simple-allocator.h" + +typedef HANDLE_TO(SmallObject) SmallObjectHandle; +typedef HANDLE_TO(struct gc_finalizer) FinalizerHandle; +typedef HANDLE_TO(Pair) PairHandle; + +static SmallObject* allocate_small_object(struct gc_mutator *mut) { + return gc_allocate_with_kind(mut, ALLOC_KIND_SMALL_OBJECT, sizeof(SmallObject)); +} + +static Pair* allocate_pair(struct gc_mutator *mut) { + return gc_allocate_with_kind(mut, ALLOC_KIND_PAIR, sizeof(Pair)); +} + +static struct gc_finalizer* allocate_finalizer(struct gc_mutator *mut) { + struct gc_finalizer *ret = gc_allocate_finalizer(mut); + *tag_word(gc_ref_from_heap_object(ret)) = tag_live(ALLOC_KIND_FINALIZER); + return ret; +} + +/* Get the current time in microseconds */ +static unsigned long current_time(void) +{ + struct timeval t; + if (gettimeofday(&t, NULL) == -1) + return 0; + return t.tv_sec * 1000 * 1000 + t.tv_usec; +} + +struct thread { + struct gc_mutator *mut; + struct gc_mutator_roots roots; +}; + +static void print_elapsed(const char *what, unsigned long start) { + unsigned long end = current_time(); + unsigned long msec = (end - start) / 1000; + unsigned long usec = (end - start) % 1000; + printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec); +} + +struct call_with_gc_data { + void* (*f)(struct thread *); + struct gc_heap *heap; +}; +static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) { + struct call_with_gc_data *data = arg; + struct gc_mutator *mut = gc_init_for_thread(addr, data->heap); + struct thread t = { mut, }; + gc_mutator_set_roots(mut, &t.roots); + void *ret = data->f(&t); + gc_finish_for_thread(mut); + return ret; +} +static void* call_with_gc(void* (*f)(struct thread *), + struct gc_heap *heap) { + struct call_with_gc_data data = { f, heap }; + return gc_call_with_stack_addr(call_with_gc_inner, &data); +} + +#define CHECK(x) \ + do { \ + if (!(x)) { \ + fprintf(stderr, "%s:%d: check failed: %s\n", __FILE__, __LINE__, #x); \ + exit(1); \ + } \ + } while (0) + +#define CHECK_EQ(x, y) CHECK((x) == (y)) +#define CHECK_NE(x, y) CHECK((x) != (y)) +#define CHECK_NULL(x) CHECK_EQ(x, NULL) +#define CHECK_NOT_NULL(x) CHECK_NE(x, NULL) + +static double heap_size; +static double heap_multiplier; +static size_t nthreads; + +static 
void cause_gc(struct gc_mutator *mut) { + // Doing a full collection lets us reason precisely about liveness. + gc_collect(mut, GC_COLLECTION_MAJOR); +} + +static inline void set_car(struct gc_mutator *mut, Pair *obj, void *val) { + void **field = &obj->car; + if (val) + gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Pair), + gc_edge(field), + gc_ref_from_heap_object(val)); + *field = val; +} + +static inline void set_cdr(struct gc_mutator *mut, Pair *obj, void *val) { + void **field = &obj->cdr; + if (val) + gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Pair), + gc_edge(field), + gc_ref_from_heap_object(val)); + field = val; +} + +static Pair* make_finalizer_chain(struct thread *t, size_t length) { + PairHandle head = { NULL }; + PairHandle tail = { NULL }; + PUSH_HANDLE(t, head); + PUSH_HANDLE(t, tail); + + for (size_t i = 0; i < length; i++) { + HANDLE_SET(tail, HANDLE_REF(head)); + HANDLE_SET(head, allocate_pair(t->mut)); + set_car(t->mut, HANDLE_REF(head), allocate_small_object(t->mut)); + set_cdr(t->mut, HANDLE_REF(head), HANDLE_REF(tail)); + struct gc_finalizer *finalizer = allocate_finalizer(t->mut); + gc_finalizer_attach(t->mut, finalizer, 0, + gc_ref_from_heap_object(HANDLE_REF(head)), + gc_ref_from_heap_object(HANDLE_REF(head)->car)); + } + + Pair *ret = HANDLE_REF(head); + POP_HANDLE(t); + POP_HANDLE(t); + return ret; +} + +static void* run_one_test(struct thread *t) { + size_t unit_size = gc_finalizer_size() + sizeof(Pair); + size_t list_length = heap_size / nthreads / heap_multiplier / unit_size; + ssize_t outstanding = list_length; + + printf("Allocating list %zu nodes long. Total size %.3fGB.\n", + list_length, list_length * unit_size / 1e9); + + unsigned long thread_start = current_time(); + + PairHandle chain = { NULL }; + PUSH_HANDLE(t, chain); + + HANDLE_SET(chain, make_finalizer_chain(t, list_length)); + cause_gc(t->mut); + + size_t finalized = 0; + for (struct gc_finalizer *f = gc_pop_finalizable(t->mut); + f; + f = gc_pop_finalizable(t->mut)) { + Pair* p = gc_ref_heap_object(gc_finalizer_object(f)); + SmallObject* o = gc_ref_heap_object(gc_finalizer_closure(f)); + CHECK_EQ(p->car, o); + finalized++; + } + printf("thread %p: GC before clear finalized %zu nodes.\n", t, finalized); + outstanding -= finalized; + + HANDLE_SET(chain, NULL); + cause_gc(t->mut); + + finalized = 0; + for (struct gc_finalizer *f = gc_pop_finalizable(t->mut); + f; + f = gc_pop_finalizable(t->mut)) { + Pair* p = gc_ref_heap_object(gc_finalizer_object(f)); + SmallObject* o = gc_ref_heap_object(gc_finalizer_closure(f)); + CHECK_EQ(p->car, o); + finalized++; + } + printf("thread %p: GC after clear finalized %zu nodes.\n", t, finalized); + outstanding -= finalized; + + print_elapsed("thread", thread_start); + + POP_HANDLE(t); + + return (void*)outstanding; +} + +static void* run_one_test_in_thread(void *arg) { + struct gc_heap *heap = arg; + return call_with_gc(run_one_test, heap); +} + +struct join_data { int status; pthread_t thread; }; +static void *join_thread(void *data) { + struct join_data *join_data = data; + void *ret; + join_data->status = pthread_join(join_data->thread, &ret); + return ret; +} + +#define MAX_THREAD_COUNT 256 + +int main(int argc, char *argv[]) { + if (argc < 4 || 5 < argc) { + fprintf(stderr, "usage: %s HEAP_SIZE MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]); + return 1; + } + + heap_size = atof(argv[1]); + heap_multiplier = atof(argv[2]); + nthreads = atol(argv[3]); + + if (heap_size < 8192) { + fprintf(stderr, + "Heap size should probably be at 
least 8192, right? '%s'\n", + argv[1]); + return 1; + } + if (!(1.0 < heap_multiplier && heap_multiplier < 100)) { + fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]); + return 1; + } + if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) { + fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n", + (int)MAX_THREAD_COUNT, argv[2]); + return 1; + } + + printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n", + heap_size / 1e9, heap_multiplier); + + struct gc_options *options = gc_allocate_options(); + gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED); + gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size); + if (argc == 5) { + if (!gc_options_parse_and_set_many(options, argv[4])) { + fprintf(stderr, "Failed to set GC options: '%s'\n", argv[4]); + return 1; + } + } + + struct gc_heap *heap; + struct gc_mutator *mut; + struct gc_basic_stats stats; + if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) { + fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n", + (size_t)heap_size); + return 1; + } + struct thread main_thread = { mut, }; + gc_mutator_set_roots(mut, &main_thread.roots); + + pthread_t threads[MAX_THREAD_COUNT]; + // Run one of the threads in the main thread. + for (size_t i = 1; i < nthreads; i++) { + int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap); + if (status) { + errno = status; + perror("Failed to create thread"); + return 1; + } + } + ssize_t outstanding = (size_t)run_one_test(&main_thread); + for (size_t i = 1; i < nthreads; i++) { + struct join_data data = { 0, threads[i] }; + void *ret = gc_call_without_gc(mut, join_thread, &data); + if (data.status) { + errno = data.status; + perror("Failed to join thread"); + return 1; + } + ssize_t thread_outstanding = (ssize_t)ret; + outstanding += thread_outstanding; + } + + if (outstanding) + printf("\n\nWARNING: %zd nodes outstanding!!!\n\n", outstanding); + + gc_basic_stats_finish(&stats); + fputs("\n", stdout); + gc_basic_stats_print(&stats, stdout); + + return 0; +} + diff --git a/libguile/whippet/benchmarks/heap-objects.h b/libguile/whippet/benchmarks/heap-objects.h new file mode 100644 index 000000000..14ec2e3d8 --- /dev/null +++ b/libguile/whippet/benchmarks/heap-objects.h @@ -0,0 +1,19 @@ +#ifndef HEAP_OBJECTS_H +#define HEAP_OBJECTS_H + +#include "gc-inline.h" +#include "gc-edge.h" + +#define DECLARE_NODE_TYPE(name, Name, NAME) \ + struct Name; \ + typedef struct Name Name; +FOR_EACH_HEAP_OBJECT_KIND(DECLARE_NODE_TYPE) +#undef DECLARE_NODE_TYPE + +#define DEFINE_ENUM(name, Name, NAME) ALLOC_KIND_##NAME, +enum alloc_kind { + FOR_EACH_HEAP_OBJECT_KIND(DEFINE_ENUM) +}; +#undef DEFINE_ENUM + +#endif // HEAP_OBJECTS_H diff --git a/libguile/whippet/benchmarks/mt-gcbench-embedder.h b/libguile/whippet/benchmarks/mt-gcbench-embedder.h new file mode 100644 index 000000000..110e7e05e --- /dev/null +++ b/libguile/whippet/benchmarks/mt-gcbench-embedder.h @@ -0,0 +1,54 @@ +#ifndef MT_GCBENCH_EMBEDDER_H +#define MT_GCBENCH_EMBEDDER_H + +#include "gc-config.h" +#include "mt-gcbench-types.h" + +struct gc_heap; + +#define DEFINE_METHODS(name, Name, NAME) \ + static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \ + static inline void visit_##name##_fields(Name *obj,\ + void (*visit)(struct gc_edge edge, \ + struct gc_heap *heap, \ + void *visit_data), \ + struct gc_heap *heap, \ + void *visit_data) GC_ALWAYS_INLINE; +FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS) +#undef DEFINE_METHODS + +static 
inline size_t node_size(Node *obj) { + return sizeof(Node); +} +static inline size_t double_array_size(DoubleArray *array) { + return sizeof(*array) + array->length * sizeof(double); +} +static inline size_t hole_size(Hole *hole) { + return sizeof(*hole) + hole->length * sizeof(uintptr_t); +} +static inline void +visit_node_fields(Node *node, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, void *visit_data) { + visit(gc_edge(&node->left), heap, visit_data); + visit(gc_edge(&node->right), heap, visit_data); +} +static inline void +visit_double_array_fields(DoubleArray *obj, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, void *visit_data), + struct gc_heap *heap, void *visit_data) { +} +static inline void +visit_hole_fields(Hole *obj, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, void *visit_data), + struct gc_heap *heap, void *visit_data) { + if (GC_PRECISE_ROOTS) + GC_CRASH(); +} + +#include "simple-gc-embedder.h" + +#endif // MT_GCBENCH_EMBEDDER_H diff --git a/libguile/whippet/benchmarks/mt-gcbench-types.h b/libguile/whippet/benchmarks/mt-gcbench-types.h new file mode 100644 index 000000000..60bddc489 --- /dev/null +++ b/libguile/whippet/benchmarks/mt-gcbench-types.h @@ -0,0 +1,34 @@ +#ifndef GCBENCH_TYPES_H +#define GCBENCH_TYPES_H + +#include +#include + +#define FOR_EACH_HEAP_OBJECT_KIND(M) \ + M(node, Node, NODE) \ + M(double_array, DoubleArray, DOUBLE_ARRAY) \ + M(hole, Hole, HOLE) + +#include "heap-objects.h" +#include "simple-tagging-scheme.h" + +struct Node { + struct gc_header header; + struct Node *left; + struct Node *right; + int i, j; +}; + +struct DoubleArray { + struct gc_header header; + size_t length; + double values[0]; +}; + +struct Hole { + struct gc_header header; + size_t length; + uintptr_t values[0]; +}; + +#endif // GCBENCH_TYPES_H diff --git a/libguile/whippet/benchmarks/mt-gcbench.c b/libguile/whippet/benchmarks/mt-gcbench.c new file mode 100644 index 000000000..9b2521043 --- /dev/null +++ b/libguile/whippet/benchmarks/mt-gcbench.c @@ -0,0 +1,402 @@ +// This is adapted from a benchmark written by John Ellis and Pete Kovac +// of Post Communications. +// It was modified by Hans Boehm of Silicon Graphics. +// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ. +// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs. +// +// This is no substitute for real applications. No actual application +// is likely to behave in exactly this way. However, this benchmark was +// designed to be more representative of real applications than other +// Java GC benchmarks of which we are aware. +// It attempts to model those properties of allocation requests that +// are important to current GC techniques. +// It is designed to be used either to obtain a single overall performance +// number, or to give a more detailed estimate of how collector +// performance varies with object lifetimes. It prints the time +// required to allocate and collect balanced binary trees of various +// sizes. Smaller trees result in shorter object lifetimes. Each cycle +// allocates roughly the same amount of memory. +// Two data structures are kept around during the entire process, so +// that the measured performance is representative of applications +// that maintain some live in-memory data. One of these is a tree +// containing many pointers. The other is a large array containing +// double precision floating point numbers. Both should be of comparable +// size. 
+// +// The results are only really meaningful together with a specification +// of how much memory was used. It is possible to trade memory for +// better time performance. This benchmark should be run in a 32 MB +// heap, though we don't currently know how to enforce that uniformly. +// +// Unlike the original Ellis and Kovac benchmark, we do not attempt +// measure pause times. This facility should eventually be added back +// in. There are several reasons for omitting it for now. The original +// implementation depended on assumptions about the thread scheduler +// that don't hold uniformly. The results really measure both the +// scheduler and GC. Pause time measurements tend to not fit well with +// current benchmark suites. As far as we know, none of the current +// commercial Java implementations seriously attempt to minimize GC pause +// times. + +#include +#include +#include +#include +#include + +#include "assert.h" +#include "gc-api.h" +#include "gc-basic-stats.h" +#include "mt-gcbench-types.h" +#include "simple-roots-api.h" +#include "simple-allocator.h" + +#define MAX_THREAD_COUNT 256 + +static const int long_lived_tree_depth = 16; // about 4Mb +static const int array_size = 500000; // about 4Mb +static const int min_tree_depth = 4; +static const int max_tree_depth = 16; + +typedef HANDLE_TO(Node) NodeHandle; +typedef HANDLE_TO(DoubleArray) DoubleArrayHandle; + +static Node* allocate_node(struct gc_mutator *mut) { + // memset to 0 by the collector. + return gc_allocate_with_kind(mut, ALLOC_KIND_NODE, sizeof (Node)); +} + +static DoubleArray* allocate_double_array(struct gc_mutator *mut, + size_t size) { + // May be uninitialized. + size_t bytes = sizeof(DoubleArray) + sizeof (double) * size; + DoubleArray *ret = + gc_allocate_pointerless_with_kind(mut, ALLOC_KIND_DOUBLE_ARRAY, bytes); + ret->length = size; + return ret; +} + +static Hole* allocate_hole(struct gc_mutator *mut, size_t size) { + size_t bytes = sizeof(Hole) + sizeof (uintptr_t) * size; + Hole *ret = gc_allocate_with_kind(mut, ALLOC_KIND_HOLE, bytes); + ret->length = size; + return ret; +} + +static unsigned long current_time(void) { + struct timeval t = { 0 }; + gettimeofday(&t, NULL); + return t.tv_sec * 1000 * 1000 + t.tv_usec; +} + +static double elapsed_millis(unsigned long start) { + return (current_time() - start) * 1e-3; +} + +// Nodes used by a tree of a given size +static int tree_size(int i) { + return ((1 << (i + 1)) - 1); +} + +// Number of iterations to use for a given tree depth +static int compute_num_iters(int i) { + return 2 * tree_size(max_tree_depth + 2) / tree_size(i); +} + +// A power-law distribution. Each integer was selected by starting at 0, taking +// a random number in [0,1), and then accepting the integer if the random number +// was less than 0.15, or trying again with the next integer otherwise. Useful +// for modelling allocation sizes or number of garbage objects to allocate +// between live allocations. 
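
The comment above describes how the `power_law_distribution` table that follows was produced: starting at 0, accept the current integer with probability 0.15 per trial, otherwise move to the next integer, i.e. sample a geometric distribution. A hedged sketch of how such a table could be regenerated offline; the seed and output formatting are arbitrary, and the table actually checked into mt-gcbench.c is fixed.

```c
#include <stdio.h>
#include <stdlib.h>

// One sample: accept the current integer with probability 0.15,
// otherwise try again with the next integer.
static unsigned sample_power_law(void) {
  unsigned k = 0;
  while ((double)rand() / ((double)RAND_MAX + 1.0) >= 0.15)
    k++;
  return k;
}

int main(void) {
  srand(12345);  // arbitrary seed; illustrative only
  for (int i = 0; i < 256; i++)
    printf("%u,%c", sample_power_law(), (i % 16 == 15) ? '\n' : ' ');
  return 0;
}
```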
+static const uint8_t power_law_distribution[256] = { + 1, 15, 3, 12, 2, 8, 4, 0, 18, 7, 9, 8, 15, 2, 36, 5, + 1, 9, 6, 11, 9, 19, 2, 0, 0, 3, 9, 6, 3, 2, 1, 1, + 6, 1, 8, 4, 2, 0, 5, 3, 7, 0, 0, 3, 0, 4, 1, 7, + 1, 8, 2, 2, 2, 14, 0, 7, 8, 0, 2, 1, 4, 12, 7, 5, + 0, 3, 4, 13, 10, 2, 3, 7, 0, 8, 0, 23, 0, 16, 1, 1, + 6, 28, 1, 18, 0, 3, 6, 5, 8, 6, 14, 5, 2, 5, 0, 11, + 0, 18, 4, 16, 1, 4, 3, 13, 3, 23, 7, 4, 10, 5, 3, 13, + 0, 14, 5, 5, 2, 5, 0, 16, 2, 0, 1, 1, 0, 0, 4, 2, + 7, 7, 0, 5, 7, 2, 1, 24, 27, 3, 7, 1, 0, 8, 1, 4, + 0, 3, 0, 7, 7, 3, 9, 2, 9, 2, 5, 10, 1, 1, 12, 6, + 2, 9, 5, 0, 4, 6, 0, 7, 2, 1, 5, 4, 1, 0, 1, 15, + 4, 0, 15, 4, 0, 0, 32, 18, 2, 2, 1, 7, 8, 3, 11, 1, + 2, 7, 11, 1, 9, 1, 2, 6, 11, 17, 1, 2, 5, 1, 14, 3, + 6, 1, 1, 15, 3, 1, 0, 6, 10, 8, 1, 3, 2, 7, 0, 1, + 0, 11, 3, 3, 5, 8, 2, 0, 0, 7, 12, 2, 5, 20, 3, 7, + 4, 4, 5, 22, 1, 5, 2, 7, 15, 2, 4, 6, 11, 8, 12, 1 +}; + +static size_t power_law(size_t *counter) { + return power_law_distribution[(*counter)++ & 0xff]; +} + +struct thread { + struct gc_mutator *mut; + struct gc_mutator_roots roots; + size_t counter; +}; + +static void allocate_garbage(struct thread *t) { + size_t hole = power_law(&t->counter); + if (hole) { + allocate_hole(t->mut, hole); + } +} + +static inline void set_field(struct gc_mutator *mut, Node *obj, + Node **field, Node *val) { + gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Node), + gc_edge(field), + gc_ref_from_heap_object(val)); + *field = val; +} + +// Build tree top down, assigning to older objects. +static void populate(struct thread *t, int depth, Node *node) { + struct gc_mutator *mut = t->mut; + if (depth <= 0) + return; + + NodeHandle self = { node }; + PUSH_HANDLE(t, self); + allocate_garbage(t); + NodeHandle l = { allocate_node(mut) }; + PUSH_HANDLE(t, l); + allocate_garbage(t); + NodeHandle r = { allocate_node(mut) }; + PUSH_HANDLE(t, r); + + set_field(mut, HANDLE_REF(self), &HANDLE_REF(self)->left, HANDLE_REF(l)); + set_field(mut, HANDLE_REF(self), &HANDLE_REF(self)->right, HANDLE_REF(r)); + // i is 0 because the memory is zeroed. + HANDLE_REF(self)->j = depth; + + populate(t, depth-1, HANDLE_REF(self)->left); + populate(t, depth-1, HANDLE_REF(self)->right); + + POP_HANDLE(t); + POP_HANDLE(t); + POP_HANDLE(t); +} + +// Build tree bottom-up +static Node* make_tree(struct thread *t, int depth) { + struct gc_mutator *mut = t->mut; + if (depth <= 0) + return allocate_node(mut); + + NodeHandle left = { make_tree(t, depth-1) }; + PUSH_HANDLE(t, left); + NodeHandle right = { make_tree(t, depth-1) }; + PUSH_HANDLE(t, right); + + allocate_garbage(t); + Node *result = allocate_node(mut); + result->left = HANDLE_REF(left); + result->right = HANDLE_REF(right); + // i is 0 because the memory is zeroed. 
+ result->j = depth; + + POP_HANDLE(t); + POP_HANDLE(t); + + return result; +} + +static void validate_tree(Node *tree, int depth) { +#ifndef NDEBUG + GC_ASSERT_EQ(tree->i, 0); + GC_ASSERT_EQ(tree->j, depth); + if (depth == 0) { + GC_ASSERT(!tree->left); + GC_ASSERT(!tree->right); + } else { + GC_ASSERT(tree->left); + GC_ASSERT(tree->right); + validate_tree(tree->left, depth - 1); + validate_tree(tree->right, depth - 1); + } +#endif +} + +static void time_construction(struct thread *t, int depth) { + struct gc_mutator *mut = t->mut; + int num_iters = compute_num_iters(depth); + NodeHandle temp_tree = { NULL }; + PUSH_HANDLE(t, temp_tree); + + printf("Creating %d trees of depth %d\n", num_iters, depth); + + { + unsigned long start = current_time(); + for (int i = 0; i < num_iters; ++i) { + HANDLE_SET(temp_tree, allocate_node(mut)); + populate(t, depth, HANDLE_REF(temp_tree)); + validate_tree(HANDLE_REF(temp_tree), depth); + HANDLE_SET(temp_tree, NULL); + } + printf("\tTop down construction took %.3f msec\n", + elapsed_millis(start)); + } + + { + long start = current_time(); + for (int i = 0; i < num_iters; ++i) { + HANDLE_SET(temp_tree, make_tree(t, depth)); + validate_tree(HANDLE_REF(temp_tree), depth); + HANDLE_SET(temp_tree, NULL); + } + printf("\tBottom up construction took %.3f msec\n", + elapsed_millis(start)); + } + + POP_HANDLE(t); +} + +struct call_with_gc_data { + void* (*f)(struct thread *); + struct gc_heap *heap; +}; +static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) { + struct call_with_gc_data *data = arg; + struct gc_mutator *mut = gc_init_for_thread(addr, data->heap); + struct thread t = { mut, }; + gc_mutator_set_roots(mut, &t.roots); + void *ret = data->f(&t); + gc_finish_for_thread(mut); + return ret; +} +static void* call_with_gc(void* (*f)(struct thread *), + struct gc_heap *heap) { + struct call_with_gc_data data = { f, heap }; + return gc_call_with_stack_addr(call_with_gc_inner, &data); +} + +static void* run_one_test(struct thread *t) { + NodeHandle long_lived_tree = { NULL }; + NodeHandle temp_tree = { NULL }; + DoubleArrayHandle array = { NULL }; + + PUSH_HANDLE(t, long_lived_tree); + PUSH_HANDLE(t, temp_tree); + PUSH_HANDLE(t, array); + + // Create a long lived object + printf(" Creating a long-lived binary tree of depth %d\n", + long_lived_tree_depth); + HANDLE_SET(long_lived_tree, allocate_node(t->mut)); + populate(t, long_lived_tree_depth, HANDLE_REF(long_lived_tree)); + + // Create long-lived array, filling half of it + printf(" Creating a long-lived array of %d doubles\n", array_size); + HANDLE_SET(array, allocate_double_array(t->mut, array_size)); + for (int i = 0; i < array_size/2; ++i) { + HANDLE_REF(array)->values[i] = 1.0/i; + } + + for (int d = min_tree_depth; d <= max_tree_depth; d += 2) { + time_construction(t, d); + } + + validate_tree(HANDLE_REF(long_lived_tree), long_lived_tree_depth); + + // Fake reference to LongLivedTree and array to keep them from being optimized + // away. 
+ if (HANDLE_REF(long_lived_tree)->i != 0 + || HANDLE_REF(array)->values[1000] != 1.0/1000) + fprintf(stderr, "Failed\n"); + + POP_HANDLE(t); + POP_HANDLE(t); + POP_HANDLE(t); + return NULL; +} + +static void* run_one_test_in_thread(void *arg) { + struct gc_heap *heap = arg; + return call_with_gc(run_one_test, heap); +} + +struct join_data { int status; pthread_t thread; }; +static void *join_thread(void *data) { + struct join_data *join_data = data; + void *ret; + join_data->status = pthread_join(join_data->thread, &ret); + return ret; +} + +int main(int argc, char *argv[]) { + size_t heap_max_live = + tree_size(long_lived_tree_depth) * sizeof(Node) + + tree_size(max_tree_depth) * sizeof(Node) + + sizeof(DoubleArray) + sizeof(double) * array_size; + if (argc < 3 || argc > 4) { + fprintf(stderr, "usage: %s MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]); + return 1; + } + + double multiplier = atof(argv[1]); + size_t nthreads = atol(argv[2]); + + if (!(0.1 < multiplier && multiplier < 100)) { + fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[1]); + return 1; + } + if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) { + fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n", + (int)MAX_THREAD_COUNT, argv[2]); + return 1; + } + + size_t heap_size = heap_max_live * multiplier * nthreads; + + struct gc_options *options = gc_allocate_options(); + gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED); + gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size); + if (argc == 4) { + if (!gc_options_parse_and_set_many(options, argv[3])) { + fprintf(stderr, "Failed to set GC options: '%s'\n", argv[3]); + return 1; + } + } + + struct gc_heap *heap; + struct gc_mutator *mut; + struct gc_basic_stats stats; + if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) { + fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n", + heap_size); + return 1; + } + struct thread main_thread = { mut, }; + gc_mutator_set_roots(mut, &main_thread.roots); + + printf("Garbage Collector Test\n"); + printf(" Live storage will peak at %zd bytes.\n\n", heap_max_live); + + pthread_t threads[MAX_THREAD_COUNT]; + // Run one of the threads in the main thread. 
+ for (size_t i = 1; i < nthreads; i++) { + int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap); + if (status) { + errno = status; + perror("Failed to create thread"); + return 1; + } + } + run_one_test(&main_thread); + for (size_t i = 1; i < nthreads; i++) { + struct join_data data = { 0, threads[i] }; + gc_call_without_gc(mut, join_thread, &data); + if (data.status) { + errno = data.status; + perror("Failed to join thread"); + return 1; + } + } + + gc_basic_stats_finish(&stats); + fputs("\n", stdout); + gc_basic_stats_print(&stats, stdout); +} diff --git a/libguile/whippet/benchmarks/quads-embedder.h b/libguile/whippet/benchmarks/quads-embedder.h new file mode 100644 index 000000000..1d9d3f71c --- /dev/null +++ b/libguile/whippet/benchmarks/quads-embedder.h @@ -0,0 +1,37 @@ +#ifndef QUADS_EMBEDDER_H +#define QUADS_EMBEDDER_H + +#include + +#include "quads-types.h" + +struct gc_heap; + +#define DEFINE_METHODS(name, Name, NAME) \ + static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \ + static inline void visit_##name##_fields(Name *obj,\ + void (*visit)(struct gc_edge edge, \ + struct gc_heap *heap, \ + void *visit_data), \ + struct gc_heap *heap, \ + void *visit_data) GC_ALWAYS_INLINE; +FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS) +#undef DEFINE_METHODS + +static inline size_t quad_size(Quad *obj) { + return sizeof(Quad); +} + +static inline void +visit_quad_fields(Quad *quad, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data) { + for (size_t i = 0; i < 4; i++) + visit(gc_edge(&quad->kids[i]), heap, visit_data); +} + +#include "simple-gc-embedder.h" + +#endif // QUADS_EMBEDDER_H diff --git a/libguile/whippet/benchmarks/quads-types.h b/libguile/whippet/benchmarks/quads-types.h new file mode 100644 index 000000000..935591ef2 --- /dev/null +++ b/libguile/whippet/benchmarks/quads-types.h @@ -0,0 +1,15 @@ +#ifndef QUADS_TYPES_H +#define QUADS_TYPES_H + +#define FOR_EACH_HEAP_OBJECT_KIND(M) \ + M(quad, Quad, QUAD) + +#include "heap-objects.h" +#include "simple-tagging-scheme.h" + +struct Quad { + struct gc_header header; + struct Quad *kids[4]; +}; + +#endif // QUADS_TYPES_H diff --git a/libguile/whippet/benchmarks/quads.c b/libguile/whippet/benchmarks/quads.c new file mode 100644 index 000000000..6fa19f452 --- /dev/null +++ b/libguile/whippet/benchmarks/quads.c @@ -0,0 +1,181 @@ +#include +#include +#include +#include + +#include "assert.h" +#include "gc-api.h" +#include "gc-basic-stats.h" +#include "simple-roots-api.h" +#include "quads-types.h" +#include "simple-allocator.h" + +typedef HANDLE_TO(Quad) QuadHandle; + +static Quad* allocate_quad(struct gc_mutator *mut) { + // memset to 0 by the collector. 
+ return gc_allocate_with_kind(mut, ALLOC_KIND_QUAD, sizeof (Quad)); +} + +/* Get the current time in microseconds */ +static unsigned long current_time(void) +{ + struct timeval t; + if (gettimeofday(&t, NULL) == -1) + return 0; + return t.tv_sec * 1000 * 1000 + t.tv_usec; +} + +struct thread { + struct gc_mutator *mut; + struct gc_mutator_roots roots; + size_t counter; +}; + +// Build tree bottom-up +static Quad* make_tree(struct thread *t, int depth) { + if (depth<=0) { + return allocate_quad(t->mut); + } else { + QuadHandle kids[4] = { { NULL }, }; + for (size_t i = 0; i < 4; i++) { + HANDLE_SET(kids[i], make_tree(t, depth-1)); + PUSH_HANDLE(t, kids[i]); + } + + Quad *result = allocate_quad(t->mut); + for (size_t i = 0; i < 4; i++) + result->kids[i] = HANDLE_REF(kids[i]); + + for (size_t i = 0; i < 4; i++) + POP_HANDLE(t); + + return result; + } +} + +static void validate_tree(Quad *tree, int depth) { + for (size_t i = 0; i < 4; i++) { + if (depth == 0) { + if (tree->kids[i]) + abort(); + } else { + if (!tree->kids[i]) + abort(); + validate_tree(tree->kids[i], depth - 1); + } + } +} + +static void print_elapsed(const char *what, unsigned long start) { + unsigned long end = current_time(); + unsigned long msec = (end - start) / 1000; + unsigned long usec = (end - start) % 1000; + printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec); +} + +static size_t parse_size(char *arg, const char *what) { + long val = atol(arg); + if (val <= 0) { + fprintf(stderr, "Failed to parse %s '%s'\n", what, arg); + exit(1); + } + return val; +} + +static size_t tree_size(size_t depth) { + size_t nquads = 0; + size_t leaf_count = 1; + for (size_t i = 0; i <= depth; i++) { + if (nquads > ((size_t)-1) - leaf_count) { + fprintf(stderr, + "error: address space too small for quad tree of depth %zu\n", + depth); + exit(1); + } + nquads += leaf_count; + leaf_count *= 4; + } + return nquads; +} + +#define MAX_THREAD_COUNT 256 + +int main(int argc, char *argv[]) { + if (argc < 3 || 4 < argc) { + fprintf(stderr, "usage: %s DEPTH MULTIPLIER [GC-OPTIONS]\n", argv[0]); + return 1; + } + + size_t depth = parse_size(argv[1], "depth"); + double multiplier = atof(argv[2]); + + if (!(1.0 < multiplier && multiplier < 100)) { + fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]); + return 1; + } + + size_t nquads = tree_size(depth); + size_t tree_bytes = nquads * sizeof(Quad); + size_t heap_size = tree_bytes * multiplier; + + printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n", + heap_size / 1e9, multiplier); + + struct gc_options *options = gc_allocate_options(); + gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED); + gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size); + if (argc == 4) { + if (!gc_options_parse_and_set_many(options, argv[3])) { + fprintf(stderr, "Failed to set GC options: '%s'\n", argv[3]); + return 1; + } + } + + struct gc_heap *heap; + struct gc_mutator *mut; + struct gc_basic_stats stats; + if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) { + fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n", + heap_size); + return 1; + } + struct thread t = { mut, }; + gc_mutator_set_roots(mut, &t.roots); + + QuadHandle quad = { NULL }; + + PUSH_HANDLE(&t, quad); + + printf("Making quad tree of depth %zu (%zu nodes). 
Total size %.3fGB.\n", + depth, nquads, (nquads * sizeof(Quad)) / 1e9); + unsigned long start = current_time(); + HANDLE_SET(quad, make_tree(&t, depth)); + print_elapsed("construction", start); + + validate_tree(HANDLE_REF(quad), depth); + + size_t garbage_step = heap_size / 7.5; + printf("Allocating %.3f GB of garbage, 20 times, validating live tree each time.\n", + garbage_step / 1e9); + unsigned long garbage_start = current_time(); + for (size_t i = 0; i < 20; i++) { + size_t garbage_depth = 3; + start = current_time(); + for (size_t i = garbage_step/(tree_size(garbage_depth)*4*sizeof(Quad*)); i; i--) + make_tree(&t, garbage_depth); + print_elapsed("allocating garbage", start); + + start = current_time(); + validate_tree(HANDLE_REF(quad), depth); + } + print_elapsed("allocation loop", garbage_start); + + gc_basic_stats_finish(&stats); + fputs("\n", stdout); + gc_basic_stats_print(&stats, stdout); + + POP_HANDLE(&t); + return 0; +} + diff --git a/libguile/whippet/benchmarks/simple-allocator.h b/libguile/whippet/benchmarks/simple-allocator.h new file mode 100644 index 000000000..09ed8f3be --- /dev/null +++ b/libguile/whippet/benchmarks/simple-allocator.h @@ -0,0 +1,21 @@ +#ifndef SIMPLE_ALLOCATOR_H +#define SIMPLE_ALLOCATOR_H + +#include "simple-tagging-scheme.h" +#include "gc-api.h" + +static inline void* +gc_allocate_with_kind(struct gc_mutator *mut, enum alloc_kind kind, size_t bytes) { + void *obj = gc_allocate(mut, bytes, GC_ALLOCATION_TAGGED); + *tag_word(gc_ref_from_heap_object(obj)) = tag_live(kind); + return obj; +} + +static inline void* +gc_allocate_pointerless_with_kind(struct gc_mutator *mut, enum alloc_kind kind, size_t bytes) { + void *obj = gc_allocate(mut, bytes, GC_ALLOCATION_TAGGED_POINTERLESS); + *tag_word(gc_ref_from_heap_object(obj)) = tag_live(kind); + return obj; +} + +#endif // SIMPLE_ALLOCATOR_H diff --git a/libguile/whippet/benchmarks/simple-gc-embedder.h b/libguile/whippet/benchmarks/simple-gc-embedder.h new file mode 100644 index 000000000..904d2c740 --- /dev/null +++ b/libguile/whippet/benchmarks/simple-gc-embedder.h @@ -0,0 +1,183 @@ +#include + +#include "simple-tagging-scheme.h" +#include "simple-roots-types.h" +#include "gc-config.h" +#include "gc-embedder-api.h" + +#define GC_EMBEDDER_EPHEMERON_HEADER struct gc_header header; +#define GC_EMBEDDER_FINALIZER_HEADER struct gc_header header; + +static inline size_t gc_finalizer_priority_count(void) { return 2; } + +static inline int +gc_is_valid_conservative_ref_displacement(uintptr_t displacement) { +#if GC_CONSERVATIVE_ROOTS || GC_CONSERVATIVE_TRACE + // Here is where you would allow tagged heap object references. + return displacement == 0; +#else + // Shouldn't get here. + GC_CRASH(); +#endif +} + +// No external objects in simple benchmarks. +static inline int gc_extern_space_visit(struct gc_extern_space *space, + struct gc_edge edge, + struct gc_ref ref) { + GC_CRASH(); +} +static inline void gc_extern_space_start_gc(struct gc_extern_space *space, + int is_minor_gc) { +} +static inline void gc_extern_space_finish_gc(struct gc_extern_space *space, + int is_minor_gc) { +} + +static inline void gc_trace_object(struct gc_ref ref, + void (*trace_edge)(struct gc_edge edge, + struct gc_heap *heap, + void *trace_data), + struct gc_heap *heap, + void *trace_data, + size_t *size) { +#if GC_CONSERVATIVE_TRACE + // Shouldn't get here. 
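+  // With conservative heap tracing, the collector scans object words
+  // itself and never invokes this precise, per-object tracer.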
+ GC_CRASH(); +#else + switch (tag_live_alloc_kind(*tag_word(ref))) { +#define SCAN_OBJECT(name, Name, NAME) \ + case ALLOC_KIND_##NAME: \ + if (trace_edge) \ + visit_##name##_fields(gc_ref_heap_object(ref), trace_edge, \ + heap, trace_data); \ + if (size) \ + *size = name##_size(gc_ref_heap_object(ref)); \ + break; + FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT) +#undef SCAN_OBJECT + default: + GC_CRASH(); + } +#endif +} + +static inline void visit_roots(struct handle *roots, + void (*trace_edge)(struct gc_edge edge, + struct gc_heap *heap, + void *trace_data), + struct gc_heap *heap, + void *trace_data) { + for (struct handle *h = roots; h; h = h->next) + trace_edge(gc_edge(&h->v), heap, trace_data); +} + +static inline void gc_trace_mutator_roots(struct gc_mutator_roots *roots, + void (*trace_edge)(struct gc_edge edge, + struct gc_heap *heap, + void *trace_data), + struct gc_heap *heap, + void *trace_data) { + if (roots) + visit_roots(roots->roots, trace_edge, heap, trace_data); +} + +static inline void gc_trace_heap_roots(struct gc_heap_roots *roots, + void (*trace_edge)(struct gc_edge edge, + struct gc_heap *heap, + void *trace_data), + struct gc_heap *heap, + void *trace_data) { + if (roots) + visit_roots(roots->roots, trace_edge, heap, trace_data); +} + +static inline uintptr_t gc_object_forwarded_nonatomic(struct gc_ref ref) { + uintptr_t tag = *tag_word(ref); + return (tag & gcobj_not_forwarded_bit) ? 0 : tag; +} + +static inline void gc_object_forward_nonatomic(struct gc_ref ref, + struct gc_ref new_ref) { + *tag_word(ref) = gc_ref_value(new_ref); +} + +static inline struct gc_atomic_forward +gc_atomic_forward_begin(struct gc_ref ref) { + uintptr_t tag = atomic_load_explicit(tag_word(ref), memory_order_acquire); + enum gc_forwarding_state state; + if (tag == gcobj_busy) + state = GC_FORWARDING_STATE_BUSY; + else if (tag & gcobj_not_forwarded_bit) + state = GC_FORWARDING_STATE_NOT_FORWARDED; + else + state = GC_FORWARDING_STATE_FORWARDED; + return (struct gc_atomic_forward){ ref, tag, state }; +} + +static inline int +gc_atomic_forward_retry_busy(struct gc_atomic_forward *fwd) { + GC_ASSERT(fwd->state == GC_FORWARDING_STATE_BUSY); + uintptr_t tag = atomic_load_explicit(tag_word(fwd->ref), + memory_order_acquire); + if (tag == gcobj_busy) + return 0; + if (tag & gcobj_not_forwarded_bit) { + fwd->state = GC_FORWARDING_STATE_NOT_FORWARDED; + fwd->data = tag; + } else { + fwd->state = GC_FORWARDING_STATE_FORWARDED; + fwd->data = tag; + } + return 1; +} + +static inline void +gc_atomic_forward_acquire(struct gc_atomic_forward *fwd) { + GC_ASSERT(fwd->state == GC_FORWARDING_STATE_NOT_FORWARDED); + if (atomic_compare_exchange_strong(tag_word(fwd->ref), &fwd->data, + gcobj_busy)) + fwd->state = GC_FORWARDING_STATE_ACQUIRED; + else if (fwd->data == gcobj_busy) + fwd->state = GC_FORWARDING_STATE_BUSY; + else { + GC_ASSERT((fwd->data & gcobj_not_forwarded_bit) == 0); + fwd->state = GC_FORWARDING_STATE_FORWARDED; + } +} + +static inline void +gc_atomic_forward_abort(struct gc_atomic_forward *fwd) { + GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED); + atomic_store_explicit(tag_word(fwd->ref), fwd->data, memory_order_release); + fwd->state = GC_FORWARDING_STATE_NOT_FORWARDED; +} + +static inline size_t +gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) { + GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED); + switch (tag_live_alloc_kind(fwd->data)) { +#define OBJECT_SIZE(name, Name, NAME) \ + case ALLOC_KIND_##NAME: \ + return name##_size(gc_ref_heap_object(fwd->ref)); + 
FOR_EACH_HEAP_OBJECT_KIND(OBJECT_SIZE) +#undef OBJECT_SIZE + default: + GC_CRASH(); + } +} + +static inline void +gc_atomic_forward_commit(struct gc_atomic_forward *fwd, struct gc_ref new_ref) { + GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED); + *tag_word(new_ref) = fwd->data; + atomic_store_explicit(tag_word(fwd->ref), gc_ref_value(new_ref), + memory_order_release); + fwd->state = GC_FORWARDING_STATE_FORWARDED; +} + +static inline uintptr_t +gc_atomic_forward_address(struct gc_atomic_forward *fwd) { + GC_ASSERT(fwd->state == GC_FORWARDING_STATE_FORWARDED); + return fwd->data; +} diff --git a/libguile/whippet/benchmarks/simple-roots-api.h b/libguile/whippet/benchmarks/simple-roots-api.h new file mode 100644 index 000000000..d94397adf --- /dev/null +++ b/libguile/whippet/benchmarks/simple-roots-api.h @@ -0,0 +1,26 @@ +#ifndef SIMPLE_ROOTS_API_H +#define SIMPLE_ROOTS_API_H + +#include "gc-config.h" +#include "simple-roots-types.h" + +#define HANDLE_TO(T) union { T* v; struct handle handle; } +#define HANDLE_LOC(h) &(h).v +#define HANDLE_REF(h) (h).v +#define HANDLE_SET(h,val) do { (h).v = val; } while (0) +#define PUSH_HANDLE(cx, h) push_handle(&(cx)->roots.roots, &h.handle) +#define POP_HANDLE(cx) pop_handle(&(cx)->roots.roots) + +static inline void push_handle(struct handle **roots, struct handle *handle) { + if (GC_PRECISE_ROOTS) { + handle->next = *roots; + *roots = handle; + } +} + +static inline void pop_handle(struct handle **roots) { + if (GC_PRECISE_ROOTS) + *roots = (*roots)->next; +} + +#endif // SIMPLE_ROOTS_API_H diff --git a/libguile/whippet/benchmarks/simple-roots-types.h b/libguile/whippet/benchmarks/simple-roots-types.h new file mode 100644 index 000000000..6d47fa788 --- /dev/null +++ b/libguile/whippet/benchmarks/simple-roots-types.h @@ -0,0 +1,17 @@ +#ifndef SIMPLE_ROOTS_TYPES_H +#define SIMPLE_ROOTS_TYPES_H + +struct handle { + void *v; + struct handle *next; +}; + +struct gc_heap_roots { + struct handle *roots; +}; + +struct gc_mutator_roots { + struct handle *roots; +}; + +#endif // SIMPLE_ROOTS_TYPES_H diff --git a/libguile/whippet/benchmarks/simple-tagging-scheme.h b/libguile/whippet/benchmarks/simple-tagging-scheme.h new file mode 100644 index 000000000..b6b8a924c --- /dev/null +++ b/libguile/whippet/benchmarks/simple-tagging-scheme.h @@ -0,0 +1,29 @@ +#ifndef SIMPLE_TAGGING_SCHEME_H +#define SIMPLE_TAGGING_SCHEME_H + +#include + +struct gc_header { + uintptr_t tag; +}; + +// Alloc kind is in bits 1-7, for live objects. +static const uintptr_t gcobj_alloc_kind_mask = 0x7f; +static const uintptr_t gcobj_alloc_kind_shift = 1; +static const uintptr_t gcobj_forwarded_mask = 0x1; +static const uintptr_t gcobj_not_forwarded_bit = 0x1; +static const uintptr_t gcobj_busy = 0; +static inline uint8_t tag_live_alloc_kind(uintptr_t tag) { + return (tag >> gcobj_alloc_kind_shift) & gcobj_alloc_kind_mask; +} +static inline uintptr_t tag_live(uint8_t alloc_kind) { + return ((uintptr_t)alloc_kind << gcobj_alloc_kind_shift) + | gcobj_not_forwarded_bit; +} + +static inline uintptr_t* tag_word(struct gc_ref ref) { + struct gc_header *header = gc_ref_heap_object(ref); + return &header->tag; +} + +#endif // SIMPLE_TAGGING_SCHEME_H diff --git a/libguile/whippet/ctf_to_json.py b/libguile/whippet/ctf_to_json.py new file mode 100755 index 000000000..f6b7f429a --- /dev/null +++ b/libguile/whippet/ctf_to_json.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +# Any copyright is dedicated to the Public Domain. 
+# https://creativecommons.org/publicdomain/zero/1.0/ +# +# Originally written by Andy Wingo . + +import bt2 # From the babeltrace2 package. +import sys +import json +from enum import Enum + +# Usage: ./ctf_to_json.py ~/lttng-traces/name-of-your-trace > foo.json +# +# Convert a Common Trace Format (CTF) trace, for example as produced by +# LTTng, to the JSON-based Trace Event Format (TEF), for example as +# consumed by `chrome://tracing`, `https://ui.perfetto.dev/`, or +# `https://profiler.firefox.com`. + +# The Trace Event Format is documented here: +# +# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0 + +# By default, events are emitted as EventPhase.INSTANT. We also support +# rewriting the event stream so as to generate EventPhase.BEGIN / +# EventPhase.END events for specific named events. + +synthetic_events = { + 'gc': ['whippet:mutator_cause_gc', + 'whippet:restarting_mutators'], + 'stop-the-world': ['whippet:requesting_stop', + 'whippet:mutators_stopped'], + 'trace': ['whippet:prepare_gc', + 'whippet:restarting_mutators'], + 'mutator-stopped': ['whippet:mutator_stopping', + 'whippet:mutator_restarted'], + 'trace-roots': ['whippet:trace_roots_begin', + 'whippet:trace_roots_end'], + 'trace-check-termination': ['whippet:trace_check_termination_begin', + 'whippet:trace_check_termination_end'], + 'trace-objects': ['whippet:trace_objects_begin', + 'whippet:trace_objects_end'], + 'trace-worker': ['whippet:trace_worker_begin', + 'whippet:trace_worker_end'] +} + +class EventPhase(Enum): + BEGIN = 'B' + END = 'E' + COMPLETE = 'X' + INSTANT = 'i' + COUNTER = 'C' + NESTABLE_START = 'b' + NESTABLE_INSTANT = 'n' + NESTABLE_END = 'e' + FLOW_START = 's' + FLOW_STEP = 't' + FLOW_END = 'f' + SAMPLE = 'P' + OBJECT_CREATED = 'N' + OBJECT_SNAPSHOT = 'O' + OBJECT_DESTROYED = 'D' + METADATA = 'M' + MEMORY_DUMP_GLOBAL = 'V' + MEMORY_DUMP_PROCESS = 'V' + MARK = 'R' + CLOCK_SYNC = 'c' + CONTEXT_BEGIN = '(' + CONTEXT_END = ')' + +base_time = None +def event_us(msg): + assert(msg.default_clock_snapshot.clock_class.name == 'monotonic') + assert(msg.default_clock_snapshot.clock_class.frequency == 1e9) + global base_time + ns = msg.default_clock_snapshot.value + if base_time is None: + base_time = ns + return (ns - base_time) * 1e-3 + +def lower(x): + if isinstance(x, str) or isinstance(x, int) or isinstance(x, float): + return x + if isinstance(x, dict) or isinstance(x, bt2._StructureFieldConst): + return {lower(k):lower(v) for k, v in x.items()} + if isinstance(x, bt2._BoolValueConst) or isinstance(x, bt2._BoolFieldConst): + return bool(x) + if isinstance(x, bt2._EnumerationFieldConst): + return repr(x) + if isinstance(x, bt2._IntegerValueConst) or isinstance(x, bt2._IntegerFieldConst): + return int(x) + if isinstance(x, bt2._RealValueConst) or isinstance(x, bt2._RealFieldConst): + return float(x) + if isinstance(x, bt2._StringValueConst) or isinstance(x, bt2._StringFieldConst): + return str(x) + raise ValueError("Unexpected value from trace", x) + +# Specific Whippet events. 
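+# Build reverse maps from raw tracepoint names to the synthetic spans they
+# begin or end; one tracepoint may open or close several spans.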
+synthetic_begin = {} +synthetic_end = {} +for synthetic, [begin, end] in synthetic_events.items(): + synthetic_begin[begin] = [] + synthetic_end[end] = [] +for synthetic, [begin, end] in synthetic_events.items(): + synthetic_begin[begin].append(synthetic) + synthetic_end[end].append(synthetic) + +def put(str): + sys.stdout.write(str) + +need_comma = False +def print_event(ev): + global need_comma + if need_comma: + sys.stdout.write(',\n ') + else: + need_comma = True + # It appears to be faster to make a string, then print the string, + # than to call json.dump with a file object. + # json.dump(ev, sys.stdout, ensure_ascii=False, check_circular=False) + put(json.dumps(ev, ensure_ascii=False, check_circular=False)) + +def emit_event(msg, name, phase): + ev = {'name': name, + 'cat': 'whippet', + 'ph': phase.value, + 'ts': event_us(msg), + 'pid': lower(msg.event.common_context_field['vpid']), + 'tid': lower(msg.event.common_context_field['vtid']), + 'args': lower(msg.event.payload_field)} + print_event(ev) +def emit_begin_event(msg, name): + emit_event(msg, name, EventPhase.BEGIN) +def emit_end_event(msg, name): + emit_event(msg, name, EventPhase.END) + +def emit_events(msg): + emit_event(msg, msg.event.name, EventPhase.INSTANT) + for begin in synthetic_begin.get(msg.event.name, []): + emit_begin_event(msg, begin) + for end in synthetic_end.get(msg.event.name, []): + emit_end_event(msg, end) + +def ctf_to_json(path): + msg_it = bt2.TraceCollectionMessageIterator(path) + put('{\n') + put(' "traceEvents": [\n ') + for msg in msg_it: + if hasattr(msg, 'event'): + emit_events(msg) + put('\n') + put('\n ],\n') + put(' "displayTimeUnit": "ns"\n') + put('}\n') + +if len(sys.argv) != 2: + sys.stderr.write( + 'usage: ' + sys.argv[0] + ' ~/lttng-traces/name-of-your-trace\n') + sys.exit(1) +else: + ctf_to_json(sys.argv[1]) diff --git a/libguile/whippet/doc/README.md b/libguile/whippet/doc/README.md new file mode 100644 index 000000000..eee1ad701 --- /dev/null +++ b/libguile/whippet/doc/README.md @@ -0,0 +1,13 @@ +# Whippet documentation + + * [Manual](./manual.md): How do you get your program to use + Whippet? What is the API? + + * [Collector implementations](./collectors.md): There are a number of + implementations of the Whippet API with differing performance + characteristics and which impose different requirements on the + embedder. + + * [Guile](./guile.md): Some notes on a potential rebase of Guile on + top of Whippet. + diff --git a/libguile/whippet/doc/collector-bdw.md b/libguile/whippet/doc/collector-bdw.md new file mode 100644 index 000000000..5a38b4e2e --- /dev/null +++ b/libguile/whippet/doc/collector-bdw.md @@ -0,0 +1,26 @@ +# Boehm-Demers-Weiser collector + +Whippet's `bdw` collector is backed by a third-party garbage collector, +the [Boehm-Demers-Weiser collector](https://github.com/ivmai/bdwgc). + +BDW-GC is a mark-sweep collector with conservative root-finding, +conservative heap tracing, and parallel tracing. + +Whereas the other Whippet collectors which rely on mutators to +[periodically check if they need to +stop](https://github.com/wingo/whippet/blob/main/doc/manual.md#safepoints), +`bdw` will stop mutators with a POSIX signal. Also, it doesn't really +support ephemerons (the Whippet `bdw` collector simulates them using +finalizers), and both ephemerons and finalizers only approximate the +Whippet behavior, because they are implemented in terms of what BDW-GC +provides. 
+ +`bdw` supports the `fixed` and `growable` heap-sizing policies, but not +`adaptive`, as BDW-GC can't reliably return memory to the OS. Also, +[`growable` has an effective limit of a 3x heap +multiplier](https://github.com/wingo/whippet/blob/main/src/bdw.c#L478). +Oh well! + +It's a bit of an oddball from a Whippet perspective, but useful as a +migration path if you have an embedder that is already using BDW-GC. +And, it is a useful performance comparison. diff --git a/libguile/whippet/doc/collector-mmc.md b/libguile/whippet/doc/collector-mmc.md new file mode 100644 index 000000000..5f1ea936e --- /dev/null +++ b/libguile/whippet/doc/collector-mmc.md @@ -0,0 +1,148 @@ +# Mostly-marking collector + +The `mmc` collector is mainly a mark-region collector, inspired by +[Immix](http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf). +To a first approximation, `mmc` is a whole-heap Immix collector with a +large object space on the side. + +When tracing, `mmc` mostly marks objects in place. If the heap is +too fragmented, it can compact the heap by choosing to evacuate +sparsely-populated heap blocks instead of marking in place. However +evacuation is strictly optional, which means that `mmc` is also +compatible with conservative root-finding, making it a good replacement +for embedders that currently use the [Boehm-Demers-Weiser +collector](./collector-bdw.md). + +## Differences from Immix + +The original Immix divides the heap into 32kB blocks, and then divides +those blocks into 128B lines. An Immix allocation can span lines but +not blocks; allocations larger than 8kB go into a separate large object +space. Mutators request blocks from the global store and allocate into +those blocks using bump-pointer allocation. When all blocks are +consumed, Immix stops the world and traces the object graph, marking +objects but also the lines that objects are on. After marking, blocks +contain some lines with live objects and others that are completely +free. Spans of free lines are called holes. When a mutator gets a +recycled block from the global block store, it allocates into those +holes. For an exposition of Immix, see the lovely detailed [Rust +implementation](http://users.cecs.anu.edu.au/~steveb/pubs/papers/rust-ismm-2016.pdf). + +The essential difference of `mmc` from Immix stems from a simple +observation: Immix needs a side table of line mark bytes and also a mark +bit or bits in each object (or in a side table). But if instead you +choose to store mark bytes instead of bits (for concurrency reasons) in +a side table, with one mark byte per granule (unit of allocation, +perhaps 16 bytes), then you effectively have a line mark table where the +granule size is the line size. You can bump-pointer allocate into holes +in the mark byte table. + +You might think this is a bad tradeoff, and perhaps it is: I don't know +yet. If your granule size is two pointers, then one mark byte per +granule is 6.25% overhead on 64-bit, or 12.5% on 32-bit. Especially on +32-bit, it's a lot! On the other hand, instead of the worst case of one +survivor object wasting a line (or two, in the case of conservative line +marking), granule-size-is-line-size instead wastes nothing. Also, you +don't need GC bits in the object itself, and you can use the mark byte +array to record the object end, so that finding holes in a block can +just read the mark table and can avoid looking at object memory. + +## Optional features + +The `mmc` collector has a few feature flags that can be turned on or +off. 
If you use the [standard embedder makefile include](../embed.mk), +then there is a name for each combination of features: `mmc` has no +additional features, `parallel-mmc` enables parallel marking, +`parallel-generational-mmc` enables generations, +`stack-conservative-parallel-generational-mmc` uses conservative +root-finding, and `heap-conservative-parallel-generational-mmc` +additionally traces the heap conservatively. You can leave off +components of the name to get a collector without those features. +Underneath this corresponds to some pre-processor definitions passed to +the compiler on the command line. + +### Generations + +`mmc` supports generational tracing via the [sticky mark-bit +algorithm](https://wingolog.org/archives/2022/10/22/the-sticky-mark-bit-algorithm). +This requires that the embedder emit [write +barriers](https://github.com/wingo/whippet/blob/main/doc/manual.md#write-barriers); +if your embedder cannot ensure write barriers are always invoked, then +generational collection is not for you. (We could perhaps relax this a +bit, following what [Ruby developers +did](http://rvm.jp/~ko1/activities/rgengc_ismm.pdf).) + +The write barrier is currently a card-marking barrier emitted on stores, +with one card byte per 256 object bytes, where the card location can be +computed from the object address because blocks are allocated in +two-megabyte aligned slabs. + +### Parallel tracing + +You almost certainly want this on! `parallel-mmc` uses a the +[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h). +Each trace worker maintains a [local queue of objects that need +tracing](../src/local-worklist.h), which currently has a capacity of +1024 entries. If the local queue becomes full, the worker will publish +3/4 of those entries to the worker's [shared +worklist](../src/shared-worklist.h). When a worker runs out of local +work, it will first try to remove work from its own shared worklist, +then will try to steal from other workers. + +The memory used for the external worklist is dynamically allocated from +the OS and is not currently counted as contributing to the heap size. +If you absolutely need to avoid dynamic allocation during GC, `mmc` +(even `serial-mmc`) would need some work for your use case, to allocate +a fixed-size space for a marking queue and to gracefully handle mark +queue overflow. + +### Conservative stack scanning + +With `semi` and `pcc`, embedders must precisely enumerate the set of +*roots*: the edges into the heap from outside. Commonly, roots include +global variables, as well as working variables from each mutator's +stack. `mmc` can optionally mark mutator stacks *conservatively*: +treating each word on the stack as if it may be an object reference, and +marking any object at that address. + +After all these years, *whether* to mark stacks conservatively or not is +still an open research question. Conservative stack scanning can retain +too much data if an integer is confused for an object reference and +removes a layer of correctness-by-construction from a system. Sometimes +conservative stack-scanning is required, for example if your embedder +cannot enumerate roots precisely. 
But there are reasons to consider it +even if you can do precise roots: conservative scanning removes the need +for the compiler to produce a stack map to store the precise root +enumeration at every safepoint; it removes the need to look up a stack +map when tracing; and it allows C or C++ support code to avoid having to +place roots in traceable locations published to the garbage collector. +And the [performance question is still +open](https://dl.acm.org/doi/10.1145/2660193.2660198). + +Anyway. `mmc` can scan roots conservatively. Those roots are pinned +for the collection; even if the collection will compact via evacuation, +referents of conservative roots won't be moved. Objects not directly +referenced by roots can be evacuated, however. + +### Conservative heap scanning + +In addition to stack and global references, the Boehm-Demers-Weiser +collector scans heap objects conservatively as well, treating each word +of each heap object as if it were a reference. `mmc` can do that, if +the embedder is unable to provide a `gc_trace_object` implementation. +However this is generally a performance lose, and it prevents +evacuation. + +## Other implementation tidbits + +`mmc` does lazy sweeping: as a mutator grabs a fresh block, it +reclaims memory that was unmarked in the previous collection before +making the memory available for allocation. This makes sweeping +naturally cache-friendly and parallel. + +The mark byte array facilitates conservative collection by being an +oracle for "does this address start an object". + +For a detailed introduction, see [Whippet: Towards a new local +maximum](https://wingolog.org/archives/2023/02/07/whippet-towards-a-new-local-maximum), +a talk given at FOSDEM 2023. diff --git a/libguile/whippet/doc/collector-pcc.md b/libguile/whippet/doc/collector-pcc.md new file mode 100644 index 000000000..dc7bd1fb0 --- /dev/null +++ b/libguile/whippet/doc/collector-pcc.md @@ -0,0 +1,84 @@ +# Parallel copying collector + +Whippet's `pcc` collector is a copying collector, like the more simple +[`semi`](./collector-semi.md), but supporting multiple mutator threads, +multiple tracing threads, and using an external FIFO worklist instead of +a Cheney worklist. + +Like `semi`, `pcc` traces by evacuation: it moves all live objects on +every collection. (Exception: objects larger than 8192 bytes are +placed into a partitioned space which traces by marking in place instead +of copying.) Evacuation requires precise roots, so if your embedder +does not support precise roots, `pcc` is not for you. + +Again like `semi`, `pcc` generally requires a heap size at least twice +as large as the maximum live heap size, and performs best with ample +heap sizes; between 3× and 5× is best. + +Overall, `pcc` is a better version of `semi`. It should have broadly +the same performance characteristics with a single mutator and with +parallelism disabled, additionally allowing multiple mutators, and +scaling better with multiple tracing threads. + +`pcc` has a generational configuration, conventionally referred to as +`generational-pcc`, in which both the nursery and the old generation are +copy spaces. Objects stay in the nursery for one cycle before moving on +to the old generation. This configuration is a bit new (January 2025) +and still needs some tuning. + +## Implementation notes + +Unlike `semi` which has a single global bump-pointer allocation region, +`pcc` structures the heap into 64-kB blocks. 
In this way it supports +multiple mutator threads: mutators do local bump-pointer allocation into +their own block, and when their block is full, they fetch another from +the global store. + +The block size is 64 kB, but really it's 128 kB, because each block has +two halves: the active region and the copy reserve. Dividing each block +in two allows the collector to easily grow and shrink the heap while +ensuring there is always enough reserve space. + +Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in +a slab. The first block in a slab is used by the collector itself, to +keep metadata for the rest of the blocks, for example a chain pointer +allowing blocks to be collected in lists, a saved allocation pointer for +partially-filled blocks, whether the block is paged in or out, and so +on. + +`pcc` supports tracing in parallel. This mechanism works somewhat like +allocation, in which multiple trace workers compete to evacuate objects +into their local allocation buffers; when an allocation buffer is full, +the trace worker grabs another, just like mutators do. + +Unlike the simple semi-space collector which uses a Cheney grey +worklist, `pcc` uses an external worklist. If parallelism is disabled +at compile-time, it uses a simple first-in, first-out queue of objects +to be traced. Like a Cheney worklist, this should result in objects +being copied in breadth-first order. The literature would suggest that +depth-first is generally better for locality, but that preserving +allocation order is generally best. This is something to experiment +with in the future. + +If parallelism is enabled, as it is by default, `pcc` uses a +[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h). +Each trace worker maintains a [local queue of objects that need +tracing](../src/local-worklist.h), which currently has 1024 entries. If +the local queue becomes full, the worker will publish 3/4 of those +entries to the worker's [shared worklist](../src/shared-worklist.h). +When a worker runs out of local work, it will first try to remove work +from its own shared worklist, then will try to steal from other workers. + +If only one tracing thread is enabled at run-time (`parallelism=1`) (or +if parallelism is disabled at compile-time), `pcc` will evacuate by +non-atomic forwarding, but if multiple threads compete to evacuate +objects, `pcc` uses [atomic compare-and-swap instead of simple +forwarding pointer updates](./manual.md#forwarding-objects). This +imposes around a ~30% performance penalty but having multiple tracing +threads is generally worth it, unless the object graph is itself serial. + +The memory used for the external worklist is dynamically allocated from +the OS and is not currently counted as contributing to the heap size. +If you are targetting a microcontroller or something, probably you need +to choose a different kind of collector that never dynamically +allocates, such as `semi`. diff --git a/libguile/whippet/doc/collector-semi.md b/libguile/whippet/doc/collector-semi.md new file mode 100644 index 000000000..ea84720df --- /dev/null +++ b/libguile/whippet/doc/collector-semi.md @@ -0,0 +1,23 @@ +# Semi-space collector + +The `semi` collector is simple. It is mostly useful as a first +collector to try out, to make sure that a mutator correctly records all +roots: because `semi` moves every live object on every collection, it is +very effective at shaking out mutator bugs. 
+ +If your embedder chooses to not precisely record roots, for example +instead choosing to conservatively scan the stack, then the semi-space +collector is not for you: `semi` requires precise roots. + +For more on semi-space collectors, see +https://wingolog.org/archives/2022/12/10/a-simple-semi-space-collector. + +Whippet's `semi` collector incorporates a large-object space, which +marks objects in place instead of moving. Otherwise, `semi` generally +requires a heap size at least twice as large as the maximum live heap +size, and performs best with ample heap sizes; between 3× and 5× is +best. + +The semi-space collector doesn't support multiple mutator threads. If +you want a copying collector for a multi-threaded mutator, look at +[pcc](./collector-pcc.md). diff --git a/libguile/whippet/doc/collectors.md b/libguile/whippet/doc/collectors.md new file mode 100644 index 000000000..90f4867fc --- /dev/null +++ b/libguile/whippet/doc/collectors.md @@ -0,0 +1,43 @@ +# Whippet collectors + +Whippet has four collectors currently: + - [Semi-space collector (`semi`)](./collector-semi.md): For + single-threaded embedders who are not too tight on memory. + - [Parallel copying collector (`pcc`)](./collector-pcc.md): Like + `semi`, but with support for multiple mutator and tracing threads and + generational collection. + - [Mostly marking collector (`mmc`)](./collector-mmc.md): + Immix-inspired collector. Optionally parallel, conservative (stack + and/or heap), and/or generational. + - [Boehm-Demers-Weiser collector (`bdw`)](./collector-bdw.md): + Conservative mark-sweep collector, implemented by + Boehm-Demers-Weiser library. + +## How to choose? + +If you are migrating an embedder off BDW-GC, then it could be reasonable +to first go to `bdw`, then `stack-conservative-parallel-mmc`. + +If you have an embedder with precise roots, use `pcc`. That will shake +out mutator/embedder bugs. Then if memory is tight, switch to +`parallel-mmc`, possibly `parallel-generational-mmc`. + +If you are aiming for maximum simplicity and minimal code size (ten +kilobytes or so), use `semi`. + +If you are writing a new project, you have a choice as to whether to pay +the development cost of precise roots or not. If you choose to not have +precise roots, then go for `stack-conservative-parallel-mmc` directly. + +## More collectors + +It would be nice to have a generational GC that uses the space from +`parallel-mmc` for the old generation but a pcc-style copying nursery. +We have `generational-pcc` now, so this should be possible. + +Support for concurrent marking in `mmc` would be good as well, perhaps +with a SATB barrier. (Or, if you are the sort of person to bet on +conservative stack scanning, perhaps a retreating-wavefront barrier +would be more appropriate.) + +Contributions are welcome, provided they have no more dependencies! diff --git a/libguile/whippet/doc/guile.md b/libguile/whippet/doc/guile.md new file mode 100644 index 000000000..12bdb97fc --- /dev/null +++ b/libguile/whippet/doc/guile.md @@ -0,0 +1,26 @@ +# Whippet and Guile + +If the `mmc` collector works out, it could replace Guile's garbage +collector. Guile currently uses BDW-GC. Guile has a widely used C API +and implements part of its run-time in C. For this reason it may be +infeasible to require precise enumeration of GC roots -- we may need to +allow GC roots to be conservatively identified from data sections and +from stacks. Such conservative roots would be pinned, but other objects +can be moved by the collector if it chooses to do so. 
We assume that +object references within a heap object can be precisely identified. +(However, Guile currently uses BDW-GC in its default configuration, +which scans for references conservatively even on the heap.) + +The existing C API allows direct access to mutable object fields, +without the mediation of read or write barriers. Therefore it may be +impossible to switch to collector strategies that need barriers, such as +generational or concurrent collectors. However, we shouldn't write off +this possibility entirely; an ideal replacement for Guile's GC will +offer the possibility of migration to other GC designs without imposing +new requirements on C API users in the initial phase. + +In this regard, the Whippet experiment also has the goal of identifying +a smallish GC abstraction in Guile, so that we might consider evolving +GC implementation in the future without too much pain. If we switch +away from BDW-GC, we should be able to evaluate that it's a win for a +large majority of use cases. diff --git a/libguile/whippet/doc/manual.md b/libguile/whippet/doc/manual.md new file mode 100644 index 000000000..7b889e364 --- /dev/null +++ b/libguile/whippet/doc/manual.md @@ -0,0 +1,718 @@ +# Whippet user's guide + +Whippet is an embed-only library: it should be copied into the source +tree of the program that uses it. The program's build system needs to +be wired up to compile Whippet, then link it into the program that uses +it. + +## Subtree merges + +One way is get Whippet is just to manually copy the files present in a +Whippet checkout into your project. However probably the best way is to +perform a [subtree +merge](https://docs.github.com/en/get-started/using-git/about-git-subtree-merges) +of Whippet into your project's Git repository, so that you can easily +update your copy of Whippet in the future. + +Performing the first subtree merge is annoying and full of arcane +incantations. Follow the [subtree merge +page](https://docs.github.com/en/get-started/using-git/about-git-subtree-merges) +for full details, but for a cheat sheet, you might do something like +this to copy Whippet into the `whippet/` directory of your project root: + +``` +git remote add whippet https://github.com/wingo/whippet +git fetch whippet +git merge -s ours --no-commit --allow-unrelated-histories whippet/main +git read-tree --prefix=whippet/ -u whippet/main +git commit -m 'Added initial Whippet merge' +``` + +Then to later update your copy of whippet, assuming you still have the +`whippet` remote, just do: + +``` +git pull -s subtree whippet main +``` + +## `gc-embedder-api.h` + +To determine the live set of objects, a tracing garbage collector starts +with a set of root objects, and then transitively visits all reachable +object edges. Exactly how it goes about doing this depends on the +program that is using the garbage collector; different programs will +have different object representations, different strategies for +recording roots, and so on. + +To traverse the heap in a program-specific way but without imposing an +abstraction overhead, Whippet requires that a number of data types and +inline functions be implemented by the program, for use by Whippet +itself. This is the *embedder API*, and this document describes what +Whippet requires from a program. + +A program should provide a header file implementing the API in +[`gc-embedder-api.h`](../api/gc-embedder-api.h). This header should only be +included when compiling Whippet itself; it is not part of the API that +Whippet exposes to the program. 
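+
+To give a concrete feel for the shape of this header, here is a sketch
+of one of its hooks, modeled on the `benchmarks/simple-gc-embedder.h`
+file shipped in this tree; `struct my_object`, `visit_my_object_fields`,
+and `my_object_size` are hypothetical embedder-side names, not part of
+Whippet itself.
+
+```c
+/* Sketch only: a precise tracing hook for a hypothetical object type. */
+static inline void gc_trace_object(struct gc_ref ref,
+                                   void (*trace_edge)(struct gc_edge edge,
+                                                      struct gc_heap *heap,
+                                                      void *trace_data),
+                                   struct gc_heap *heap,
+                                   void *trace_data,
+                                   size_t *size) {
+  struct my_object *obj = gc_ref_heap_object(ref);
+  if (trace_edge)
+    visit_my_object_fields(obj, trace_edge, heap, trace_data);
+  if (size)
+    *size = my_object_size(obj);
+}
+```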
+ +### Identifying roots + +The collector uses two opaque struct types, `struct gc_mutator_roots` +and `struct gc_heap_roots`, that are used by the program to record +object roots. Probably you should put the definition of these data +types in a separate header that is included both by Whippet, via the +embedder API, and via users of Whippet, so that programs can populate +the root set. In any case the embedder-API use of these structs is via +`gc_trace_mutator_roots` and `gc_trace_heap_roots`, two functions that +are passed a trace visitor function `trace_edge`, and which should call +that function on all edges from a given mutator or heap. (Usually +mutator roots are per-thread roots, such as from the stack, and heap +roots are global roots.) + +### Tracing objects + +The `gc_trace_object` is responsible for calling the `trace_edge` +visitor function on all outgoing edges in an object. It also includes a +`size` out-parameter, for when the collector wants to measure the size +of an object. `trace_edge` and `size` may be `NULL`, in which case no +tracing or size computation should be performed. + +### Tracing ephemerons and finalizers + +Most kinds of GC-managed object are defined by the program, but the GC +itself has support for two specific object kind: ephemerons and +finalizers. If the program allocates ephemerons, it should trace them +in the `gc_trace_object` function by calling `gc_trace_ephemeron` from +[`gc-ephemerons.h`](../api/gc-ephemerons.h). Likewise if the program +allocates finalizers, it should trace them by calling +`gc_trace_finalizer` from [`gc-finalizer.h`](../api/gc-finalizer.h). + +### Forwarding objects + +When built with a collector that moves objects, the embedder must also +allow for forwarding pointers to be installed in an object. There are +two forwarding APIs: one that is atomic and one that isn't. + +The nonatomic API is relatively simple; there is a +`gc_object_forwarded_nonatomic` function that returns an embedded +forwarding address, or 0 if the object is not yet forwarded, and +`gc_object_forward_nonatomic`, which installs a forwarding pointer. + +The atomic API is gnarly. It is used by parallel collectors, in which +multiple collector threads can race to evacuate an object. + +There is a state machine associated with the `gc_atomic_forward` +structure from [`gc-forwarding.h`](../api/gc-forwarding.h); the embedder API +implements the state changes. The collector calls +`gc_atomic_forward_begin` on an object to begin a forwarding attempt, +and the resulting `gc_atomic_forward` can be in the `NOT_FORWARDED`, +`FORWARDED`, or `BUSY` state. + +If the `gc_atomic_forward`'s state is `BUSY`, the collector will call +`gc_atomic_forward_retry_busy`; a return value of 0 means the object is +still busy, because another thread is attempting to forward it. +Otherwise the forwarding state becomes either `FORWARDED`, if the other +thread succeeded in forwarding it, or go back to `NOT_FORWARDED`, +indicating that the other thread failed to forward it. + +If the forwarding state is `FORWARDED`, the collector will call +`gc_atomic_forward_address` to get the new address. + +If the forwarding state is `NOT_FORWARDED`, the collector may begin a +forwarding attempt by calling `gc_atomic_forward_acquire`. The +resulting state is `ACQUIRED` on success, or `BUSY` if another thread +acquired the object in the meantime, or `FORWARDED` if another thread +acquired and completed the forwarding attempt. 
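+
+Putting these calls together, a collector-side evacuation attempt might
+drive the state machine roughly as sketched below. This is a sketch
+only: `evacuate_object` is a hypothetical helper standing in for however
+the collector copies an object and builds the new reference, and
+`gc_atomic_forward_commit` is described in the next paragraph.
+
+```c
+/* Sketch: resolve `ref` to a forwarded address, evacuating the object if
+   this thread wins the race to acquire it. */
+static uintptr_t forward_or_evacuate(struct gc_ref ref) {
+  struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref);
+  for (;;) {
+    switch (fwd.state) {
+    case GC_FORWARDING_STATE_BUSY:
+      /* Another thread claimed the object; wait for it to resolve. */
+      while (!gc_atomic_forward_retry_busy(&fwd)) {}
+      break;
+    case GC_FORWARDING_STATE_FORWARDED:
+      return gc_atomic_forward_address(&fwd);
+    case GC_FORWARDING_STATE_NOT_FORWARDED:
+      gc_atomic_forward_acquire(&fwd);
+      break;
+    case GC_FORWARDING_STATE_ACQUIRED: {
+      /* This thread won: copy the object, then publish the new address. */
+      struct gc_ref new_ref = evacuate_object(&fwd);
+      gc_atomic_forward_commit(&fwd, new_ref);
+      return gc_ref_value(new_ref);
+    }
+    }
+  }
+}
+```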
+ +An `ACQUIRED` object can then be forwarded via +`gc_atomic_forward_commit`, or the forwarding attempt can be aborted via +`gc_atomic_forward_abort`. Also, when an object is acquired, the +collector may call `gc_atomic_forward_object_size` to compute how many +bytes to copy. (The collector may choose instead to record object sizes +in a different way.) + +All of these `gc_atomic_forward` functions are to be implemented by the +embedder. Some programs may allocate a dedicated forwarding word in all +objects; some will manage to store the forwarding word in an initial +"tag" word, via a specific pattern for the low 3 bits of the tag that no +non-forwarded object will have. The low-bits approach takes advantage +of the collector's minimum object alignment, in which objects are +aligned at least to an 8-byte boundary, so all objects have 0 for the +low 3 bits of their address. + +### Conservative references + +Finally, when configured in a mode in which root edges or intra-object +edges are *conservative*, the embedder can filter out which bit patterns +might be an object reference by implementing +`gc_is_valid_conservative_ref_displacement`. Here, the collector masks +off the low bits of a conservative reference, and asks the embedder if a +value with those low bits might point to an object. Usually the +embedder should return 1 only if the displacement is 0, but if the +program allows low-bit tagged pointers, then it should also return 1 for +those pointer tags. + +### External objects + +Sometimes a system will allocate objects outside the GC, for example on +the stack or in static data sections. To support this use case, Whippet +allows the embedder to provide a `struct gc_extern_space` +implementation. Whippet will call `gc_extern_space_start_gc` at the +start of each collection, and `gc_extern_space_finish_gc` at the end. +External objects will be visited by `gc_extern_space_mark`, which should +return nonzero if the object hasn't been seen before and needs to be +traced via `gc_trace_object` (coloring the object grey). Note, +`gc_extern_space_mark` may be called concurrently from many threads; be +prepared! + +## Configuration, compilation, and linking + +To the user, Whippet presents an abstract API that does not encode the +specificities of any given collector. Whippet currently includes four +implementations of that API: `semi`, a simple semi-space collector; +`pcc`, a parallel copying collector (like semi but multithreaded); +`bdw`, an implementation via the third-party +[Boehm-Demers-Weiser](https://github.com/ivmai/bdwgc) conservative +collector; and `mmc`, a mostly-marking collector inspired by Immix. + +The program that embeds Whippet selects the collector implementation at +build-time. For `pcc`, the program can also choose whether to be +generational or not. For `mmc` collector, the program configures a +specific collector mode, again at build-time: generational or not, +parallel or not, stack-conservative or not, and heap-conservative or +not. It may be nice in the future to be able to configure these at +run-time, but for the time being they are compile-time options so that +adding new features doesn't change the footprint of a more minimal +collector. + +Different collectors have different allocation strategies: for example, +the BDW collector allocates from thread-local freelists, whereas the +semi-space collector has a bump-pointer allocator. A collector may also +expose a write barrier, for example to enable generational collection. 
+For performance reasons, many of these details can't be hidden behind an +opaque functional API: they must be inlined into call sites. Whippet's +approach is to expose fast paths as part of its inline API, but which +are *parameterized* on attributes of the selected garbage collector. +The goal is to keep the user's code generic and avoid any code +dependency on the choice of garbage collector. Because of inlining, +however, the choice of garbage collector does need to be specified when +compiling user code. + +### Compiling the collector + +As an embed-only library, Whippet needs to be integrated into the build +system of its host (embedder). There are two build systems supported +currently; we would be happy to add other systems over time. + +#### GNU make + +At a high level, first the embedder chooses a collector and defines how +to specialize the collector against the embedder. Whippet's `embed.mk` +Makefile snippet then defines how to build the set of object files that +define the collector, and how to specialize the embedder against the +chosen collector. + +As an example, say you have a file `program.c`, and you want to compile +it against a Whippet checkout in `whippet/`. Your headers are in +`include/`, and you have written an implementation of the embedder +interface in `host-gc.h`. In that case you would have a Makefile like +this: + +``` +HOST_DIR:=$(dir $(lastword $(MAKEFILE_LIST))) +WHIPPET_DIR=$(HOST_DIR)whippet/ + +all: out + +# The collector to choose: e.g. semi, bdw, pcc, generational-pcc, mmc, +# parallel-mmc, etc. +GC_COLLECTOR=pcc + +include $(WHIPPET_DIR)embed.mk + +# Host cflags go here... +HOST_CFLAGS= + +# Whippet's embed.mk uses this variable when it compiles code that +# should be specialized against the embedder. +EMBEDDER_TO_GC_CFLAGS=$(HOST_CFLAGS) -include $(HOST_DIR)host-gc.h + +program.o: program.c + $(GC_COMPILE) $(HOST_CFLAGS) $(GC_TO_EMBEDDER_CFLAGS) -c $< +program: program.o $(GC_OBJS) + $(GC_LINK) $^ $(GC_LIBS) +``` + +The optimization settings passed to the C compiler are taken from +`GC_BUILD_CFLAGS`. Embedders can override this variable directly, or +via the shorthand `GC_BUILD` variable. A `GC_BUILD` of `opt` indicates +maximum optimization and no debugging assertions; `optdebug` adds +debugging assertions; and `debug` removes optimizations. + +Though Whippet tries to put performance-sensitive interfaces in header +files, users should also compile with link-time optimization (LTO) to +remove any overhead imposed by the division of code into separate +compilation units. `embed.mk` includes the necessary LTO flags in +`GC_CFLAGS` and `GC_LDFLAGS`. + +#### GNU Autotools + +To use Whippet from an autotools project, the basic idea is to include a +`Makefile.am` snippet from the subdirectory containing the Whippet +checkout. That will build `libwhippet.la`, which you should link into +your binary. There are some `m4` autoconf macros that need to be +invoked, for example to select the collector. + +Let us imagine you have checked out Whippet in `whippet/`. Let us also +assume for the moment that we are going to build `mt-gcbench`, a program +included in Whippet itself. 
+ +A top-level autoconf file (`configure.ac`) might look like this: + +```autoconf +AC_PREREQ([2.69]) +AC_INIT([whippet-autotools-example],[0.1.0]) +AC_CONFIG_SRCDIR([whippet/benchmarks/mt-gcbench.c]) +AC_CONFIG_AUX_DIR([build-aux]) +AC_CONFIG_MACRO_DIRS([m4 whippet]) +AM_INIT_AUTOMAKE([subdir-objects foreign]) + +WHIPPET_ENABLE_LTO + +LT_INIT + +WARN_CFLAGS=-Wall +AC_ARG_ENABLE([Werror], + AS_HELP_STRING([--disable-Werror], + [Don't stop the build on errors]), + [], + WARN_CFLAGS="-Wall -Werror") +CFLAGS="$CFLAGS $WARN_CFLAGS" + +WHIPPET_PKG + +AC_CONFIG_FILES(Makefile) +AC_OUTPUT +``` + +Then your `Makefile.am` might look like this: + +```automake +noinst_LTLIBRARIES = +WHIPPET_EMBEDDER_CPPFLAGS = -include $(srcdir)/whippet/benchmarks/mt-gcbench-embedder.h +include whippet/embed.am + +noinst_PROGRAMS = whippet/benchmarks/mt-gcbench +whippet_benchmarks_mt_gcbench_SOURCES = \ + whippet/benchmarks/heap-objects.h \ + whippet/benchmarks/mt-gcbench-embedder.h \ + whippet/benchmarks/mt-gcbench-types.h \ + whippet/benchmarks/mt-gcbench.c \ + whippet/benchmarks/simple-allocator.h \ + whippet/benchmarks/simple-gc-embedder.h \ + whippet/benchmarks/simple-roots-api.h \ + whippet/benchmarks/simple-roots-types.h \ + whippet/benchmarks/simple-tagging-scheme.h + +AM_CFLAGS = $(WHIPPET_CPPFLAGS) $(WHIPPET_CFLAGS) $(WHIPPET_TO_EMBEDDER_CPPFLAGS) +LDADD = libwhippet.la +``` + +We have to list all the little header files it uses because, well, +autotools. + +To actually build, you do the usual autotools dance: + +```bash +autoreconf -vif && ./configure && make +``` + +See `./configure --help` for a list of user-facing options. Before the +`WHIPPET_PKG`, you can run e.g. `WHIPPET_PKG_COLLECTOR(mmc)` to set the +default collector to `mmc`; if you don't do that, the default collector +is `pcc`. There are also `WHIPPET_PKG_DEBUG`, `WHIPPET_PKG_TRACING`, +and `WHIPPET_PKG_PLATFORM`; see [`whippet.m4`](../whippet.m4) for more +details. See also +[`whippet-autotools`](https://github.com/wingo/whippet-autotools) for an +example of how this works. + +#### Compile-time options + +There are a number of pre-processor definitions that can parameterize +the collector at build-time: + + * `GC_DEBUG`: If nonzero, then enable debugging assertions. + * `NDEBUG`: This one is a bit weird; if not defined, then enable + debugging assertions and some debugging printouts. Probably + Whippet's use of `NDEBUG` should be folded in to `GC_DEBUG`. + * `GC_PARALLEL`: If nonzero, then enable parallelism in the collector. + Defaults to 0. + * `GC_GENERATIONAL`: If nonzero, then enable generational collection. + Defaults to zero. + * `GC_PRECISE_ROOTS`: If nonzero, then collect precise roots via + `gc_heap_roots` and `gc_mutator_roots`. Defaults to zero. + * `GC_CONSERVATIVE_ROOTS`: If nonzero, then scan the stack and static + data sections for conservative roots. Defaults to zero. Not + mutually exclusive with `GC_PRECISE_ROOTS`. + * `GC_CONSERVATIVE_TRACE`: If nonzero, heap edges are scanned + conservatively. Defaults to zero. + +Some collectors require specific compile-time options. For example, the +semi-space collector has to be able to move all objects; this is not +compatible with conservative roots or heap edges. + +#### Tracing support + +Whippet includes support for low-overhead run-time tracing via +[LTTng](https://lttng.org/). If the support library `lttng-ust` is +present when Whippet is compiled (as checked via `pkg-config`), +tracepoint support will be present. 
See +[tracepoints.md](./tracepoints.md) for more information on how to get +performance traces out of Whippet. + +## Using the collector + +Whew! So you finally built the thing! Did you also link it into your +program? No, because your program isn't written yet? Well this section +is for you: we describe the user-facing API of Whippet, where "user" in +this case denotes the embedding program. + +What is the API, you ask? It is in [`gc-api.h`](../api/gc-api.h). + +### Heaps and mutators + +To start with, you create a *heap*. Usually an application will create +just one heap. A heap has one or more associated *mutators*. A mutator +is a thread-specific handle on the heap. Allocating objects requires a +mutator. + +The initial heap and mutator are created via `gc_init`, which takes +three logical input parameters: the *options*, a stack base address, and +an *event listener*. The options specify the initial heap size and so +on. The event listener is mostly for gathering statistics; see below +for more. `gc_init` returns the new heap as an out parameter, and also +returns a mutator for the current thread. + +To make a new mutator for a new thread, use `gc_init_for_thread`. When +a thread is finished with its mutator, call `gc_finish_for_thread`. +Each thread that allocates or accesses GC-managed objects should have +its own mutator. + +The stack base address allows the collector to scan the mutator's stack, +if conservative root-finding is enabled. It may be omitted in the call +to `gc_init` and `gc_init_for_thread`; passing `NULL` tells Whippet to +ask the platform for the stack bounds of the current thread. Generally +speaking, this works on all platforms for the main thread, but not +necessarily on other threads. The most reliable solution is to +explicitly obtain a base address by trampolining through +`gc_call_with_stack_addr`. + +### Options + +There are some run-time parameters that programs and users might want to +set explicitly; these are encapsulated in the *options*. Make an +options object with `gc_allocate_options()`; this object will be +consumed by its `gc_init`. Then, the most convenient thing is to set +those options from `gc_options_parse_and_set_many` from a string passed +on the command line or an environment variable, but to get there we have +to explain the low-level first. There are a few options that are +defined for all collectors: + + * `GC_OPTION_HEAP_SIZE_POLICY`: How should we size the heap? Either + it's `GC_HEAP_SIZE_FIXED` (which is 0), in which the heap size is + fixed at startup; or `GC_HEAP_SIZE_GROWABLE` (1), in which the heap + may grow but will never shrink; or `GC_HEAP_SIZE_ADAPTIVE` (2), in + which we take an + [adaptive](https://wingolog.org/archives/2023/01/27/three-approaches-to-heap-sizing) + approach, depending on the rate of allocation and the cost of + collection. Really you want the adaptive strategy, but if you are + benchmarking you definitely want the fixed policy. + * `GC_OPTION_HEAP_SIZE`: The initial heap size. For a + `GC_HEAP_SIZE_FIXED` policy, this is also the final heap size. In + bytes. + * `GC_OPTION_MAXIMUM_HEAP_SIZE`: For growable and adaptive heaps, the + maximum heap size, in bytes. + * `GC_OPTION_HEAP_SIZE_MULTIPLIER`: For growable heaps, the target heap + multiplier. A heap multiplier of 2.5 means that for 100 MB of live + data, the heap should be 250 MB. 
+ * `GC_OPTION_HEAP_EXPANSIVENESS`: For adaptive heap sizing, an
+   indication of how much free space will be given to heaps, as a
+   proportion of the square root of the live data size.
+ * `GC_OPTION_PARALLELISM`: How many threads to devote to collection
+   tasks during GC pauses. By default, the current number of
+   processors, with a maximum of 8.
+
+You can set these options via `gc_option_set_int` and so on; see
+[`gc-options.h`](../api/gc-options.h). Or, you can parse options from
+strings: `heap-size-policy`, `heap-size`, `maximum-heap-size`, and so
+on. Use `gc_option_from_string` to determine if a string is really an
+option. Use `gc_option_parse_and_set` to parse a value for an option.
+Use `gc_options_parse_and_set_many` to parse a number of comma-delimited
+*key=value* settings from a string.
+
+### Allocation
+
+So you have a heap and a mutator; great! Let's allocate! Call
+`gc_allocate`, passing the mutator and the number of bytes to allocate.
+
+There is also `gc_allocate_fast`, which is an inlined fast-path. If
+that returns NULL, you need to call `gc_allocate_slow`. The advantage
+of this API is that you can punt some root-saving overhead to the slow
+path.
+
+Allocation always succeeds. If it doesn't, it kills your program. The
+bytes in the resulting allocation will be initialized to 0.
+
+The allocation fast path is parameterized by collector-specific
+attributes. JIT compilers can also read those attributes to emit
+appropriate inline code that replicates the logic of `gc_allocate_fast`.
+
+### Write barriers
+
+For some collectors, mutators have to tell the collector whenever they
+mutate an object. They tell the collector by calling a *write barrier*;
+in Whippet this is currently the case only for generational collectors.
+
+The write barrier is `gc_write_barrier`; see `gc-api.h` for its
+parameters.
+
+As with allocation, the fast path for the write barrier is parameterized
+by collector-specific attributes, to allow JIT compilers to inline write
+barriers.
+
+### Safepoints
+
+Sometimes Whippet will need to synchronize all threads, for example as
+part of the "stop" phase of a stop-and-copy semi-space collector.
+Whippet stops at *safepoints*. At a safepoint, all mutators must be
+able to enumerate all of their edges to live objects.
+
+Whippet has cooperative safepoints: mutators have to periodically call
+into the collector to potentially synchronize with other mutators.
+`gc_allocate_slow` is a safepoint, so if you have a bunch of threads
+that are all allocating, usually safepoints are reached in a
+more-or-less prompt fashion. But if a mutator isn't allocating, it
+either needs to temporarily mark itself as inactive by trampolining
+through `gc_call_without_gc`, or it should arrange to periodically call
+`gc_safepoint`. Marking a mutator as inactive is the right strategy
+for, for example, system calls that might block. Periodic safepoints
+are better for code that is active but not allocating.
+
+Also, the BDW collector actually uses pre-emptive safepoints: it stops
+threads via POSIX signals. `gc_safepoint` is a no-op with BDW.
+
+Embedders can inline safepoint checks. If
+`gc_cooperative_safepoint_kind()` is `GC_COOPERATIVE_SAFEPOINT_NONE`,
+then the collector doesn't need safepoints, as is the case for `bdw`
+which uses signals and `semi` which is single-threaded.
If it is
+`GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG`, then calling
+`gc_safepoint_flag_loc` on a mutator will return the address of an `int`
+in memory, which if nonzero when loaded using relaxed atomics indicates
+that the mutator should call `gc_safepoint_slow`. Similarly for
+`GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG`, except that the address is
+per-mutator rather than global.
+
+### Pinning
+
+Sometimes a mutator or embedder would like to tell the collector to not
+move a particular object. This can happen for example during a foreign
+function call, or if the embedder allows programs to access the address
+of an object, for example to compute an identity hash code. To support
+this use case, some Whippet collectors allow the embedder to *pin*
+objects. Call `gc_pin_object` to prevent the collector from relocating
+an object.
+
+Pinning is currently supported by the `bdw` collector, which never moves
+objects, and also by the various `mmc` collectors, which can move
+objects that have no inbound conservative references.
+
+Pinning is not supported on `semi` or `pcc`.
+
+Call `gc_can_pin_objects` to determine whether the current collector can
+pin objects.
+
+### Statistics
+
+Sometimes a program would like some information from the GC: how many
+bytes and objects have been allocated? How much time has been spent in
+the GC? How many times has GC run, and how many of those were minor
+collections? What's the maximum pause time? Stuff like that.
+
+Instead of collecting a fixed set of information, Whippet emits
+callbacks when the collector reaches specific states. The embedder
+provides a *listener* for these events when initializing the collector.
+
+The listener interface is defined in
+[`gc-event-listener.h`](../api/gc-event-listener.h). Whippet ships with
+two listener implementations,
+[`GC_NULL_EVENT_LISTENER`](../api/gc-null-event-listener.h), and
+[`GC_BASIC_STATS`](../api/gc-basic-stats.h). Most embedders will want
+their own listener, but starting with the basic stats listener is not a
+bad option:
+
+```
+#include "gc-api.h"
+#include "gc-basic-stats.h"
+#include <stdio.h>
+
+int main() {
+  struct gc_options *options = NULL;
+  struct gc_heap *heap;
+  struct gc_mutator *mut;
+  struct gc_basic_stats stats;
+  gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats);
+  // ...
+  gc_basic_stats_finish(&stats);
+  gc_basic_stats_print(&stats, stdout);
+}
+```
+
+As you can see, `GC_BASIC_STATS` expands to a `struct gc_event_listener`
+definition. We pass an associated pointer to a `struct gc_basic_stats`
+instance which will be passed to the listener at every event.
+
+The output of this program might be something like:
+
+```
+Completed 19 major collections (0 minor).
+654.597 ms total time (385.235 stopped).
+Heap size is 167.772 MB (max 167.772 MB); peak live data 55.925 MB.
+```
+
+There are currently three different sorts of events: heap events to
+track heap growth, collector events to time different parts of
+collection, and mutator events to indicate when specific mutators are
+stopped.
+
+There are three heap events:
+
+ * `init(void* data, size_t heap_size)`: Called during `gc_init`, to
+   allow the listener to initialize its associated state.
+ * `heap_resized(void* data, size_t new_size)`: Called if the heap grows
+   or shrinks.
+ * `live_data_size(void* data, size_t size)`: Called periodically when
+   the collector learns about live data size.
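+
+For illustration, here is a sketch of what the heap-event side of a
+custom listener might look like: three callbacks with the signatures
+above, accumulating into a user-defined structure. (The struct and
+function names here are hypothetical; you would install them as the
+corresponding members of a `struct gc_event_listener`, whose exact
+layout is given in `gc-event-listener.h`.)
+
+```
+#include <stddef.h>
+
+struct my_heap_stats {
+  size_t current_heap_size;
+  size_t peak_heap_size;
+  size_t last_live_data_size;
+};
+
+static void my_init(void *data, size_t heap_size) {
+  struct my_heap_stats *stats = data;
+  stats->current_heap_size = heap_size;
+  stats->peak_heap_size = heap_size;
+  stats->last_live_data_size = 0;
+}
+
+static void my_heap_resized(void *data, size_t new_size) {
+  struct my_heap_stats *stats = data;
+  stats->current_heap_size = new_size;
+  if (new_size > stats->peak_heap_size)
+    stats->peak_heap_size = new_size;
+}
+
+static void my_live_data_size(void *data, size_t size) {
+  struct my_heap_stats *stats = data;
+  stats->last_live_data_size = size;
+}
+```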
+
+The collection events form a kind of state machine, and are called in
+this order:
+
+ * `requesting_stop(void* data)`: Called when the collector asks
+   mutators to stop.
+ * `waiting_for_stop(void* data)`: Called when the collector has done
+   all the pre-stop work that it is able to and is just waiting on
+   mutators to stop.
+ * `mutators_stopped(void* data)`: Called when all mutators have
+   stopped; the trace phase follows.
+ * `prepare_gc(void* data, enum gc_collection_kind gc_kind)`: Called
+   to indicate which kind of collection is happening.
+ * `roots_traced(void* data)`: Called when roots have been visited.
+ * `heap_traced(void* data)`: Called when the whole heap has been
+   traced.
+ * `ephemerons_traced(void* data)`: Called when the [ephemeron
+   fixpoint](https://wingolog.org/archives/2023/01/24/parallel-ephemeron-tracing)
+   has been reached.
+ * `restarting_mutators(void* data)`: Called right before the collector
+   restarts mutators.
+
+The collectors in Whippet will call all of these event handlers, but it
+may be that they are called conservatively: for example, the
+single-mutator, single-collector semi-space collector will never have to
+wait for mutators to stop. It will still call the functions, though!
+
+Finally, there are the mutator events:
+ * `mutator_added(void* data) -> void*`: The only event handler that
+   returns a value, called when a new mutator is added. The parameter
+   is the overall event listener data, and the result is
+   mutator-specific data. The rest of the mutator events pass this
+   mutator-specific data instead.
+ * `mutator_cause_gc(void* mutator_data)`: Called when a mutator causes
+   GC, either via allocation or an explicit `gc_collect` call.
+ * `mutator_stopping(void* mutator_data)`: Called when a mutator has
+   received the signal to stop. It may perform some marking work before
+   it stops.
+ * `mutator_stopped(void* mutator_data)`: Called when a mutator parks
+   itself.
+ * `mutator_restarted(void* mutator_data)`: Called when a mutator
+   restarts.
+ * `mutator_removed(void* mutator_data)`: Called when a mutator goes
+   away.
+
+Note that these event handlers shouldn't really do much. In
+particular, they shouldn't call into the Whippet API, and they shouldn't
+even access GC-managed objects. Event listeners are really about
+statistics and profiling and aren't a place to mutate the object graph.
+
+### Ephemerons
+
+Whippet supports ephemerons, first-class objects that weakly associate
+keys with values. If an ephemeron's key ever becomes unreachable,
+the ephemeron becomes dead and loses its value.
+
+The user-facing API is in [`gc-ephemeron.h`](../api/gc-ephemeron.h). To
+allocate an ephemeron, call `gc_allocate_ephemeron`, then initialize its
+key and value via `gc_ephemeron_init`. Get the key and value via
+`gc_ephemeron_key` and `gc_ephemeron_value`, respectively.
+
+In Whippet, ephemerons can be linked together in a chain. During GC, if
+an ephemeron's chain points to a dead ephemeron, that link will be
+elided, allowing the dead ephemeron itself to be collected. In that
+way, ephemerons can be used to build weak data structures such as weak
+maps.
+
+Weak data structures are often shared across multiple threads, so all
+routines to access and modify chain links are atomic. Use
+`gc_ephemeron_chain_head` to access the head of a storage location that
+points to an ephemeron; push a new ephemeron on a location with
+`gc_ephemeron_chain_push`; and traverse a chain with
+`gc_ephemeron_chain_next`.
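+
+To make that concrete, here is a sketch of a tiny weak map built on a
+single chain. The chain functions are the ones just described; the
+precise signatures, and the `gc_ref_from_heap_object` /
+`gc_ref_heap_object` conversions, are assumptions to be checked against
+`gc-ephemeron.h` and `gc-ref.h`:
+
+```
+#include <stddef.h>
+#include "gc-api.h"
+#include "gc-ephemeron.h"
+
+// Head of the chain; in a real program this location would be visible
+// to the collector, as a root or as a field of a GC-managed object.
+static struct gc_ephemeron *weak_map;
+
+static void weak_map_put(struct gc_mutator *mut, void *key, void *value) {
+  struct gc_ephemeron *e = gc_allocate_ephemeron(mut);
+  gc_ephemeron_init(mut, e, gc_ref_from_heap_object(key),
+                    gc_ref_from_heap_object(value));
+  gc_ephemeron_chain_push(&weak_map, e);  // atomic
+}
+
+static void* weak_map_get(void *key) {
+  // Dead ephemerons are elided from the chain by the collector.
+  for (struct gc_ephemeron *e = gc_ephemeron_chain_head(&weak_map);
+       e; e = gc_ephemeron_chain_next(e))
+    if (gc_ref_heap_object(gc_ephemeron_key(e)) == key)
+      return gc_ref_heap_object(gc_ephemeron_value(e));
+  return NULL;
+}
+```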
+
+An ephemeron association can be removed via `gc_ephemeron_mark_dead`.
+
+### Finalizers
+
+A finalizer allows the embedder to be notified when an object becomes
+unreachable.
+
+A finalizer has a priority. When the heap is created, the embedder
+should declare how many priorities there are. Lower-numbered priorities
+take precedence; if an object has a priority-0 finalizer outstanding,
+that will prevent any finalizer at level 1 (or 2, ...) from firing
+until no priority-0 finalizer remains.
+
+Call `gc_finalizer_attach`, from `gc-finalizer.h`, to attach a finalizer
+to an object.
+
+A finalizer also references an associated GC-managed closure object.
+A finalizer's reference to the closure object is strong: if a
+finalizer's closure references its finalizable object,
+directly or indirectly, the finalizer will never fire.
+
+When an object with a finalizer becomes unreachable, it is added to a
+queue. The embedder can call `gc_pop_finalizable` to get the next
+finalizable object and its associated closure. At that point the
+embedder can do anything with the object, including keeping it alive.
+Ephemeron associations will still be present while the finalizable
+object is live. Note however that any objects referenced by the
+finalizable object may themselves be already finalized; finalizers are
+enqueued for objects when they become unreachable, which can concern
+whole subgraphs of objects at once.
+
+The usual way for an embedder to know when the queue of finalizable
+objects is non-empty is to call `gc_set_finalizer_callback` to
+provide a function that will be invoked when there are pending
+finalizers.
+
+Arranging to call `gc_pop_finalizable` and doing something with the
+finalizable object and closure is the responsibility of the embedder.
+The embedder's finalization action can end up invoking arbitrary code,
+so unless the embedder imposes some kind of restriction on what
+finalizers can do, generally speaking finalizers should be run in a
+dedicated thread instead of recursively from within whatever mutator
+thread caused GC. Setting up such a thread is the responsibility of the
+mutator. `gc_pop_finalizable` is thread-safe, allowing multiple
+finalization threads if that is appropriate.
+
+`gc_allocate_finalizer` returns a finalizer, which is a fresh GC-managed
+heap object. The mutator should then directly attach it to an object
+using `gc_finalizer_attach`. When the finalizer is fired, it becomes
+available to the mutator via `gc_pop_finalizable`.
diff --git a/libguile/whippet/doc/perfetto-minor-gc.png b/libguile/whippet/doc/perfetto-minor-gc.png
new file mode 100644
index 000000000..3c528ae42
Binary files /dev/null and b/libguile/whippet/doc/perfetto-minor-gc.png differ
diff --git a/libguile/whippet/doc/tracepoints.md b/libguile/whippet/doc/tracepoints.md
new file mode 100644
index 000000000..18b7d8f29
--- /dev/null
+++ b/libguile/whippet/doc/tracepoints.md
@@ -0,0 +1,127 @@
+# Whippet performance tracing
+
+Whippet includes support for run-time tracing via
+[LTTng](https://LTTng.org) user-space tracepoints. This allows you to
+get a detailed look at how Whippet is performing on your system.
+Tracing support is currently limited to Linux systems.
+
+## Getting started
+
+First, you need to build Whippet with LTTng support. Usually this is as
+easy as building it in an environment where the `lttng-ust` library is
+present, as determined by `pkg-config --libs lttng-ust`.
You can know +if your Whippet has tracing support by seeing if the resulting binaries +are dynamically linked to `liblttng-ust`. + +If we take as an example the `mt-gcbench` test in the Whippet source +tree, we would have: + +``` +$ ldd bin/mt-gcbench.pcc | grep lttng +... +liblttng-ust.so.1 => ... +... +``` + +### Capturing traces + +Actually capturing traces is a little annoying; it's not as easy as +`perf run`. The [LTTng +documentation](https://lttng.org/docs/v2.13/#doc-controlling-tracing) is +quite thorough, but here is a summary. + +First, create your tracing session: + +``` +$ lttng create +Session auto-20250214-091153 created. +Traces will be output to ~/lttng-traces/auto-20250214-091153 +``` + +You run all these commands as your own user; they don't require root +permissions or system-wide modifications, as all of the Whippet +tracepoints are user-space tracepoints (UST). + +Just having an LTTng session created won't do anything though; you need +to configure the session. Monotonic nanosecond-resolution timestamps +are already implicitly part of each event. We also want to have process +and thread IDs for all events: + +``` +$ lttng add-context --userspace --type=vpid --type=vtid +ust context vpid added to all channels +ust context vtid added to all channels +``` + +Now enable Whippet events: + +``` +$ lttng enable-event --userspace 'whippet:*' +ust event whippet:* created in channel channel0 +``` + +And now, start recording: + +``` +$ lttng start +Tracing started for session auto-20250214-091153 +``` + +With this, traces will be captured for our program of interest: + +``` +$ bin/mt-gcbench.pcc 2.5 8 +... +``` + +Now stop the trace: + +``` +$ lttng stop +Waiting for data availability +Tracing stopped for session auto-20250214-091153 +``` + +Whew. If we did it right, our data is now in +`~/lttng-traces/auto-20250214-091153`. + +### Visualizing traces + +LTTng produces traces in the [Common Trace Format +(CTF)](https://diamon.org/ctf/). My favorite trace viewing tool is the +family of web-based trace viewers derived from `chrome://tracing`. The +best of these appear to be [the Firefox +profiler](https://profiler.firefox.com) and +[Perfetto](https://ui.perfetto.dev). Unfortunately neither of these can +work with CTF directly, so we instead need to run a trace converter. + +Oddly, there is no trace converter that can read CTF and write something +that Perfetto (e.g.) can read. However there is a [JSON-based tracing +format that these tools can +read](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0#heading=h.yr4qxyxotyw), +and [Python bindings for Babeltrace, a library that works with +CTF](https://babeltrace.org/), so that's what we do: + +``` +$ python3 ctf_to_json.py ~/lttng-traces/auto-20250214-091153 > trace.json +``` + +While Firefox Profiler can load this file, it works better on Perfetto, +as the Whippet events are visually rendered on their respective threads. + +![Screenshot of part of Perfetto UI showing a minor GC](./perfetto-minor-gc.png) + +### Expanding the set of events + +As of February 2025, +the current set of tracepoints includes the [heap +events](https://github.com/wingo/whippet/blob/main/doc/manual.md#statistics) +and some detailed internals of the parallel tracer. We expect this set +of tracepoints to expand over time. + +### Overhead of tracepoints + +When tracepoints are compiled in but no events are enabled, tracepoints +appear to have no impact on run-time. 
When event collection is on, for +x86-64 hardware, [emitting a tracepoint event takes about +100ns](https://discuss.systems/@DesnoyersMa/113986344940256872). diff --git a/libguile/whippet/embed.am b/libguile/whippet/embed.am new file mode 100644 index 000000000..af49e5ead --- /dev/null +++ b/libguile/whippet/embed.am @@ -0,0 +1,207 @@ +# Automake snippet for embedding Whippet in an autotools project. +# +# The including Makefile.am needs to do this, assuming Whippet is in the +# whippet/ subdirectory: +# +# noinst_LTLIBRARIES = +# WHIPPET_EMBEDDER_CPPFLAGS = -include src/my-embedder.h +# include whippet/embed.am +# +# my-embedder.h should provide the various hooks that Whippet needs to +# specialize itself to the embedder's object representation. +# +# The result is a libwhippet.la. To compile and link against it: +# +# AM_CFLAGS = $(WHIPPET_CPPFLAGS) $(WHIPPET_CFLAGS) $(WHIPPET_TO_EMBEDDER_CPPFLAGS) +# LDADD = libwhippet.la +# AM_LDFLAGS = $(WHIPPET_TO_EMBEDDER_LDFLAGS) +# +# The assumption is that the embedder will build a single copy of +# Whippet, specialized against a single collector, a single set of +# embedder hooks, and a single target platform. The collector and +# platform should be chosen at configure-time. Because Automake really +# wants the set of source files to be visible to it at automake-time, we +# need to implement these conditions via AM_CONDITIONAL in a +# configure.ac. For example for a parallel-mmc configuration on +# gnu-linux, we would need: +# +# AM_SUBST(WHIPPET_COLLECTOR, parallel-mmc) +# AM_CONDITIONAL(WHIPPET_COLLECTOR_SEMI, 0) +# AM_CONDITIONAL(WHIPPET_COLLECTOR_PCC, 0) +# AM_CONDITIONAL(WHIPPET_COLLECTOR_BDW, 0) +# AM_CONDITIONAL(WHIPPET_COLLECTOR_MMC, 1) +# AM_CONDITIONAL(WHIPPET_PLATFORM_GNU_LINUX, 1) +# +# Then there are other conditionals for compilation options: +# +# AM_CONDITIONAL(WHIPPET_ENABLE_DEBUG, 0) +# AM_CONDITIONAL(WHIPPET_USE_LTTNG, 1) +# +# Finally, LTO should be enabled, for best performance. This should be +# added to CFLAGS at configure-time. +# +# Getting all of this in there is gnarly. See the example configure.ac +# for one take on the topic. 
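+#
+# As a sketch (the WHIPPET_ENABLE_LTO macro invoked in the example
+# configure.ac presumably takes care of this), enabling LTO by hand at
+# configure-time would amount to something like the following shell
+# fragment in configure.ac:
+#
+#   CFLAGS="$CFLAGS -flto"
+#   LDFLAGS="$LDFLAGS -flto"
+#
+# mirroring the -flto / -flto=auto flags that the standalone Makefile and
+# embed.mk pass to the compiler and linker.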
+ +noinst_LTLIBRARIES += libwhippet-common.la libwhippet.la + +libwhippet_common_la_SOURCES = \ + %D%/src/gc-options-internal.h \ + %D%/src/gc-options.c \ + %D%/src/gc-stack.c \ + %D%/src/gc-stack.h \ + %D%/src/gc-tracepoint.c + +if WHIPPET_PLATFORM_GNU_LINUX +libwhippet_common_la_SOURCES += %D%/src/gc-platform-gnu-linux.c +endif + +libwhippet_la_SOURCES = \ + %D%/src/adaptive-heap-sizer.h \ + %D%/src/address-hash.h \ + %D%/src/address-map.h \ + %D%/src/address-set.h \ + %D%/src/assert.h \ + %D%/src/background-thread.h \ + %D%/src/copy-space.h \ + %D%/src/debug.h \ + %D%/src/extents.h \ + %D%/src/field-set.h \ + %D%/src/freelist.h \ + %D%/src/gc-align.h \ + %D%/src/gc-ephemeron-internal.h \ + %D%/src/gc-ephemeron.c \ + %D%/src/gc-finalizer-internal.h \ + %D%/src/gc-finalizer.c \ + %D%/src/gc-internal.h \ + %D%/src/gc-lock.h \ + %D%/src/gc-platform.h \ + %D%/src/gc-trace.h \ + %D%/src/growable-heap-sizer.h \ + %D%/src/heap-sizer.h \ + %D%/src/large-object-space.h \ + %D%/src/local-worklist.h \ + %D%/src/nofl-space.h \ + %D%/src/parallel-tracer.h \ + %D%/src/root.h \ + %D%/src/root-worklist.h \ + %D%/src/serial-tracer.h \ + %D%/src/shared-worklist.h \ + %D%/src/simple-worklist.h \ + %D%/src/spin.h \ + %D%/src/splay-tree.h \ + %D%/src/swar.h \ + %D%/src/tracer.h + +WHIPPET_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 +WHIPPET_CFLAGS_semi = -DGC_PRECISE_ROOTS=1 +WHIPPET_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1 +WHIPPET_CFLAGS_generational_pcc = $(WHIPPET_CFLAGS_pcc) -DGC_GENERATIONAL=1 +WHIPPET_CFLAGS_mmc = \ + -DGC_PRECISE_ROOTS=1 +WHIPPET_CFLAGS_generational_mmc = \ + -DGC_PRECISE_ROOTS=1 -DGC_GENERATIONAL=1 +WHIPPET_CFLAGS_parallel_mmc = \ + -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1 +WHIPPET_CFLAGS_parallel_generational_mmc = \ + -DGC_PRECISE_ROOTS=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1 +WHIPPET_CFLAGS_stack_conservative_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 +WHIPPET_CFLAGS_stack_conservative_generational_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_GENERATIONAL=1 +WHIPPET_CFLAGS_stack_conservative_parallel_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_PARALLEL=1 +WHIPPET_CFLAGS_stack_conservative_parallel_generational_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1 +WHIPPET_CFLAGS_heap_conservative_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 +WHIPPET_CFLAGS_heap_conservative_generational_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_GENERATIONAL=1 +WHIPPET_CFLAGS_heap_conservative_parallel_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_PARALLEL=1 +WHIPPET_CFLAGS_heap_conservative_parallel_generational_mmc = \ + -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1 + +WHIPPET_CFLAGS = $(WHIPPET_CFLAGS_$(subst -,_,$(WHIPPET_COLLECTOR))) +WHIPPET_IMPL_CFLAGS = +WHIPPET_LIBS = -lm +WHIPPET_CPPFLAGS = -I$(srcdir)/%D%/api +WHIPPET_TO_EMBEDDER_CPPFLAGS = $(WHIPPET_CPPFLAGS) + +if WHIPPET_ENABLE_DEBUG +WHIPPET_CFLAGS += -DGC_DEBUG=1 +endif + +if WHIPPET_COLLECTOR_SEMI +libwhippet_la_SOURCES += %D%/src/semi.c +WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/semi-attrs.h +endif + +if WHIPPET_COLLECTOR_PCC +libwhippet_la_SOURCES += %D%/src/pcc.c +WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/pcc-attrs.h +endif + +if WHIPPET_COLLECTOR_BDW +libwhippet_la_SOURCES += %D%/src/bdw.c +WHIPPET_IMPL_CFLAGS += $(WHIPPET_BDW_CFLAGS) +WHIPPET_LIBS += $(WHIPPET_BDW_LIBS) +WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/bdw-attrs.h +endif + +if 
WHIPPET_COLLECTOR_MMC +libwhippet_la_SOURCES += %D%/src/mmc.c +WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/mmc-attrs.h +endif + +# add to cflags: -flto -fvisibility=hidden -fno-strict-aliasing + +libwhippet_common_la_CPPFLAGS = $(WHIPPET_CPPFLAGS) +libwhippet_common_la_CFLAGS = -Wall -Wno-unused $(CFLAGS) +libwhippet_common_la_CFLAGS += $(WHIPPET_CFLAGS) +libwhippet_common_la_LDFLAGS = -lpthread $(LDFLAGS) +libwhippet_common_la_LIBADD = $(LIBS) + +if WHIPPET_USE_LTTNG +libwhippet_common_la_CPPFLAGS += $(WHIPPET_LTTNG_CFLAGS) -DGC_TRACEPOINT_LTTNG=1 +WHIPPET_LIBS += $(WHIPPET_LTTNG_LIBS) +endif + +if !WHIPPET_ENABLE_DEBUG +libwhippet_common_la_CFLAGS += -DNDEBUG +endif + +libwhippet_la_CPPFLAGS = $(libwhippet_common_la_CPPFLAGS) $(WHIPPET_EMBEDDER_CPPFLAGS) +libwhippet_la_CFLAGS = $(libwhippet_common_la_CFLAGS) +libwhippet_la_CFLAGS += $(WHIPPET_IMPL_CFLAGS) +libwhippet_la_LDFLAGS = $(libwhippet_common_la_LDFLAGS) $(WHIPPET_LIBS) +libwhippet_la_LIBADD = libwhippet-common.la + +noinst_HEADERS = \ + %D%/api/bdw-attrs.h \ + %D%/api/gc-allocation-kind.h \ + %D%/api/gc-api.h \ + %D%/api/gc-assert.h \ + %D%/api/gc-attrs.h \ + %D%/api/gc-basic-stats.h \ + %D%/api/gc-collection-kind.h \ + %D%/api/gc-config.h \ + %D%/api/gc-conservative-ref.h \ + %D%/api/gc-edge.h \ + %D%/api/gc-embedder-api.h \ + %D%/api/gc-ephemeron.h \ + %D%/api/gc-event-listener-chain.h \ + %D%/api/gc-event-listener.h \ + %D%/api/gc-finalizer.h \ + %D%/api/gc-forwarding.h \ + %D%/api/gc-histogram.h \ + %D%/api/gc-inline.h \ + %D%/api/gc-lttng.h \ + %D%/api/gc-null-event-listener.h \ + %D%/api/gc-options.h \ + %D%/api/gc-ref.h \ + %D%/api/gc-tracepoint.h \ + %D%/api/gc-visibility.h \ + %D%/api/mmc-attrs.h \ + %D%/api/pcc-attrs.h \ + %D%/api/semi-attrs.h diff --git a/libguile/whippet/embed.mk b/libguile/whippet/embed.mk new file mode 100644 index 000000000..0d2de0df5 --- /dev/null +++ b/libguile/whippet/embed.mk @@ -0,0 +1,105 @@ +GC_COLLECTOR ?= semi + +DEFAULT_BUILD := opt + +BUILD_CFLAGS_opt = -O2 -g -DNDEBUG +BUILD_CFLAGS_optdebug = -O2 -g -DGC_DEBUG=1 +BUILD_CFLAGS_debug = -O0 -g -DGC_DEBUG=1 + +GC_BUILD_CFLAGS = $(BUILD_CFLAGS_$(or $(GC_BUILD),$(DEFAULT_BUILD))) + +V ?= 1 +v_0 = @ +v_1 = + +GC_USE_LTTNG_0 := +GC_USE_LTTNG_1 := 1 +GC_USE_LTTNG := $(shell pkg-config --exists lttng-ust && echo 1 || echo 0) +GC_LTTNG_CPPFLAGS := $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)), $(shell pkg-config --cflags lttng-ust),) +GC_LTTNG_LIBS := $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)), $(shell pkg-config --libs lttng-ust),) +GC_TRACEPOINT_CPPFLAGS = $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)),$(GC_LTTNG_CPPFLAGS) -DGC_TRACEPOINT_LTTNG=1,) +GC_TRACEPOINT_LIBS = $(GC_LTTNG_LIBS) + +GC_V = $(v_$(V)) +GC_CC = gcc +GC_CFLAGS = -Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(GC_BUILD_CFLAGS) +GC_CPPFLAGS = -I$(WHIPPET)api $(GC_TRACEPOINT_CPPFLAGS) +GC_LDFLAGS = -lpthread -flto=auto $(GC_TRACEPOINT_LIBS) +GC_DEPFLAGS = +GC_COMPILE = $(GC_V)$(GC_CC) $(GC_CFLAGS) $(GC_CPPFLAGS) $(GC_DEPFLAGS) -o $@ +GC_LINK = $(GC_V)$(GC_CC) $(GC_LDFLAGS) -o $@ +GC_PLATFORM = gnu-linux +GC_OBJDIR = + +$(GC_OBJDIR)gc-platform.o: $(WHIPPET)src/gc-platform-$(GC_PLATFORM).c + $(GC_COMPILE) -c $< +$(GC_OBJDIR)gc-stack.o: $(WHIPPET)src/gc-stack.c + $(GC_COMPILE) -c $< +$(GC_OBJDIR)gc-options.o: $(WHIPPET)src/gc-options.c + $(GC_COMPILE) -c $< +$(GC_OBJDIR)gc-tracepoint.o: $(WHIPPET)src/gc-tracepoint.c + $(GC_COMPILE) -c $< +$(GC_OBJDIR)gc-ephemeron.o: $(WHIPPET)src/gc-ephemeron.c + $(GC_COMPILE) $(EMBEDDER_TO_GC_CFLAGS) -c $< +$(GC_OBJDIR)gc-finalizer.o: 
$(WHIPPET)src/gc-finalizer.c + $(GC_COMPILE) $(EMBEDDER_TO_GC_CFLAGS) -c $< + +GC_STEM_bdw = bdw +GC_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 +GC_IMPL_CFLAGS_bdw = `pkg-config --cflags bdw-gc` +GC_LIBS_bdw = `pkg-config --libs bdw-gc` + +GC_STEM_semi = semi +GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1 +GC_LIBS_semi = -lm + +GC_STEM_pcc = pcc +GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1 +GC_LIBS_pcc = -lm + +GC_STEM_generational_pcc = $(GC_STEM_pcc) +GC_CFLAGS_generational_pcc = $(GC_CFLAGS_pcc) -DGC_GENERATIONAL=1 +GC_LIBS_generational_pcc = $(GC_LIBS_pcc) + +define mmc_variant +GC_STEM_$(1) = mmc +GC_CFLAGS_$(1) = $(2) +GC_LIBS_$(1) = -lm +endef + +define generational_mmc_variants +$(call mmc_variant,$(1)mmc,$(2)) +$(call mmc_variant,$(1)generational_mmc,$(2) -DGC_GENERATIONAL=1) +endef + +define parallel_mmc_variants +$(call generational_mmc_variants,$(1),$(2)) +$(call generational_mmc_variants,$(1)parallel_,$(2) -DGC_PARALLEL=1) +endef + +define trace_mmc_variants +$(call parallel_mmc_variants,,-DGC_PRECISE_ROOTS=1) +$(call parallel_mmc_variants,stack_conservative_,-DGC_CONSERVATIVE_ROOTS=1) +$(call parallel_mmc_variants,heap_conservative_,-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1) +endef + +$(eval $(call trace_mmc_variants)) + +gc_var = $($(1)$(subst -,_,$(2))) +gc_impl = $(call gc_var,GC_STEM_,$(1)).c +gc_attrs = $(call gc_var,GC_STEM_,$(1))-attrs.h +gc_cflags = $(call gc_var,GC_CFLAGS_,$(1)) +gc_impl_cflags = $(call gc_var,GC_IMPL_CFLAGS_,$(1)) +gc_libs = $(call gc_var,GC_LIBS_,$(1)) + +GC_IMPL = $(call gc_impl,$(GC_COLLECTOR)) +GC_CFLAGS += $(call gc_cflags,$(GC_COLLECTOR)) +GC_IMPL_CFLAGS = $(call gc_impl_cflags,$(GC_COLLECTOR)) +GC_ATTRS = $(WHIPPET)api/$(call gc_attrs,$(GC_COLLECTOR)) +GC_TO_EMBEDDER_CFLAGS = -include $(GC_ATTRS) +GC_LIBS = $(call gc_libs,$(GC_COLLECTOR)) + +$(GC_OBJDIR)gc-impl.o: $(WHIPPET)src/$(call gc_impl,$(GC_COLLECTOR)) + $(GC_COMPILE) $(GC_IMPL_CFLAGS) $(EMBEDDER_TO_GC_CFLAGS) -c $< + +GC_OBJS=$(foreach O,gc-platform.o gc-stack.o gc-options.o gc-tracepoint.o gc-ephemeron.o gc-finalizer.o gc-impl.o,$(GC_OBJDIR)$(O)) diff --git a/libguile/whippet/manifest.scm b/libguile/whippet/manifest.scm new file mode 100644 index 000000000..ea35cf3d2 --- /dev/null +++ b/libguile/whippet/manifest.scm @@ -0,0 +1,11 @@ +(use-modules (guix packages)) + +(specifications->manifest + '("bash" + "coreutils" + "gcc-toolchain" + "lttng-ust" + "glibc" + "libgc" + "make" + "pkg-config")) diff --git a/libguile/whippet/src/adaptive-heap-sizer.h b/libguile/whippet/src/adaptive-heap-sizer.h new file mode 100644 index 000000000..225b44baf --- /dev/null +++ b/libguile/whippet/src/adaptive-heap-sizer.h @@ -0,0 +1,171 @@ +#ifndef ADAPTIVE_HEAP_SIZER_H +#define ADAPTIVE_HEAP_SIZER_H + +#include +#include +#include +#include + +#include "assert.h" +#include "background-thread.h" +#include "debug.h" +#include "gc-config.h" +#include "gc-platform.h" +#include "heap-sizer.h" + +// This is the MemBalancer algorithm from "Optimal Heap Limits for Reducing +// Browser Memory Use" by Marisa Kirisame, Pranav Shenoy, and Pavel Panchekha +// (https://arxiv.org/abs/2204.10455). +// +// This implementation differs slightly in that the constant "c" of the paper +// has been extracted outside the radical, and notionally reversed: it is a +// unitless "expansiveness" parameter whose domain is [0,+∞]. Also there are +// minimum and maximum heap size multipliers, and a minimum amount of free +// space. The initial collection rate is an informed guess. 
The initial +// allocation rate estimate is high, considering that allocation rates are often +// high on program startup. + +struct gc_adaptive_heap_sizer { + uint64_t (*get_allocation_counter)(struct gc_heap *heap); + void (*set_heap_size)(struct gc_heap *heap, size_t size); + struct gc_heap *heap; + uint64_t smoothed_pause_time; + uint64_t smoothed_live_bytes; + uint64_t live_bytes; + double smoothed_allocation_rate; + double collection_smoothing_factor; + double allocation_smoothing_factor; + double minimum_multiplier; + double maximum_multiplier; + double minimum_free_space; + double expansiveness; +#if GC_PARALLEL + pthread_mutex_t lock; +#endif + int background_task_id; + uint64_t last_bytes_allocated; + uint64_t last_heartbeat; +}; + +static void +gc_adaptive_heap_sizer_lock(struct gc_adaptive_heap_sizer *sizer) { +#if GC_PARALLEL + pthread_mutex_lock(&sizer->lock); +#endif +} + +static void +gc_adaptive_heap_sizer_unlock(struct gc_adaptive_heap_sizer *sizer) { +#if GC_PARALLEL + pthread_mutex_unlock(&sizer->lock); +#endif +} + +// With lock +static uint64_t +gc_adaptive_heap_sizer_calculate_size(struct gc_adaptive_heap_sizer *sizer) { + double allocation_rate = sizer->smoothed_allocation_rate; + double collection_rate = + (double)sizer->smoothed_pause_time / (double)sizer->smoothed_live_bytes; + double radicand = sizer->live_bytes * allocation_rate / collection_rate; + double multiplier = 1.0 + sizer->expansiveness * sqrt(radicand); + if (isnan(multiplier) || multiplier < sizer->minimum_multiplier) + multiplier = sizer->minimum_multiplier; + else if (multiplier > sizer->maximum_multiplier) + multiplier = sizer->maximum_multiplier; + uint64_t size = sizer->live_bytes * multiplier; + if (size - sizer->live_bytes < sizer->minimum_free_space) + size = sizer->live_bytes + sizer->minimum_free_space; + return size; +} + +static uint64_t +gc_adaptive_heap_sizer_set_expansiveness(struct gc_adaptive_heap_sizer *sizer, + double expansiveness) { + gc_adaptive_heap_sizer_lock(sizer); + sizer->expansiveness = expansiveness; + uint64_t heap_size = gc_adaptive_heap_sizer_calculate_size(sizer); + gc_adaptive_heap_sizer_unlock(sizer); + return heap_size; +} + +static void +gc_adaptive_heap_sizer_on_gc(struct gc_adaptive_heap_sizer *sizer, + size_t live_bytes, uint64_t pause_ns, + void (*set_heap_size)(struct gc_heap*, size_t)) { + gc_adaptive_heap_sizer_lock(sizer); + sizer->live_bytes = live_bytes; + sizer->smoothed_live_bytes *= 1.0 - sizer->collection_smoothing_factor; + sizer->smoothed_live_bytes += sizer->collection_smoothing_factor * live_bytes; + sizer->smoothed_pause_time *= 1.0 - sizer->collection_smoothing_factor; + sizer->smoothed_pause_time += sizer->collection_smoothing_factor * pause_ns; + set_heap_size(sizer->heap, gc_adaptive_heap_sizer_calculate_size(sizer)); + gc_adaptive_heap_sizer_unlock(sizer); +} + +static void +gc_adaptive_heap_sizer_background_task(void *data) { + struct gc_adaptive_heap_sizer *sizer = data; + gc_adaptive_heap_sizer_lock(sizer); + uint64_t bytes_allocated = + sizer->get_allocation_counter(sizer->heap); + // bytes_allocated being 0 means the request failed; retry later. + if (bytes_allocated) { + uint64_t heartbeat = gc_platform_monotonic_nanoseconds(); + double rate = (double) (bytes_allocated - sizer->last_bytes_allocated) / + (double) (heartbeat - sizer->last_heartbeat); + // Just smooth the rate, under the assumption that the denominator is almost + // always 1. 
+ sizer->smoothed_allocation_rate *= 1.0 - sizer->allocation_smoothing_factor; + sizer->smoothed_allocation_rate += rate * sizer->allocation_smoothing_factor; + sizer->last_heartbeat = heartbeat; + sizer->last_bytes_allocated = bytes_allocated; + sizer->set_heap_size(sizer->heap, + gc_adaptive_heap_sizer_calculate_size(sizer)); + } + gc_adaptive_heap_sizer_unlock(sizer); +} + +static struct gc_adaptive_heap_sizer* +gc_make_adaptive_heap_sizer(struct gc_heap *heap, double expansiveness, + uint64_t (*get_allocation_counter)(struct gc_heap*), + void (*set_heap_size)(struct gc_heap*, size_t), + struct gc_background_thread *thread) { + struct gc_adaptive_heap_sizer *sizer; + sizer = malloc(sizeof(*sizer)); + if (!sizer) + GC_CRASH(); + memset(sizer, 0, sizeof(*sizer)); + sizer->get_allocation_counter = get_allocation_counter; + sizer->set_heap_size = set_heap_size; + sizer->heap = heap; + // Baseline estimate of GC speed: 10 MB/ms, or 10 bytes/ns. However since we + // observe this speed by separately noisy measurements, we have to provide + // defaults for numerator and denominator; estimate 2ms for initial GC pauses + // for 20 MB of live data during program startup. + sizer->smoothed_pause_time = 2 * 1000 * 1000; + sizer->smoothed_live_bytes = 20 * 1024 * 1024; + // Baseline estimate of allocation rate during startup: 50 MB in 10ms, or 5 + // bytes/ns. + sizer->smoothed_allocation_rate = 5; + sizer->collection_smoothing_factor = 0.5; + sizer->allocation_smoothing_factor = 0.95; + sizer->minimum_multiplier = 1.1; + sizer->maximum_multiplier = 5; + sizer->minimum_free_space = 4 * 1024 * 1024; + sizer->expansiveness = expansiveness; + sizer->last_bytes_allocated = get_allocation_counter(heap); + sizer->last_heartbeat = gc_platform_monotonic_nanoseconds(); +#if GC_PARALLEL + pthread_mutex_init(&thread->lock, NULL); + sizer->background_task_id = + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_MIDDLE, + gc_adaptive_heap_sizer_background_task, + sizer); +#else + sizer->background_task_id = -1; +#endif + return sizer; +} + +#endif // ADAPTIVE_HEAP_SIZER_H diff --git a/libguile/whippet/src/address-hash.h b/libguile/whippet/src/address-hash.h new file mode 100644 index 000000000..49c33be97 --- /dev/null +++ b/libguile/whippet/src/address-hash.h @@ -0,0 +1,45 @@ +#ifndef ADDRESS_HASH_H +#define ADDRESS_HASH_H + +#include + +static uintptr_t hash_address(uintptr_t x) { + if (sizeof (x) < 8) { + // Chris Wellon's lowbias32, from https://nullprogram.com/blog/2018/07/31/. + x ^= x >> 16; + x *= 0x7feb352dU; + x ^= x >> 15; + x *= 0x846ca68bU; + x ^= x >> 16; + return x; + } else { + // Sebastiano Vigna's splitmix64 integer mixer, from + // https://prng.di.unimi.it/splitmix64.c. + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9U; + x ^= x >> 27; + x *= 0x94d049bb133111ebU; + x ^= x >> 31; + return x; + } +} +// Inverse of hash_address from https://nullprogram.com/blog/2018/07/31/. 
+static uintptr_t unhash_address(uintptr_t x) { + if (sizeof (x) < 8) { + x ^= x >> 16; + x *= 0x43021123U; + x ^= x >> 15 ^ x >> 30; + x *= 0x1d69e2a5U; + x ^= x >> 16; + return x; + } else { + x ^= x >> 31 ^ x >> 62; + x *= 0x319642b2d24d8ec3U; + x ^= x >> 27 ^ x >> 54; + x *= 0x96de1b173f119089U; + x ^= x >> 30 ^ x >> 60; + return x; + } +} + +#endif // ADDRESS_HASH_H diff --git a/libguile/whippet/src/address-map.h b/libguile/whippet/src/address-map.h new file mode 100644 index 000000000..57c2a0a04 --- /dev/null +++ b/libguile/whippet/src/address-map.h @@ -0,0 +1,213 @@ +#ifndef ADDRESS_MAP_H +#define ADDRESS_MAP_H + +#include +#include +#include + +#include "address-hash.h" +#include "gc-assert.h" + +struct hash_map_entry { + uintptr_t k; + uintptr_t v; +}; + +struct hash_map { + struct hash_map_entry *data; + size_t size; // total number of slots + size_t n_items; // number of items in set + uint8_t *bits; // bitvector indicating set slots +}; + +static void hash_map_clear(struct hash_map *map) { + memset(map->bits, 0, map->size / 8); + map->n_items = 0; +} + +// Size must be a power of 2. +static void hash_map_init(struct hash_map *map, size_t size) { + map->size = size; + map->data = malloc(sizeof(struct hash_map_entry) * size); + if (!map->data) GC_CRASH(); + map->bits = malloc(size / 8); + if (!map->bits) GC_CRASH(); + hash_map_clear(map); +} +static void hash_map_destroy(struct hash_map *map) { + free(map->data); + free(map->bits); +} + +static size_t hash_map_slot_index(struct hash_map *map, size_t idx) { + return idx & (map->size - 1); +} +static struct hash_map_entry* hash_map_slot_entry(struct hash_map *map, + size_t idx) { + return &map->data[hash_map_slot_index(map, idx)]; +} +static int hash_map_slot_is_empty(struct hash_map *map, size_t idx) { + idx = hash_map_slot_index(map, idx); + return (map->bits[idx / 8] & (1 << (idx % 8))) == 0; +} +static void hash_map_slot_acquire(struct hash_map *map, size_t idx) { + idx = hash_map_slot_index(map, idx); + map->bits[idx / 8] |= (1 << (idx % 8)); + map->n_items++; +} +static void hash_map_slot_release(struct hash_map *map, size_t idx) { + idx = hash_map_slot_index(map, idx); + map->bits[idx / 8] &= ~(1 << (idx % 8)); + map->n_items--; +} +static size_t hash_map_slot_distance(struct hash_map *map, size_t idx) { + return hash_map_slot_index(map, idx - hash_map_slot_entry(map, idx)->k); +} +static int hash_map_should_shrink(struct hash_map *map) { + return map->size > 8 && map->n_items <= (map->size >> 3); +} +static int hash_map_should_grow(struct hash_map *map) { + return map->n_items >= map->size - (map->size >> 3); +} + +static void hash_map_do_insert(struct hash_map *map, uintptr_t k, uintptr_t v) { + size_t displacement = 0; + while (!hash_map_slot_is_empty(map, k + displacement) + && displacement < hash_map_slot_distance(map, k + displacement)) + displacement++; + while (!hash_map_slot_is_empty(map, k + displacement) + && displacement == hash_map_slot_distance(map, k + displacement)) { + if (hash_map_slot_entry(map, k + displacement)->k == k) { + hash_map_slot_entry(map, k + displacement)->v = v; + return; + } + displacement++; + } + size_t idx = k + displacement; + size_t slots_to_move = 0; + while (!hash_map_slot_is_empty(map, idx + slots_to_move)) + slots_to_move++; + hash_map_slot_acquire(map, idx + slots_to_move); + while (slots_to_move--) + *hash_map_slot_entry(map, idx + slots_to_move + 1) = + *hash_map_slot_entry(map, idx + slots_to_move); + *hash_map_slot_entry(map, idx) = (struct hash_map_entry){ k, v }; +} + +static 
void hash_map_populate(struct hash_map *dst, struct hash_map *src) { + for (size_t i = 0; i < src->size; i++) + if (!hash_map_slot_is_empty(src, i)) + hash_map_do_insert(dst, hash_map_slot_entry(src, i)->k, + hash_map_slot_entry(src, i)->v); +} +static void hash_map_grow(struct hash_map *map) { + struct hash_map fresh; + hash_map_init(&fresh, map->size << 1); + hash_map_populate(&fresh, map); + hash_map_destroy(map); + memcpy(map, &fresh, sizeof(fresh)); +} +static void hash_map_shrink(struct hash_map *map) { + struct hash_map fresh; + hash_map_init(&fresh, map->size >> 1); + hash_map_populate(&fresh, map); + hash_map_destroy(map); + memcpy(map, &fresh, sizeof(fresh)); +} + +static void hash_map_insert(struct hash_map *map, uintptr_t k, uintptr_t v) { + if (hash_map_should_grow(map)) + hash_map_grow(map); + hash_map_do_insert(map, k, v); +} +static void hash_map_remove(struct hash_map *map, uintptr_t k) { + size_t slot = k; + while (!hash_map_slot_is_empty(map, slot) && hash_map_slot_entry(map, slot)->k != k) + slot++; + if (hash_map_slot_is_empty(map, slot)) + __builtin_trap(); + while (!hash_map_slot_is_empty(map, slot + 1) + && hash_map_slot_distance(map, slot + 1)) { + *hash_map_slot_entry(map, slot) = *hash_map_slot_entry(map, slot + 1); + slot++; + } + hash_map_slot_release(map, slot); + if (hash_map_should_shrink(map)) + hash_map_shrink(map); +} +static int hash_map_contains(struct hash_map *map, uintptr_t k) { + for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) { + if (hash_map_slot_entry(map, slot)->k == k) + return 1; + if (hash_map_slot_distance(map, slot) < (slot - k)) + return 0; + } + return 0; +} +static uintptr_t hash_map_lookup(struct hash_map *map, uintptr_t k, uintptr_t default_) { + for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) { + if (hash_map_slot_entry(map, slot)->k == k) + return hash_map_slot_entry(map, slot)->v; + if (hash_map_slot_distance(map, slot) < (slot - k)) + break; + } + return default_; +} +static inline void hash_map_for_each (struct hash_map *map, + void (*f)(uintptr_t, uintptr_t, void*), + void *data) __attribute__((always_inline)); +static inline void hash_map_for_each(struct hash_map *map, + void (*f)(uintptr_t, uintptr_t, void*), + void *data) { + for (size_t i = 0; i < map->size; i++) + if (!hash_map_slot_is_empty(map, i)) + f(hash_map_slot_entry(map, i)->k, hash_map_slot_entry(map, i)->v, data); +} + +struct address_map { + struct hash_map hash_map; +}; + +static void address_map_init(struct address_map *map) { + hash_map_init(&map->hash_map, 8); +} +static void address_map_destroy(struct address_map *map) { + hash_map_destroy(&map->hash_map); +} +static void address_map_clear(struct address_map *map) { + hash_map_clear(&map->hash_map); +} + +static void address_map_add(struct address_map *map, uintptr_t addr, uintptr_t v) { + hash_map_insert(&map->hash_map, hash_address(addr), v); +} +static void address_map_remove(struct address_map *map, uintptr_t addr) { + hash_map_remove(&map->hash_map, hash_address(addr)); +} +static int address_map_contains(struct address_map *map, uintptr_t addr) { + return hash_map_contains(&map->hash_map, hash_address(addr)); +} +static uintptr_t address_map_lookup(struct address_map *map, uintptr_t addr, + uintptr_t default_) { + return hash_map_lookup(&map->hash_map, hash_address(addr), default_); +} + +struct address_map_for_each_data { + void (*f)(uintptr_t, uintptr_t, void *); + void *data; +}; +static void address_map_do_for_each(uintptr_t k, uintptr_t v, void *data) { + struct 
address_map_for_each_data *for_each_data = data; + for_each_data->f(unhash_address(k), v, for_each_data->data); +} +static inline void address_map_for_each (struct address_map *map, + void (*f)(uintptr_t, uintptr_t, void*), + void *data) __attribute__((always_inline)); +static inline void address_map_for_each (struct address_map *map, + void (*f)(uintptr_t, uintptr_t, void*), + void *data) { + struct address_map_for_each_data for_each_data = { f, data }; + hash_map_for_each(&map->hash_map, address_map_do_for_each, &for_each_data); +} + +#endif // ADDRESS_MAP_H diff --git a/libguile/whippet/src/address-set.h b/libguile/whippet/src/address-set.h new file mode 100644 index 000000000..b1c27fa41 --- /dev/null +++ b/libguile/whippet/src/address-set.h @@ -0,0 +1,214 @@ +#ifndef ADDRESS_SET_H +#define ADDRESS_SET_H + +#include +#include +#include + +#include "address-hash.h" +#include "gc-assert.h" + +struct hash_set { + uintptr_t *data; + size_t size; // total number of slots + size_t n_items; // number of items in set + uint8_t *bits; // bitvector indicating set slots +}; + +static void hash_set_clear(struct hash_set *set) { + memset(set->bits, 0, set->size / 8); + set->n_items = 0; +} + +// Size must be a power of 2. +static void hash_set_init(struct hash_set *set, size_t size) { + set->size = size; + set->data = malloc(sizeof(uintptr_t) * size); + if (!set->data) GC_CRASH(); + set->bits = malloc(size / 8); + if (!set->bits) GC_CRASH(); + hash_set_clear(set); +} +static void hash_set_destroy(struct hash_set *set) { + free(set->data); + free(set->bits); +} + +static size_t hash_set_slot_index(struct hash_set *set, size_t idx) { + return idx & (set->size - 1); +} +static int hash_set_slot_is_empty(struct hash_set *set, size_t idx) { + idx = hash_set_slot_index(set, idx); + return (set->bits[idx / 8] & (1 << (idx % 8))) == 0; +} +static uintptr_t hash_set_slot_ref(struct hash_set *set, size_t idx) { + return set->data[hash_set_slot_index(set, idx)]; +} +static void hash_set_slot_set(struct hash_set *set, size_t idx, uintptr_t v) { + set->data[hash_set_slot_index(set, idx)] = v; +} +static void hash_set_slot_acquire(struct hash_set *set, size_t idx) { + idx = hash_set_slot_index(set, idx); + set->bits[idx / 8] |= (1 << (idx % 8)); + set->n_items++; +} +static void hash_set_slot_release(struct hash_set *set, size_t idx) { + idx = hash_set_slot_index(set, idx); + set->bits[idx / 8] &= ~(1 << (idx % 8)); + set->n_items--; +} +static size_t hash_set_slot_distance(struct hash_set *set, size_t idx) { + return hash_set_slot_index(set, idx - hash_set_slot_ref(set, idx)); +} +static int hash_set_should_shrink(struct hash_set *set) { + return set->size > 8 && set->n_items <= (set->size >> 3); +} +static int hash_set_should_grow(struct hash_set *set) { + return set->n_items >= set->size - (set->size >> 3); +} + +static void hash_set_do_insert(struct hash_set *set, uintptr_t v) { + size_t displacement = 0; + while (!hash_set_slot_is_empty(set, v + displacement) + && displacement < hash_set_slot_distance(set, v + displacement)) + displacement++; + while (!hash_set_slot_is_empty(set, v + displacement) + && displacement == hash_set_slot_distance(set, v + displacement)) { + if (hash_set_slot_ref(set, v + displacement) == v) + return; + displacement++; + } + size_t idx = v + displacement; + size_t slots_to_move = 0; + while (!hash_set_slot_is_empty(set, idx + slots_to_move)) + slots_to_move++; + hash_set_slot_acquire(set, idx + slots_to_move); + while (slots_to_move--) + hash_set_slot_set(set, idx + slots_to_move + 
1, + hash_set_slot_ref(set, idx + slots_to_move)); + hash_set_slot_set(set, idx, v); +} + +static void hash_set_populate(struct hash_set *dst, struct hash_set *src) { + for (size_t i = 0; i < src->size; i++) + if (!hash_set_slot_is_empty(src, i)) + hash_set_do_insert(dst, hash_set_slot_ref(src, i)); +} +static void hash_set_grow(struct hash_set *set) { + struct hash_set fresh; + hash_set_init(&fresh, set->size << 1); + hash_set_populate(&fresh, set); + hash_set_destroy(set); + memcpy(set, &fresh, sizeof(fresh)); +} +static void hash_set_shrink(struct hash_set *set) { + struct hash_set fresh; + hash_set_init(&fresh, set->size >> 1); + hash_set_populate(&fresh, set); + hash_set_destroy(set); + memcpy(set, &fresh, sizeof(fresh)); +} + +static void hash_set_insert(struct hash_set *set, uintptr_t v) { + if (hash_set_should_grow(set)) + hash_set_grow(set); + hash_set_do_insert(set, v); +} + +static void hash_set_remove(struct hash_set *set, uintptr_t v) { + size_t slot = v; + while (!hash_set_slot_is_empty(set, slot) && hash_set_slot_ref(set, slot) != v) + slot++; + if (hash_set_slot_is_empty(set, slot)) + __builtin_trap(); + while (!hash_set_slot_is_empty(set, slot + 1) + && hash_set_slot_distance(set, slot + 1)) { + hash_set_slot_set(set, slot, hash_set_slot_ref(set, slot + 1)); + slot++; + } + hash_set_slot_release(set, slot); + if (hash_set_should_shrink(set)) + hash_set_shrink(set); +} +static int hash_set_contains(struct hash_set *set, uintptr_t v) { + for (size_t slot = v; !hash_set_slot_is_empty(set, slot); slot++) { + if (hash_set_slot_ref(set, slot) == v) + return 1; + if (hash_set_slot_distance(set, slot) < (slot - v)) + return 0; + } + return 0; +} +static inline void hash_set_find(struct hash_set *set, + int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline)); +static inline void hash_set_find(struct hash_set *set, + int (*f)(uintptr_t, void*), void *data) { + for (size_t i = 0; i < set->size; i++) + if (!hash_set_slot_is_empty(set, i)) + if (f(hash_set_slot_ref(set, i), data)) + return; +} + +struct address_set { + struct hash_set hash_set; +}; + +static void address_set_init(struct address_set *set) { + hash_set_init(&set->hash_set, 8); +} +static void address_set_destroy(struct address_set *set) { + hash_set_destroy(&set->hash_set); +} +static void address_set_clear(struct address_set *set) { + hash_set_clear(&set->hash_set); +} + +static void address_set_add(struct address_set *set, uintptr_t addr) { + hash_set_insert(&set->hash_set, hash_address(addr)); +} +static void address_set_remove(struct address_set *set, uintptr_t addr) { + hash_set_remove(&set->hash_set, hash_address(addr)); +} +static int address_set_contains(struct address_set *set, uintptr_t addr) { + return hash_set_contains(&set->hash_set, hash_address(addr)); +} +static void address_set_union(struct address_set *set, struct address_set *other) { + while (set->hash_set.size < other->hash_set.size) + hash_set_grow(&set->hash_set); + hash_set_populate(&set->hash_set, &other->hash_set); +} + +struct address_set_for_each_data { + void (*f)(uintptr_t, void *); + void *data; +}; +static int address_set_do_for_each(uintptr_t v, void *data) { + struct address_set_for_each_data *for_each_data = data; + for_each_data->f(unhash_address(v), for_each_data->data); + return 0; +} +static inline void address_set_for_each(struct address_set *set, + void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline)); +static inline void address_set_for_each(struct address_set *set, + void (*f)(uintptr_t, void*), 
void *data) { + struct address_set_for_each_data for_each_data = { f, data }; + hash_set_find(&set->hash_set, address_set_do_for_each, &for_each_data); +} + +struct address_set_find_data { + int (*f)(uintptr_t, void *); + void *data; +}; +static int address_set_do_find(uintptr_t v, void *data) { + struct address_set_find_data *find_data = data; + return find_data->f(unhash_address(v), find_data->data); +} +static inline void address_set_find(struct address_set *set, + int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline)); +static inline void address_set_find(struct address_set *set, + int (*f)(uintptr_t, void*), void *data) { + struct address_set_find_data find_data = { f, data }; + hash_set_find(&set->hash_set, address_set_do_find, &find_data); +} + +#endif // ADDRESS_SET_H diff --git a/libguile/whippet/src/assert.h b/libguile/whippet/src/assert.h new file mode 100644 index 000000000..0c6db2f89 --- /dev/null +++ b/libguile/whippet/src/assert.h @@ -0,0 +1,16 @@ +#ifndef ASSERT_H +#define ASSERT_H + +#define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq") + +#define UNLIKELY(e) __builtin_expect(e, 0) +#define LIKELY(e) __builtin_expect(e, 1) + +#ifndef NDEBUG +#define ASSERT(x) do { if (UNLIKELY(!(x))) __builtin_trap(); } while (0) +#else +#define ASSERT(x) do { } while (0) +#endif +#define ASSERT_EQ(a,b) ASSERT((a) == (b)) + +#endif // ASSERT_H diff --git a/libguile/whippet/src/background-thread.h b/libguile/whippet/src/background-thread.h new file mode 100644 index 000000000..7a141cee0 --- /dev/null +++ b/libguile/whippet/src/background-thread.h @@ -0,0 +1,155 @@ +#ifndef BACKGROUND_THREAD_H +#define BACKGROUND_THREAD_H + +#include +#include +#include + +#include "assert.h" +#include "debug.h" + +enum { + GC_BACKGROUND_TASK_START = 0, + GC_BACKGROUND_TASK_MIDDLE = 100, + GC_BACKGROUND_TASK_END = 200 +}; + +struct gc_background_task { + int id; + int priority; + void (*run)(void *data); + void *data; +}; + +enum gc_background_thread_state { + GC_BACKGROUND_THREAD_STARTING, + GC_BACKGROUND_THREAD_RUNNING, + GC_BACKGROUND_THREAD_STOPPING +}; + +struct gc_background_thread { + size_t count; + size_t capacity; + struct gc_background_task *tasks; + int next_id; + enum gc_background_thread_state state; + pthread_t thread; + pthread_mutex_t lock; + pthread_cond_t cond; +}; + +static void* +gc_background_thread(void *data) { + struct gc_background_thread *thread = data; + pthread_mutex_lock(&thread->lock); + while (thread->state == GC_BACKGROUND_THREAD_STARTING) + pthread_cond_wait(&thread->cond, &thread->lock); + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts)) { + perror("background thread: failed to get time!"); + return NULL; + } + while (thread->state == GC_BACKGROUND_THREAD_RUNNING) { + ts.tv_sec += 1; + pthread_cond_timedwait(&thread->cond, &thread->lock, &ts); + if (thread->state == GC_BACKGROUND_THREAD_RUNNING) + for (size_t i = 0; i < thread->count; i++) + thread->tasks[i].run(thread->tasks[i].data); + } + pthread_mutex_unlock(&thread->lock); + return NULL; +} + +static struct gc_background_thread* +gc_make_background_thread(void) { + struct gc_background_thread *thread; + thread = malloc(sizeof(*thread)); + if (!thread) + GC_CRASH(); + memset(thread, 0, sizeof(*thread)); + thread->tasks = NULL; + thread->count = 0; + thread->capacity = 0; + thread->state = GC_BACKGROUND_THREAD_STARTING; + pthread_mutex_init(&thread->lock, NULL); + pthread_cond_init(&thread->cond, NULL); + if (pthread_create(&thread->thread, NULL, gc_background_thread, thread)) { + 
perror("spawning background thread failed"); + GC_CRASH(); + } + return thread; +} + +static void +gc_background_thread_start(struct gc_background_thread *thread) { + pthread_mutex_lock(&thread->lock); + GC_ASSERT_EQ(thread->state, GC_BACKGROUND_THREAD_STARTING); + thread->state = GC_BACKGROUND_THREAD_RUNNING; + pthread_mutex_unlock(&thread->lock); + pthread_cond_signal(&thread->cond); +} + +static int +gc_background_thread_add_task(struct gc_background_thread *thread, + int priority, void (*run)(void *data), + void *data) { + pthread_mutex_lock(&thread->lock); + if (thread->count == thread->capacity) { + size_t new_capacity = thread->capacity * 2 + 1; + struct gc_background_task *new_tasks = + realloc(thread->tasks, sizeof(struct gc_background_task) * new_capacity); + if (!new_tasks) { + perror("ran out of space for background tasks!"); + GC_CRASH(); + } + thread->capacity = new_capacity; + thread->tasks = new_tasks; + } + size_t insert = 0; + for (; insert < thread->count; insert++) { + if (priority < thread->tasks[insert].priority) + break; + } + size_t bytes_to_move = + (thread->count - insert) * sizeof(struct gc_background_task); + memmove(&thread->tasks[insert + 1], &thread->tasks[insert], bytes_to_move); + int id = thread->next_id++; + thread->tasks[insert].id = id; + thread->tasks[insert].priority = priority; + thread->tasks[insert].run = run; + thread->tasks[insert].data = data; + thread->count++; + pthread_mutex_unlock(&thread->lock); + return id; +} + +static void +gc_background_thread_remove_task(struct gc_background_thread *thread, + int id) { + pthread_mutex_lock(&thread->lock); + size_t remove = 0; + for (; remove < thread->count; remove++) { + if (thread->tasks[remove].id == id) + break; + } + if (remove == thread->count) + GC_CRASH(); + size_t bytes_to_move = + (thread->count - (remove + 1)) * sizeof(struct gc_background_task); + memmove(&thread->tasks[remove], &thread->tasks[remove + 1], bytes_to_move); + pthread_mutex_unlock(&thread->lock); +} + +static void +gc_destroy_background_thread(struct gc_background_thread *thread) { + pthread_mutex_lock(&thread->lock); + GC_ASSERT(thread->state == GC_BACKGROUND_THREAD_RUNNING); + thread->state = GC_BACKGROUND_THREAD_STOPPING; + pthread_mutex_unlock(&thread->lock); + pthread_cond_signal(&thread->cond); + pthread_join(thread->thread, NULL); + free(thread->tasks); + free(thread); +} + +#endif // BACKGROUND_THREAD_H diff --git a/libguile/whippet/src/bdw.c b/libguile/whippet/src/bdw.c new file mode 100644 index 000000000..ea446557d --- /dev/null +++ b/libguile/whippet/src/bdw.c @@ -0,0 +1,647 @@ +#include +#include +#include +#include + +#include "gc-api.h" +#include "gc-ephemeron.h" +#include "gc-tracepoint.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "bdw-attrs.h" + +#if GC_PRECISE_ROOTS +#error bdw-gc is a conservative collector +#endif + +#if !GC_CONSERVATIVE_ROOTS +#error bdw-gc is a conservative collector +#endif + +#if !GC_CONSERVATIVE_TRACE +#error bdw-gc is a conservative collector +#endif + +// When pthreads are used, let `libgc' know about it and redirect +// allocation calls such as `GC_MALLOC ()' to (contention-free, faster) +// thread-local allocation. + +#define GC_THREADS 1 +#define GC_REDIRECT_TO_LOCAL 1 + +// Don't #define pthread routines to their GC_pthread counterparts. +// Instead we will be careful inside the benchmarks to use API to +// register threads with libgc. 
+#define GC_NO_THREAD_REDIRECTS 1 + +#include +#include /* GC_generic_malloc_many */ +#include /* GC_generic_malloc */ + +#define GC_INLINE_GRANULE_WORDS 2 +#define GC_INLINE_GRANULE_BYTES (sizeof(void *) * GC_INLINE_GRANULE_WORDS) + +/* A freelist set contains GC_INLINE_FREELIST_COUNT pointers to singly + linked lists of objects of different sizes, the ith one containing + objects i + 1 granules in size. This setting of + GC_INLINE_FREELIST_COUNT will hold freelists for allocations of + up to 256 bytes. */ +#define GC_INLINE_FREELIST_COUNT (256U / GC_INLINE_GRANULE_BYTES) + +struct gc_heap { + struct gc_heap *freelist; // see mark_heap + pthread_mutex_t lock; + struct gc_heap_roots *roots; + struct gc_mutator *mutators; + struct gc_event_listener event_listener; + struct gc_finalizer_state *finalizer_state; + gc_finalizer_callback have_finalizers; + void *event_listener_data; +}; + +struct gc_mutator { + void *freelists[GC_INLINE_FREELIST_COUNT]; + void *pointerless_freelists[GC_INLINE_FREELIST_COUNT]; + struct gc_heap *heap; + struct gc_mutator_roots *roots; + struct gc_mutator *next; // with heap lock + struct gc_mutator **prev; // with heap lock + void *event_listener_data; +}; + +struct gc_heap *__the_bdw_gc_heap; +#define HEAP_EVENT(event, ...) do { \ + __the_bdw_gc_heap->event_listener.event(__the_bdw_gc_heap->event_listener_data, \ + ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) +#define MUTATOR_EVENT(mut, event, ...) do { \ + __the_bdw_gc_heap->event_listener.event(mut->event_listener_data, \ + ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) +static inline size_t gc_inline_bytes_to_freelist_index(size_t bytes) { + return (bytes - 1U) / GC_INLINE_GRANULE_BYTES; +} +static inline size_t gc_inline_freelist_object_size(size_t idx) { + return (idx + 1U) * GC_INLINE_GRANULE_BYTES; +} + +struct gc_heap* gc_mutator_heap(struct gc_mutator *mutator) { + return __the_bdw_gc_heap; +} +uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap) { + GC_CRASH(); +} +uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap) { + GC_CRASH(); +} + +// The values of these must match the internal POINTERLESS and NORMAL +// definitions in libgc, for which unfortunately there are no external +// definitions. Alack. 
+enum gc_inline_kind { + GC_INLINE_KIND_POINTERLESS, + GC_INLINE_KIND_NORMAL +}; + +static inline void * +allocate_small(void **freelist, size_t idx, enum gc_inline_kind kind) { + void *head = *freelist; + + if (!head) { + size_t bytes = gc_inline_freelist_object_size(idx); + GC_generic_malloc_many(bytes, kind, freelist); + head = *freelist; + if (GC_UNLIKELY (!head)) { + fprintf(stderr, "ran out of space, heap size %zu\n", + GC_get_heap_size()); + GC_CRASH(); + } + } + + *freelist = *(void **)(head); + + if (kind == GC_INLINE_KIND_POINTERLESS) + memset(head, 0, gc_inline_freelist_object_size(idx)); + + return head; +} + +void* gc_allocate_slow(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size != 0); + if (size <= gc_allocator_large_threshold()) { + size_t idx = gc_inline_bytes_to_freelist_index(size); + void **freelists; + enum gc_inline_kind freelist_kind; + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + return allocate_small(&mut->freelists[idx], idx, GC_INLINE_KIND_NORMAL); + case GC_ALLOCATION_TAGGED_POINTERLESS: + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return allocate_small(&mut->pointerless_freelists[idx], idx, + GC_INLINE_KIND_POINTERLESS); + default: + GC_CRASH(); + } + } else { + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + return GC_malloc(size); + case GC_ALLOCATION_TAGGED_POINTERLESS: + case GC_ALLOCATION_UNTAGGED_POINTERLESS: { + void *ret = GC_malloc_atomic(size); + memset(ret, 0, size); + return ret; + } + default: + GC_CRASH(); + } + } +} + +void gc_pin_object(struct gc_mutator *mut, struct gc_ref ref) { + // Nothing to do. +} + +void gc_collect(struct gc_mutator *mut, + enum gc_collection_kind requested_kind) { + switch (requested_kind) { + case GC_COLLECTION_MINOR: + GC_collect_a_little(); + break; + case GC_COLLECTION_ANY: + case GC_COLLECTION_MAJOR: + GC_gcollect(); + break; + case GC_COLLECTION_COMPACTING: + GC_gcollect_and_unmap(); + break; + default: + GC_CRASH(); + } +} + +int gc_object_is_old_generation_slow(struct gc_mutator *mut, + struct gc_ref obj) { + return 0; +} + +void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) { +} + +int* gc_safepoint_flag_loc(struct gc_mutator *mut) { GC_CRASH(); } +void gc_safepoint_slow(struct gc_mutator *mut) { GC_CRASH(); } + +struct bdw_mark_state { + struct GC_ms_entry *mark_stack_ptr; + struct GC_ms_entry *mark_stack_limit; +}; + +static void bdw_mark_edge(struct gc_edge edge, struct gc_heap *heap, + void *visit_data) { + struct bdw_mark_state *state = visit_data; + uintptr_t addr = gc_ref_value(gc_edge_ref(edge)); + state->mark_stack_ptr = GC_MARK_AND_PUSH ((void *) addr, + state->mark_stack_ptr, + state->mark_stack_limit, + NULL); +} + +static int heap_gc_kind; +static int mutator_gc_kind; +static int ephemeron_gc_kind; +static int finalizer_gc_kind; + +// In BDW-GC, we can't hook into the mark phase to call +// gc_trace_ephemerons_for_object, so the advertised ephemeron strategy +// doesn't really work. The primitives that we have are mark functions, +// which run during GC and can't allocate; finalizers, which run after +// GC and can allocate but can't add to the connectivity graph; and +// disappearing links, which are cleared at the end of marking, in the +// stop-the-world phase. It does not appear to be possible to implement +// ephemerons using these primitives. Instead fall back to weak-key +// tables. 
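/* For orientation, here is a minimal, hypothetical sketch of the classic
   libgc weak-reference idiom that the weak-key fallback builds on.  The
   names `weak_cell', `weak_cell_set' and `weak_cell_get' are not part of
   this file: the pointer is hidden so the cell does not keep its referent
   alive, and libgc clears the registered link when the referent (a
   libgc-allocated object) is collected.  The code below gets the same
   effect with a custom mark procedure plus GC_register_disappearing_link
   on the ephemeron's key edge. */
struct weak_cell { GC_word key; };

static void weak_cell_set(struct weak_cell *cell, void *obj) {
  cell->key = GC_HIDE_POINTER(obj);
  GC_general_register_disappearing_link((void**)&cell->key, obj);
}

static void* weak_cell_get(struct weak_cell *cell) {
  /* A production version would read the cell under
     GC_call_with_alloc_lock, to avoid racing with the collector as it
     clears the link. */
  return cell->key ? (void*)GC_REVEAL_POINTER(cell->key) : NULL;
}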
+ +struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) { + return GC_generic_malloc(gc_ephemeron_size(), ephemeron_gc_kind); +} + +unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { + return GC_get_gc_no(); +} + +void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { + gc_ephemeron_init_internal(mut->heap, ephemeron, key, value); + if (GC_base((void*)gc_ref_value(key))) { + struct gc_ref *loc = gc_edge_loc(gc_ephemeron_key_edge(ephemeron)); + GC_register_disappearing_link((void**)loc); + } +} + +int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { + // Pretend the key is traced, to avoid adding this ephemeron to the + // global table. + return 1; +} + +struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) { + return GC_generic_malloc(gc_finalizer_size(), finalizer_gc_kind); +} + +static void finalize_object(void *obj, void *data) { + struct gc_finalizer *f = data; + gc_finalizer_externally_fired(__the_bdw_gc_heap->finalizer_state, f); +} + +void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, + unsigned priority, struct gc_ref object, + struct gc_ref closure) { + // Don't bother much about the actual finalizer; just delegate to BDW-GC. + GC_finalization_proc prev = NULL; + void *prev_data = NULL; + gc_finalizer_init_internal(finalizer, object, closure); + gc_finalizer_externally_activated(finalizer); + GC_register_finalizer_no_order(gc_ref_heap_object(object), finalize_object, + finalizer, &prev, &prev_data); + // FIXME: Allow multiple finalizers per object. + GC_ASSERT(prev == NULL); + GC_ASSERT(prev_data == NULL); +} + +struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) { + GC_invoke_finalizers(); + return gc_finalizer_state_pop(mut->heap->finalizer_state); +} + +void gc_set_finalizer_callback(struct gc_heap *heap, + gc_finalizer_callback callback) { + heap->have_finalizers = callback; +} + +static void have_finalizers(void) { + struct gc_heap *heap = __the_bdw_gc_heap; + if (heap->have_finalizers) + heap->have_finalizers(heap, 1); +} + +static struct GC_ms_entry * +mark_ephemeron(GC_word *addr, struct GC_ms_entry *mark_stack_ptr, + struct GC_ms_entry *mark_stack_limit, GC_word env) { + + struct bdw_mark_state state = { + mark_stack_ptr, + mark_stack_limit, + }; + + struct gc_ephemeron *ephemeron = (struct gc_ephemeron*) addr; + + // If this ephemeron is on a freelist, its first word will be a + // freelist link and everything else will be NULL. + if (!gc_ref_value(gc_edge_ref(gc_ephemeron_value_edge(ephemeron)))) { + bdw_mark_edge(gc_edge(addr), NULL, &state); + return state.mark_stack_ptr; + } + + if (!gc_ref_value(gc_edge_ref(gc_ephemeron_key_edge(ephemeron)))) { + // If the key died in a previous collection, the disappearing link + // will have been cleared. Mark the ephemeron as dead. + gc_ephemeron_mark_dead(ephemeron); + } + + gc_trace_ephemeron(ephemeron, bdw_mark_edge, NULL, &state); + + return state.mark_stack_ptr; +} + +static struct GC_ms_entry * +mark_finalizer(GC_word *addr, struct GC_ms_entry *mark_stack_ptr, + struct GC_ms_entry *mark_stack_limit, GC_word env) { + + struct bdw_mark_state state = { + mark_stack_ptr, + mark_stack_limit, + }; + + struct gc_finalizer *finalizer = (struct gc_finalizer*) addr; + + // If this ephemeron is on a freelist, its first word will be a + // freelist link and everything else will be NULL. 
+ if (!gc_ref_value(gc_finalizer_object(finalizer))) { + bdw_mark_edge(gc_edge(addr), NULL, &state); + return state.mark_stack_ptr; + } + + gc_trace_finalizer(finalizer, bdw_mark_edge, NULL, &state); + + return state.mark_stack_ptr; +} + +static struct GC_ms_entry * +mark_heap(GC_word *addr, struct GC_ms_entry *mark_stack_ptr, + struct GC_ms_entry *mark_stack_limit, GC_word env) { + struct bdw_mark_state state = { + mark_stack_ptr, + mark_stack_limit, + }; + + struct gc_heap *heap = (struct gc_heap*) addr; + + // If this heap is on a freelist... well probably we are screwed, BDW + // isn't really made to do multiple heaps in a process. But still, in + // this case, the first word is the freelist and the rest are null. + if (heap->freelist) { + bdw_mark_edge(gc_edge(addr), NULL, &state); + return state.mark_stack_ptr; + } + + if (heap->roots) + gc_trace_heap_roots(heap->roots, bdw_mark_edge, heap, &state); + + gc_visit_finalizer_roots(heap->finalizer_state, bdw_mark_edge, heap, &state); + + state.mark_stack_ptr = GC_MARK_AND_PUSH (heap->mutators, + state.mark_stack_ptr, + state.mark_stack_limit, + NULL); + + return state.mark_stack_ptr; +} + +static struct GC_ms_entry * +mark_mutator(GC_word *addr, struct GC_ms_entry *mark_stack_ptr, + struct GC_ms_entry *mark_stack_limit, GC_word env) { + struct bdw_mark_state state = { + mark_stack_ptr, + mark_stack_limit, + }; + + struct gc_mutator *mut = (struct gc_mutator*) addr; + + // If this mutator is on a freelist, its first word will be a + // freelist link and everything else will be NULL. + if (!mut->heap) { + bdw_mark_edge(gc_edge(addr), NULL, &state); + return state.mark_stack_ptr; + } + + for (int i = 0; i < GC_INLINE_FREELIST_COUNT; i++) + state.mark_stack_ptr = GC_MARK_AND_PUSH (mut->freelists[i], + state.mark_stack_ptr, + state.mark_stack_limit, + NULL); + + for (int i = 0; i < GC_INLINE_FREELIST_COUNT; i++) + for (void *head = mut->pointerless_freelists[i]; head; head = *(void**)head) + state.mark_stack_ptr = GC_MARK_AND_PUSH (head, + state.mark_stack_ptr, + state.mark_stack_limit, + NULL); + + if (mut->roots) + gc_trace_mutator_roots(mut->roots, bdw_mark_edge, mut->heap, &state); + + state.mark_stack_ptr = GC_MARK_AND_PUSH (mut->next, + state.mark_stack_ptr, + state.mark_stack_limit, + NULL); + + return state.mark_stack_ptr; +} + +static inline struct gc_mutator *add_mutator(struct gc_heap *heap) { + struct gc_mutator *ret = + GC_generic_malloc(sizeof(struct gc_mutator), mutator_gc_kind); + ret->heap = heap; + ret->event_listener_data = + heap->event_listener.mutator_added(heap->event_listener_data); + + pthread_mutex_lock(&heap->lock); + ret->next = heap->mutators; + ret->prev = &heap->mutators; + if (ret->next) + ret->next->prev = &ret->next; + heap->mutators = ret; + pthread_mutex_unlock(&heap->lock); + + return ret; +} + +struct gc_options { + struct gc_common_options common; +}; +int gc_option_from_string(const char *str) { + return gc_common_option_from_string(str); +} +struct gc_options* gc_allocate_options(void) { + struct gc_options *ret = malloc(sizeof(struct gc_options)); + gc_init_common_options(&ret->common); + return ret; +} +int gc_options_set_int(struct gc_options *options, int option, int value) { + return gc_common_options_set_int(&options->common, option, value); +} +int gc_options_set_size(struct gc_options *options, int option, + size_t value) { + return gc_common_options_set_size(&options->common, option, value); +} +int gc_options_set_double(struct gc_options *options, int option, + double value) { + return 
gc_common_options_set_double(&options->common, option, value); +} +int gc_options_parse_and_set(struct gc_options *options, int option, + const char *value) { + return gc_common_options_parse_and_set(&options->common, option, value); +} + +struct gc_pending_ephemerons * +gc_heap_pending_ephemerons(struct gc_heap *heap) { + GC_CRASH(); + return NULL; +} + +static void on_collection_event(GC_EventType event) { + switch (event) { + case GC_EVENT_START: { + HEAP_EVENT(requesting_stop); + HEAP_EVENT(waiting_for_stop); + break; + } + case GC_EVENT_MARK_START: + HEAP_EVENT(mutators_stopped); + HEAP_EVENT(prepare_gc, GC_COLLECTION_MAJOR); + break; + case GC_EVENT_MARK_END: + HEAP_EVENT(roots_traced); + HEAP_EVENT(heap_traced); + break; + case GC_EVENT_RECLAIM_START: + break; + case GC_EVENT_RECLAIM_END: + // Sloppily attribute finalizers and eager reclamation to + // ephemerons. + HEAP_EVENT(ephemerons_traced); + HEAP_EVENT(live_data_size, GC_get_heap_size() - GC_get_free_bytes()); + break; + case GC_EVENT_END: + HEAP_EVENT(restarting_mutators); + break; + case GC_EVENT_PRE_START_WORLD: + case GC_EVENT_POST_STOP_WORLD: + // Can't rely on these, as they are only fired when threads are + // enabled. + break; + case GC_EVENT_THREAD_SUSPENDED: + case GC_EVENT_THREAD_UNSUSPENDED: + // No nice way to map back to the mutator. + break; + default: + break; + } +} + +static void on_heap_resize(GC_word size) { + HEAP_EVENT(heap_resized, size); +} + +uint64_t gc_allocation_counter(struct gc_heap *heap) { + return GC_get_total_bytes(); +} + +int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, + struct gc_heap **heap, struct gc_mutator **mutator, + struct gc_event_listener event_listener, + void *event_listener_data) { + // Root the heap, which will also cause all mutators to be marked. + GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_INLINE_GRANULE_BYTES); + GC_ASSERT_EQ(gc_allocator_large_threshold(), + GC_INLINE_FREELIST_COUNT * GC_INLINE_GRANULE_BYTES); + + GC_ASSERT_EQ(__the_bdw_gc_heap, NULL); + + if (!options) options = gc_allocate_options(); + + // Ignore stack base for main thread. + + switch (options->common.heap_size_policy) { + case GC_HEAP_SIZE_FIXED: + GC_set_max_heap_size(options->common.heap_size); + break; + case GC_HEAP_SIZE_GROWABLE: { + if (options->common.maximum_heap_size) + GC_set_max_heap_size(options->common.maximum_heap_size); + // BDW uses a pretty weird heap-sizing heuristic: + // + // heap-size = live-data * (1 + (2 / GC_free_space_divisor)) + // heap-size-multiplier = heap-size/live-data = 1 + 2/GC_free_space_divisor + // GC_free_space_divisor = 2/(heap-size-multiplier-1) + // + // (Assumption: your heap is mostly "composite", i.e. not + // "atomic". See bdw's alloc.c:min_bytes_allocd.) + double fsd = 2.0/(options->common.heap_size_multiplier - 1); + // But, the divisor is an integer. WTF. This caps the effective + // maximum heap multiplier at 3. Oh well. + GC_set_free_space_divisor(fsd + 0.51); + break; + } + case GC_HEAP_SIZE_ADAPTIVE: + default: + fprintf(stderr, "adaptive heap sizing unsupported by bdw-gc\n"); + return 0; + } + + GC_set_all_interior_pointers (0); + GC_set_finalize_on_demand (1); + GC_set_finalizer_notifier(have_finalizers); + + // Not part of 7.3, sigh. Have to set an env var. + // GC_set_markers_count(options->common.parallelism); + char markers[21] = {0,}; // 21 bytes enough for 2**64 in decimal + NUL. 
+ snprintf(markers, sizeof(markers), "%d", options->common.parallelism); + setenv("GC_MARKERS", markers, 1); + GC_init(); + size_t current_heap_size = GC_get_heap_size(); + if (options->common.heap_size > current_heap_size) + GC_expand_hp(options->common.heap_size - current_heap_size); + GC_allow_register_threads(); + + { + int add_size_to_descriptor = 0; + int clear_memory = 1; + + heap_gc_kind = GC_new_kind(GC_new_free_list(), + GC_MAKE_PROC(GC_new_proc(mark_heap), 0), + add_size_to_descriptor, clear_memory); + mutator_gc_kind = GC_new_kind(GC_new_free_list(), + GC_MAKE_PROC(GC_new_proc(mark_mutator), 0), + add_size_to_descriptor, clear_memory); + ephemeron_gc_kind = GC_new_kind(GC_new_free_list(), + GC_MAKE_PROC(GC_new_proc(mark_ephemeron), 0), + add_size_to_descriptor, clear_memory); + finalizer_gc_kind = GC_new_kind(GC_new_free_list(), + GC_MAKE_PROC(GC_new_proc(mark_finalizer), 0), + add_size_to_descriptor, clear_memory); + } + + *heap = GC_generic_malloc(sizeof(struct gc_heap), heap_gc_kind); + pthread_mutex_init(&(*heap)->lock, NULL); + + (*heap)->event_listener = event_listener; + (*heap)->event_listener_data = event_listener_data; + (*heap)->finalizer_state = gc_make_finalizer_state(); + + __the_bdw_gc_heap = *heap; + HEAP_EVENT(init, GC_get_heap_size()); + GC_set_on_collection_event(on_collection_event); + GC_set_on_heap_resize(on_heap_resize); + + *mutator = add_mutator(*heap); + + // Sanity check. + if (!GC_is_visible (&__the_bdw_gc_heap)) + abort (); + + return 1; +} + +struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base, + struct gc_heap *heap) { + struct GC_stack_base base = { stack_base }; + GC_register_my_thread(&base); + return add_mutator(heap); +} +void gc_finish_for_thread(struct gc_mutator *mut) { + pthread_mutex_lock(&mut->heap->lock); + MUTATOR_EVENT(mut, mutator_removed); + *mut->prev = mut->next; + if (mut->next) + mut->next->prev = mut->prev; + pthread_mutex_unlock(&mut->heap->lock); + + GC_unregister_my_thread(); +} + +void* gc_call_without_gc(struct gc_mutator *mut, + void* (*f)(void*), + void *data) { + return GC_do_blocking(f, data); +} + +void gc_mutator_set_roots(struct gc_mutator *mut, + struct gc_mutator_roots *roots) { + mut->roots = roots; +} +void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { + heap->roots = roots; +} +void gc_heap_set_extern_space(struct gc_heap *heap, + struct gc_extern_space *space) { +} diff --git a/libguile/whippet/src/copy-space.h b/libguile/whippet/src/copy-space.h new file mode 100644 index 000000000..7f262c221 --- /dev/null +++ b/libguile/whippet/src/copy-space.h @@ -0,0 +1,979 @@ +#ifndef COPY_SPACE_H +#define COPY_SPACE_H + +#include +#include + +#include "gc-api.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "assert.h" +#include "background-thread.h" +#include "debug.h" +#include "extents.h" +#include "gc-align.h" +#include "gc-attrs.h" +#include "gc-inline.h" +#include "gc-lock.h" +#include "gc-platform.h" +#include "spin.h" + +// A copy space: a block-structured space that traces via evacuation. 
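/* Geometry, as fixed by the macros below: each 64 MiB slab is cut into
   128 kiB blocks of two 64 kiB regions apiece, giving 512 blocks per
   slab.  Block headers are 128 kiB / 512 = 256 bytes each, so the slab
   header together with the 511 block headers fills exactly one block,
   leaving 511 payload blocks per slab; the static asserts that follow
   check this layout. */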
+ +#define COPY_SPACE_SLAB_SIZE (64 * 1024 * 1024) +#define COPY_SPACE_REGION_SIZE (64 * 1024) +#define COPY_SPACE_BLOCK_SIZE (2 * COPY_SPACE_REGION_SIZE) +#define COPY_SPACE_BLOCKS_PER_SLAB \ + (COPY_SPACE_SLAB_SIZE / COPY_SPACE_BLOCK_SIZE) +#define COPY_SPACE_HEADER_BYTES_PER_BLOCK \ + (COPY_SPACE_BLOCK_SIZE / COPY_SPACE_BLOCKS_PER_SLAB) +#define COPY_SPACE_HEADER_BLOCKS_PER_SLAB 1 +#define COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB \ + (COPY_SPACE_BLOCKS_PER_SLAB - COPY_SPACE_HEADER_BLOCKS_PER_SLAB) +#define COPY_SPACE_HEADER_BYTES_PER_SLAB \ + (COPY_SPACE_HEADER_BYTES_PER_BLOCK * COPY_SPACE_HEADER_BLOCKS_PER_SLAB) + +struct copy_space_slab; + +struct copy_space_slab_header { + union { + struct { + struct copy_space_slab *next; + struct copy_space_slab *prev; + unsigned incore_block_count; + }; + uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_SLAB]; + }; +}; +STATIC_ASSERT_EQ(sizeof(struct copy_space_slab_header), + COPY_SPACE_HEADER_BYTES_PER_SLAB); + +// Really just the block header. +struct copy_space_block { + union { + struct { + struct copy_space_block *next; + uint8_t in_core; + uint8_t all_zeroes[2]; + uint8_t is_survivor[2]; + size_t allocated; // For partly-empty blocks. + }; + uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_BLOCK]; + }; +}; +STATIC_ASSERT_EQ(sizeof(struct copy_space_block), + COPY_SPACE_HEADER_BYTES_PER_BLOCK); + +struct copy_space_region { + char data[COPY_SPACE_REGION_SIZE]; +}; + +struct copy_space_block_payload { + struct copy_space_region regions[2]; +}; + +struct copy_space_slab { + struct copy_space_slab_header header; + struct copy_space_block headers[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB]; + struct copy_space_block_payload blocks[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB]; +}; +STATIC_ASSERT_EQ(sizeof(struct copy_space_slab), COPY_SPACE_SLAB_SIZE); + +static inline struct copy_space_block* +copy_space_block_for_addr(uintptr_t addr) { + uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE); + struct copy_space_slab *slab = (struct copy_space_slab*) base; + uintptr_t block_idx = + (addr / COPY_SPACE_BLOCK_SIZE) % COPY_SPACE_BLOCKS_PER_SLAB; + return &slab->headers[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB]; +} + +static inline struct copy_space_block* +copy_space_block_header(struct copy_space_block_payload *payload) { + return copy_space_block_for_addr((uintptr_t) payload); +} + +static inline struct copy_space_block_payload* +copy_space_block_payload(struct copy_space_block *block) { + uintptr_t addr = (uintptr_t) block; + uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE); + struct copy_space_slab *slab = (struct copy_space_slab*) base; + uintptr_t block_idx = + (addr / COPY_SPACE_HEADER_BYTES_PER_BLOCK) % COPY_SPACE_BLOCKS_PER_SLAB; + return &slab->blocks[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB]; +} + +static uint8_t +copy_space_object_region(struct gc_ref obj) { + return (gc_ref_value(obj) / COPY_SPACE_REGION_SIZE) & 1; +} + +#define COPY_SPACE_PAGE_OUT_QUEUE_SIZE 4 + +struct copy_space_block_list { + struct copy_space_block *head; +}; + +struct copy_space_block_stack { + struct copy_space_block_list list; +}; + +enum copy_space_flags { + COPY_SPACE_ATOMIC_FORWARDING = 1, + COPY_SPACE_ALIGNED = 2, + COPY_SPACE_HAS_FIELD_LOGGING_BITS = 4, +}; + +struct copy_space { + pthread_mutex_t lock; + struct copy_space_block_stack empty; + struct copy_space_block_stack partly_full; + struct copy_space_block_list full ALIGNED_TO_AVOID_FALSE_SHARING; + size_t allocated_bytes; + size_t fragmentation; + struct copy_space_block_stack 
paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE] + ALIGNED_TO_AVOID_FALSE_SHARING; + ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING; + // The rest of these members are only changed rarely and with the heap + // lock. + uint8_t active_region ALIGNED_TO_AVOID_FALSE_SHARING; + uint8_t atomic_forward; + uint8_t in_gc; + uint32_t flags; + size_t allocated_bytes_at_last_gc; + size_t fragmentation_at_last_gc; + struct extents *extents; + struct copy_space_slab **slabs; + size_t nslabs; +}; + +enum copy_space_forward_result { + // We went to forward an edge, but the target was already forwarded, so we + // just updated the edge. + COPY_SPACE_FORWARD_UPDATED, + // We went to forward an edge and evacuated the referent to a new location. + COPY_SPACE_FORWARD_EVACUATED, + // We went to forward an edge but failed to acquire memory for its new + // location. + COPY_SPACE_FORWARD_FAILED, +}; + +struct copy_space_allocator { + uintptr_t hp; + uintptr_t limit; + struct copy_space_block *block; +}; + +static struct gc_lock +copy_space_lock(struct copy_space *space) { + return gc_lock_acquire(&space->lock); +} + +static void +copy_space_block_list_push(struct copy_space_block_list *list, + struct copy_space_block *block) { + struct copy_space_block *next = + atomic_load_explicit(&list->head, memory_order_acquire); + do { + block->next = next; + } while (!atomic_compare_exchange_weak(&list->head, &next, block)); +} + +static struct copy_space_block* +copy_space_block_list_pop(struct copy_space_block_list *list) { + struct copy_space_block *head = + atomic_load_explicit(&list->head, memory_order_acquire); + struct copy_space_block *next; + do { + if (!head) + return NULL; + } while (!atomic_compare_exchange_weak(&list->head, &head, head->next)); + head->next = NULL; + return head; +} + +static void +copy_space_block_stack_push(struct copy_space_block_stack *stack, + struct copy_space_block *block, + const struct gc_lock *lock) { + struct copy_space_block *next = stack->list.head; + block->next = next; + stack->list.head = block; +} + +static struct copy_space_block* +copy_space_block_stack_pop(struct copy_space_block_stack *stack, + const struct gc_lock *lock) { + struct copy_space_block *head = stack->list.head; + if (head) { + stack->list.head = head->next; + head->next = NULL; + } + return head; +} + +static struct copy_space_block* +copy_space_pop_empty_block(struct copy_space *space, + const struct gc_lock *lock) { + struct copy_space_block *ret = copy_space_block_stack_pop(&space->empty, + lock); + if (ret) { + ret->allocated = 0; + ret->is_survivor[space->active_region] = 0; + } + return ret; +} + +static void +copy_space_push_empty_block(struct copy_space *space, + struct copy_space_block *block, + const struct gc_lock *lock) { + copy_space_block_stack_push(&space->empty, block, lock); +} + +static struct copy_space_block* +copy_space_pop_full_block(struct copy_space *space) { + return copy_space_block_list_pop(&space->full); +} + +static void +copy_space_push_full_block(struct copy_space *space, + struct copy_space_block *block) { + if (space->in_gc) + block->is_survivor[space->active_region] = 1; + copy_space_block_list_push(&space->full, block); +} + +static struct copy_space_block* +copy_space_pop_partly_full_block(struct copy_space *space, + const struct gc_lock *lock) { + return copy_space_block_stack_pop(&space->partly_full, lock); +} + +static void +copy_space_push_partly_full_block(struct copy_space *space, + struct copy_space_block *block, + const struct gc_lock *lock) { + 
copy_space_block_stack_push(&space->partly_full, block, lock); +} + +static void +copy_space_page_out_block(struct copy_space *space, + struct copy_space_block *block, + const struct gc_lock *lock) { + copy_space_block_stack_push + (block->in_core + ? &space->paged_out[0] + : &space->paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE-1], + block, + lock); +} + +static struct copy_space_block* +copy_space_page_in_block(struct copy_space *space, + const struct gc_lock *lock) { + for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) { + struct copy_space_block *block = + copy_space_block_stack_pop(&space->paged_out[age], lock); + if (block) return block; + } + return NULL; +} + +static ssize_t +copy_space_request_release_memory(struct copy_space *space, size_t bytes) { + return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes; +} + +static int +copy_space_page_out_blocks_until_memory_released(struct copy_space *space) { + ssize_t pending = atomic_load(&space->bytes_to_page_out); + struct gc_lock lock = copy_space_lock(space); + while (pending > 0) { + struct copy_space_block *block = copy_space_pop_empty_block(space, &lock); + if (!block) break; + copy_space_page_out_block(space, block, &lock); + pending = (atomic_fetch_sub(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE) + - COPY_SPACE_BLOCK_SIZE); + } + gc_lock_release(&lock); + return pending <= 0; +} + +static ssize_t +copy_space_maybe_reacquire_memory(struct copy_space *space, size_t bytes) { + ssize_t pending = + atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes; + struct gc_lock lock = copy_space_lock(space); + while (pending + COPY_SPACE_BLOCK_SIZE <= 0) { + struct copy_space_block *block = copy_space_page_in_block(space, &lock); + if (!block) break; + copy_space_push_empty_block(space, block, &lock); + pending = (atomic_fetch_add(&space->bytes_to_page_out, + COPY_SPACE_BLOCK_SIZE) + + COPY_SPACE_BLOCK_SIZE); + } + gc_lock_release(&lock); + return pending; +} + +static void +copy_space_reacquire_memory(struct copy_space *space, size_t bytes) { + ssize_t pending = copy_space_maybe_reacquire_memory(space, bytes); + GC_ASSERT(pending + COPY_SPACE_BLOCK_SIZE > 0); +} + +static inline int +copy_space_contains_address(struct copy_space *space, uintptr_t addr) { + return extents_contain_addr(space->extents, addr); +} + +static inline int +copy_space_contains(struct copy_space *space, struct gc_ref ref) { + return copy_space_contains_address(space, gc_ref_value(ref)); +} + +static int +copy_space_has_field_logging_bits(struct copy_space *space) { + return space->flags & COPY_SPACE_HAS_FIELD_LOGGING_BITS; +} + +static size_t +copy_space_field_logging_blocks(struct copy_space *space) { + if (!copy_space_has_field_logging_bits(space)) + return 0; + size_t bytes = COPY_SPACE_SLAB_SIZE / sizeof (uintptr_t) / 8; + size_t blocks = + align_up(bytes, COPY_SPACE_BLOCK_SIZE) / COPY_SPACE_BLOCK_SIZE; + return blocks; +} + +static uint8_t* +copy_space_field_logged_byte(struct gc_edge edge) { + uintptr_t addr = gc_edge_address(edge); + uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE); + base += offsetof(struct copy_space_slab, blocks); + uintptr_t field = (addr & (COPY_SPACE_SLAB_SIZE - 1)) / sizeof(uintptr_t); + uintptr_t byte = field / 8; + return (uint8_t*) (base + byte); +} + +static uint8_t +copy_space_field_logged_bit(struct gc_edge edge) { + // Each byte has 8 bytes, covering 8 fields. 
+ size_t field = gc_edge_address(edge) / sizeof(uintptr_t); + return 1 << (field % 8); +} + +static void +copy_space_clear_field_logged_bits_for_region(struct copy_space *space, + void *region_base) { + uintptr_t addr = (uintptr_t)region_base; + GC_ASSERT_EQ(addr, align_down(addr, COPY_SPACE_REGION_SIZE)); + GC_ASSERT(copy_space_contains_address(space, addr)); + if (copy_space_has_field_logging_bits(space)) + memset(copy_space_field_logged_byte(gc_edge(region_base)), + 0, + COPY_SPACE_REGION_SIZE / sizeof(uintptr_t) / 8); +} + +static void +copy_space_clear_field_logged_bits_for_block(struct copy_space *space, + struct copy_space_block *block) { + struct copy_space_block_payload *payload = copy_space_block_payload(block); + copy_space_clear_field_logged_bits_for_region(space, &payload->regions[0]); + copy_space_clear_field_logged_bits_for_region(space, &payload->regions[1]); +} + +static inline void +copy_space_allocator_set_block(struct copy_space_allocator *alloc, + struct copy_space_block *block, + int active_region) { + struct copy_space_block_payload *payload = copy_space_block_payload(block); + struct copy_space_region *region = &payload->regions[active_region]; + alloc->block = block; + alloc->hp = (uintptr_t)®ion[0]; + alloc->limit = (uintptr_t)®ion[1]; +} + +static inline int +copy_space_allocator_acquire_block(struct copy_space_allocator *alloc, + struct copy_space_block *block, + int active_region) { + if (block) { + copy_space_allocator_set_block(alloc, block, active_region); + return 1; + } + return 0; +} + +static int +copy_space_allocator_acquire_empty_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + struct gc_lock lock = copy_space_lock(space); + struct copy_space_block *block = copy_space_pop_empty_block(space, &lock); + gc_lock_release(&lock); + if (copy_space_allocator_acquire_block(alloc, block, space->active_region)) { + block->in_core = 1; + if (block->all_zeroes[space->active_region]) { + block->all_zeroes[space->active_region] = 0; + } else { + memset((char*)alloc->hp, 0, COPY_SPACE_REGION_SIZE); + copy_space_clear_field_logged_bits_for_region(space, (void*)alloc->hp); + } + return 1; + } + return 0; +} + +static int +copy_space_allocator_acquire_partly_full_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + struct gc_lock lock = copy_space_lock(space); + struct copy_space_block *block = copy_space_pop_partly_full_block(space, + &lock); + gc_lock_release(&lock); + if (copy_space_allocator_acquire_block(alloc, block, space->active_region)) { + alloc->hp += block->allocated; + return 1; + } + return 0; +} + +static void +copy_space_allocator_release_full_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + size_t fragmentation = alloc->limit - alloc->hp; + size_t allocated = COPY_SPACE_REGION_SIZE - alloc->block->allocated; + atomic_fetch_add_explicit(&space->allocated_bytes, allocated, + memory_order_relaxed); + if (fragmentation) + atomic_fetch_add_explicit(&space->fragmentation, fragmentation, + memory_order_relaxed); + copy_space_push_full_block(space, alloc->block); + alloc->hp = alloc->limit = 0; + alloc->block = NULL; +} + +static void +copy_space_allocator_release_partly_full_block(struct copy_space_allocator *alloc, + struct copy_space *space) { + size_t allocated = alloc->hp & (COPY_SPACE_REGION_SIZE - 1); + if (allocated) { + atomic_fetch_add_explicit(&space->allocated_bytes, + allocated - alloc->block->allocated, + memory_order_relaxed); + alloc->block->allocated = allocated; + struct 
gc_lock lock = copy_space_lock(space); + copy_space_push_partly_full_block(space, alloc->block, &lock); + gc_lock_release(&lock); + } else { + // In this case, hp was bumped all the way to the limit, in which + // case allocated wraps to 0; the block is full. + atomic_fetch_add_explicit(&space->allocated_bytes, + COPY_SPACE_REGION_SIZE - alloc->block->allocated, + memory_order_relaxed); + copy_space_push_full_block(space, alloc->block); + } + alloc->hp = alloc->limit = 0; + alloc->block = NULL; +} + +static inline struct gc_ref +copy_space_allocate(struct copy_space_allocator *alloc, + struct copy_space *space, + size_t size) { + GC_ASSERT(size > 0); + GC_ASSERT(size <= gc_allocator_large_threshold()); + size = align_up(size, gc_allocator_small_granule_size()); + + if (alloc->hp + size <= alloc->limit) + goto done; + + if (alloc->block) + copy_space_allocator_release_full_block(alloc, space); + while (copy_space_allocator_acquire_partly_full_block(alloc, space)) { + if (alloc->hp + size <= alloc->limit) + goto done; + copy_space_allocator_release_full_block(alloc, space); + } + if (!copy_space_allocator_acquire_empty_block(alloc, space)) + return gc_ref_null(); + // The newly acquired block is empty and is therefore large enough for + // a small allocation. + +done: + struct gc_ref ret = gc_ref(alloc->hp); + alloc->hp += size; + return ret; +} + +static struct copy_space_block* +copy_space_append_block_lists(struct copy_space_block *head, + struct copy_space_block *tail) { + if (!head) return tail; + if (tail) { + struct copy_space_block *walk = head; + while (walk->next) + walk = walk->next; + walk->next = tail; + } + return head; +} + +static void +copy_space_flip(struct copy_space *space) { + // Mutators stopped, can access nonatomically. + struct copy_space_block* flip = space->full.head; + flip = copy_space_append_block_lists(space->partly_full.list.head, flip); + flip = copy_space_append_block_lists(space->empty.list.head, flip); + space->empty.list.head = flip; + space->partly_full.list.head = NULL; + space->full.head = NULL; + space->allocated_bytes = 0; + space->fragmentation = 0; + space->active_region ^= 1; + space->in_gc = 1; +} + +static inline void +copy_space_allocator_init(struct copy_space_allocator *alloc) { + memset(alloc, 0, sizeof(*alloc)); +} + +static inline void +copy_space_allocator_finish(struct copy_space_allocator *alloc, + struct copy_space *space) { + if (alloc->block) + copy_space_allocator_release_partly_full_block(alloc, space); +} + +static void +copy_space_finish_gc(struct copy_space *space, int is_minor_gc) { + // Mutators stopped, can access nonatomically. + if (is_minor_gc) { + // Avoid mixing survivors and new objects on the same blocks. + struct copy_space_allocator alloc; + copy_space_allocator_init(&alloc); + while (copy_space_allocator_acquire_partly_full_block(&alloc, space)) + copy_space_allocator_release_full_block(&alloc, space); + copy_space_allocator_finish(&alloc, space); + } + + space->allocated_bytes_at_last_gc = space->allocated_bytes; + space->fragmentation_at_last_gc = space->fragmentation; + space->in_gc = 0; +} + +static size_t +copy_space_can_allocate(struct copy_space *space, size_t bytes) { + // With lock! 
+ size_t count = 0; + for (struct copy_space_block *empties = space->empty.list.head; + empties && count < bytes; + empties = empties->next) { + count += COPY_SPACE_REGION_SIZE; + } + return count; +} + +static void +copy_space_add_to_allocation_counter(struct copy_space *space, + uint64_t *counter) { + *counter += space->allocated_bytes - space->allocated_bytes_at_last_gc; +} + +static void +copy_space_gc_during_evacuation(void *data) { + // If space is really tight and reordering of objects during + // evacuation resulted in more end-of-block fragmentation and thus + // block use than before collection started, we can actually run out + // of memory while collecting. We should probably attempt to expand + // the heap here, at least by a single block; it's better than the + // alternatives. + fprintf(stderr, "Out of memory\n"); + GC_CRASH(); +} + +static inline enum copy_space_forward_result +copy_space_forward_atomic(struct copy_space *space, struct gc_edge edge, + struct gc_ref old_ref, + struct copy_space_allocator *alloc) { + struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); + +retry: + if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) + gc_atomic_forward_acquire(&fwd); + + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + default: + // Impossible. + GC_CRASH(); + case GC_FORWARDING_STATE_ACQUIRED: { + // We claimed the object successfully; evacuating is up to us. + size_t bytes = gc_atomic_forward_object_size(&fwd); + struct gc_ref new_ref = copy_space_allocate(alloc, space, bytes); + if (gc_ref_is_null(new_ref)) { + gc_atomic_forward_abort(&fwd); + return COPY_SPACE_FORWARD_FAILED; + } + // Copy object contents before committing, as we don't know what + // part of the object (if any) will be overwritten by the + // commit. + memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), bytes); + gc_atomic_forward_commit(&fwd, new_ref); + gc_edge_update(edge, new_ref); + return COPY_SPACE_FORWARD_EVACUATED; + } + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known, or evacuation aborts. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + goto retry; + yield_for_spin(spin_count); + } + GC_CRASH(); // Unreachable. + case GC_FORWARDING_STATE_FORWARDED: + // The object has been evacuated already. Update the edge; + // whoever forwarded the object will make sure it's eventually + // traced. + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return COPY_SPACE_FORWARD_UPDATED; + } +} + +static int +copy_space_forward_if_traced_atomic(struct copy_space *space, + struct gc_edge edge, + struct gc_ref old_ref) { + struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref); +retry: + switch (fwd.state) { + case GC_FORWARDING_STATE_NOT_FORWARDED: + return 0; + case GC_FORWARDING_STATE_BUSY: + // Someone else claimed this object first. Spin until new address + // known. + for (size_t spin_count = 0;; spin_count++) { + if (gc_atomic_forward_retry_busy(&fwd)) + goto retry; + yield_for_spin(spin_count); + } + GC_CRASH(); // Unreachable. 
+ case GC_FORWARDING_STATE_FORWARDED: + gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd))); + return 1; + default: + GC_CRASH(); + } +} + +static inline enum copy_space_forward_result +copy_space_forward_nonatomic(struct copy_space *space, struct gc_edge edge, + struct gc_ref old_ref, + struct copy_space_allocator *alloc) { + uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref); + if (forwarded) { + gc_edge_update(edge, gc_ref(forwarded)); + return COPY_SPACE_FORWARD_UPDATED; + } else { + size_t size; + gc_trace_object(old_ref, NULL, NULL, NULL, &size); + struct gc_ref new_ref = copy_space_allocate(alloc, space, size); + if (gc_ref_is_null(new_ref)) + return COPY_SPACE_FORWARD_FAILED; + memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), size); + gc_object_forward_nonatomic(old_ref, new_ref); + gc_edge_update(edge, new_ref); + return COPY_SPACE_FORWARD_EVACUATED; + } +} + +static int +copy_space_forward_if_traced_nonatomic(struct copy_space *space, + struct gc_edge edge, + struct gc_ref old_ref) { + uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref); + if (forwarded) { + gc_edge_update(edge, gc_ref(forwarded)); + return 1; + } + return 0; +} + +static inline enum copy_space_forward_result +copy_space_forward(struct copy_space *src_space, struct copy_space *dst_space, + struct gc_edge edge, + struct gc_ref old_ref, + struct copy_space_allocator *dst_alloc) { + GC_ASSERT(copy_space_contains(src_space, old_ref)); + GC_ASSERT(src_space != dst_space + || copy_space_object_region(old_ref) != src_space->active_region); + if (GC_PARALLEL && src_space->atomic_forward) + return copy_space_forward_atomic(dst_space, edge, old_ref, dst_alloc); + return copy_space_forward_nonatomic(dst_space, edge, old_ref, dst_alloc); +} + +static inline int +copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge, + struct gc_ref old_ref) { + GC_ASSERT(copy_space_contains(space, old_ref)); + GC_ASSERT(copy_space_object_region(old_ref) != space->active_region); + if (GC_PARALLEL && space->atomic_forward) + return copy_space_forward_if_traced_atomic(space, edge, old_ref); + return copy_space_forward_if_traced_nonatomic(space, edge, old_ref); +} + +static int +copy_space_is_aligned(struct copy_space *space) { + return space->flags & COPY_SPACE_ALIGNED; +} + +static int +copy_space_fixed_size(struct copy_space *space) { + // If the extent is aligned, it is fixed. 
+ return copy_space_is_aligned(space); +} + +static inline uintptr_t +copy_space_low_aligned_address(struct copy_space *space) { + GC_ASSERT(copy_space_is_aligned(space)); + GC_ASSERT_EQ(space->extents->size, 1); + return space->extents->ranges[0].lo_addr; +} + +static inline uintptr_t +copy_space_high_aligned_address(struct copy_space *space) { + GC_ASSERT(copy_space_is_aligned(space)); + GC_ASSERT_EQ(space->extents->size, 1); + return space->extents->ranges[0].hi_addr; +} + +static inline int +copy_space_contains_address_aligned(struct copy_space *space, uintptr_t addr) { + uintptr_t low_addr = copy_space_low_aligned_address(space); + uintptr_t high_addr = copy_space_high_aligned_address(space); + uintptr_t size = high_addr - low_addr; + return (addr - low_addr) < size; +} + +static inline int +copy_space_contains_edge_aligned(struct copy_space *space, + struct gc_edge edge) { + return copy_space_contains_address_aligned(space, gc_edge_address(edge)); +} + +static inline int +copy_space_should_promote(struct copy_space *space, struct gc_ref ref) { + GC_ASSERT(copy_space_contains(space, ref)); + uintptr_t addr = gc_ref_value(ref); + struct copy_space_block *block = copy_space_block_for_addr(gc_ref_value(ref)); + GC_ASSERT_EQ(copy_space_object_region(ref), space->active_region ^ 1); + return block->is_survivor[space->active_region ^ 1]; +} + +static int +copy_space_contains_edge(struct copy_space *space, struct gc_edge edge) { + return copy_space_contains_address(space, gc_edge_address(edge)); +} + +static int +copy_space_remember_edge(struct copy_space *space, struct gc_edge edge) { + GC_ASSERT(copy_space_contains_edge(space, edge)); + uint8_t* loc = copy_space_field_logged_byte(edge); + uint8_t bit = copy_space_field_logged_bit(edge); + uint8_t byte = atomic_load_explicit(loc, memory_order_acquire); + do { + if (byte & bit) return 0; + } while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit, + memory_order_acq_rel, + memory_order_acquire)); + return 1; +} + +static int +copy_space_forget_edge(struct copy_space *space, struct gc_edge edge) { + GC_ASSERT(copy_space_contains_edge(space, edge)); + uint8_t* loc = copy_space_field_logged_byte(edge); + uint8_t bit = copy_space_field_logged_bit(edge); + uint8_t byte = atomic_load_explicit(loc, memory_order_acquire); + do { + if (!(byte & bit)) return 0; + } while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte&~bit, + memory_order_acq_rel, + memory_order_acquire)); + return 1; +} + +static size_t copy_space_is_power_of_two(size_t n) { + GC_ASSERT(n != 0); + return (n & (n - 1)) == 0; +} + +static size_t copy_space_round_up_power_of_two(size_t n) { + if (copy_space_is_power_of_two(n)) + return n; + + return 1ULL << (sizeof(size_t) * 8 - __builtin_clzll(n)); +} + +static struct copy_space_slab* +copy_space_allocate_slabs(size_t nslabs, uint32_t flags) { + size_t size = nslabs * COPY_SPACE_SLAB_SIZE; + size_t alignment = COPY_SPACE_SLAB_SIZE; + if (flags & COPY_SPACE_ALIGNED) { + GC_ASSERT(copy_space_is_power_of_two(size)); + alignment = size; + } + return gc_platform_acquire_memory(size, alignment); +} + +static void +copy_space_add_slabs(struct copy_space *space, struct copy_space_slab *slabs, + size_t nslabs) { + size_t old_size = space->nslabs * sizeof(struct copy_space_slab*); + size_t additional_size = nslabs * sizeof(struct copy_space_slab*); + space->extents = extents_adjoin(space->extents, slabs, + nslabs * sizeof(struct copy_space_slab)); + space->slabs = realloc(space->slabs, old_size + additional_size); + if 
(!space->slabs) + GC_CRASH(); + while (nslabs--) + space->slabs[space->nslabs++] = slabs++; +} + +static void +copy_space_shrink(struct copy_space *space, size_t bytes) { + ssize_t pending = copy_space_request_release_memory(space, bytes); + copy_space_page_out_blocks_until_memory_released(space); + + // It still may be the case we need to page out more blocks. Only collection + // can help us then! +} + +static size_t +copy_space_first_payload_block(struct copy_space *space) { + return copy_space_field_logging_blocks(space); +} + +static void +copy_space_expand(struct copy_space *space, size_t bytes) { + GC_ASSERT(!copy_space_fixed_size(space)); + ssize_t to_acquire = -copy_space_maybe_reacquire_memory(space, bytes); + if (to_acquire <= 0) return; + size_t reserved = align_up(to_acquire, COPY_SPACE_SLAB_SIZE); + size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE; + struct copy_space_slab *slabs = + copy_space_allocate_slabs(nslabs, space->flags); + copy_space_add_slabs(space, slabs, nslabs); + + struct gc_lock lock = copy_space_lock(space); + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = copy_space_first_payload_block(space); + idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; + idx++) { + struct copy_space_block *block = &slabs[slab].headers[idx]; + block->all_zeroes[0] = block->all_zeroes[1] = 1; + block->in_core = 0; + copy_space_page_out_block(space, block, &lock); + reserved -= COPY_SPACE_BLOCK_SIZE; + } + } + gc_lock_release(&lock); + copy_space_reacquire_memory(space, 0); +} + +static void +copy_space_advance_page_out_queue(void *data) { + struct copy_space *space = data; + struct gc_lock lock = copy_space_lock(space); + for (int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) { + while (1) { + struct copy_space_block *block = + copy_space_block_stack_pop(&space->paged_out[age], &lock); + if (!block) break; + copy_space_block_stack_push(&space->paged_out[age + 1], block, &lock); + } + } + gc_lock_release(&lock); +} + +static void +copy_space_page_out_blocks(void *data) { + struct copy_space *space = data; + int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 2; + struct gc_lock lock = copy_space_lock(space); + while (1) { + struct copy_space_block *block = + copy_space_block_stack_pop(&space->paged_out[age], &lock); + if (!block) break; + block->in_core = 0; + block->all_zeroes[0] = block->all_zeroes[1] = 1; + gc_platform_discard_memory(copy_space_block_payload(block), + COPY_SPACE_BLOCK_SIZE); + copy_space_clear_field_logged_bits_for_block(space, block); + copy_space_block_stack_push(&space->paged_out[age + 1], block, &lock); + } + gc_lock_release(&lock); +} + +static int +copy_space_init(struct copy_space *space, size_t size, uint32_t flags, + struct gc_background_thread *thread) { + size = align_up(size, COPY_SPACE_BLOCK_SIZE); + size_t reserved = align_up(size, COPY_SPACE_SLAB_SIZE); + if (flags & COPY_SPACE_ALIGNED) + reserved = copy_space_round_up_power_of_two(reserved); + size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE; + struct copy_space_slab *slabs = copy_space_allocate_slabs(nslabs, flags); + if (!slabs) + return 0; + + pthread_mutex_init(&space->lock, NULL); + space->empty.list.head = NULL; + space->partly_full.list.head = NULL; + space->full.head = NULL; + for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) + space->paged_out[age].list.head = NULL; + space->allocated_bytes = 0; + space->fragmentation = 0; + space->bytes_to_page_out = 0; + space->active_region = 0; + space->atomic_forward = flags & COPY_SPACE_ATOMIC_FORWARDING; + space->flags = 
flags; + space->allocated_bytes_at_last_gc = 0; + space->fragmentation_at_last_gc = 0; + space->extents = extents_allocate((flags & COPY_SPACE_ALIGNED) ? 1 : 10); + copy_space_add_slabs(space, slabs, nslabs); + struct gc_lock lock = copy_space_lock(space); + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = copy_space_first_payload_block(space); + idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB; + idx++) { + struct copy_space_block *block = &slabs[slab].headers[idx]; + block->all_zeroes[0] = block->all_zeroes[1] = 1; + block->in_core = 0; + block->is_survivor[0] = block->is_survivor[1] = 0; + if (reserved > size) { + copy_space_page_out_block(space, block, &lock); + reserved -= COPY_SPACE_BLOCK_SIZE; + } else { + copy_space_push_empty_block(space, block, &lock); + } + } + } + gc_lock_release(&lock); + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START, + copy_space_advance_page_out_queue, + space); + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END, + copy_space_page_out_blocks, + space); + return 1; +} + +#endif // COPY_SPACE_H diff --git a/libguile/whippet/src/debug.h b/libguile/whippet/src/debug.h new file mode 100644 index 000000000..7b161c556 --- /dev/null +++ b/libguile/whippet/src/debug.h @@ -0,0 +1,10 @@ +#ifndef DEBUG_H +#define DEBUG_H + +#ifndef NDEBUG +#define DEBUG(...) fprintf (stderr, "DEBUG: " __VA_ARGS__) +#else +#define DEBUG(...) do { } while (0) +#endif + +#endif // DEBUG_H diff --git a/libguile/whippet/src/extents.h b/libguile/whippet/src/extents.h new file mode 100644 index 000000000..62dba92b9 --- /dev/null +++ b/libguile/whippet/src/extents.h @@ -0,0 +1,88 @@ +#ifndef EXTENTS_H +#define EXTENTS_H + +#include +#include + +#include "gc-assert.h" + +struct extent_range { + uintptr_t lo_addr; + uintptr_t hi_addr; +}; + +struct extents { + size_t size; + size_t capacity; + struct extent_range ranges[]; +}; + +static inline int +extents_contain_addr(struct extents *extents, uintptr_t addr) { + size_t lo = 0; + size_t hi = extents->size; + while (lo != hi) { + size_t mid = (lo + hi) / 2; + struct extent_range range = extents->ranges[mid]; + if (addr < range.lo_addr) { + hi = mid; + } else if (addr < range.hi_addr) { + return 1; + } else { + lo = mid + 1; + } + } + return 0; +} + +static struct extents* +extents_allocate(size_t capacity) { + size_t byte_size = + sizeof(struct extents) + sizeof(struct extent_range) * capacity; + struct extents *ret = malloc(byte_size); + if (!ret) __builtin_trap(); + memset(ret, 0, byte_size); + ret->capacity = capacity; + return ret; +} + +static struct extents* +extents_insert(struct extents *old, size_t idx, struct extent_range range) { + if (old->size < old->capacity) { + size_t bytes_to_move = sizeof(struct extent_range) * (old->size - idx); + memmove(&old->ranges[idx + 1], &old->ranges[idx], bytes_to_move); + old->ranges[idx] = range; + old->size++; + return old; + } else { + struct extents *new_ = extents_allocate(old->capacity * 2 + 1); + memcpy(&new_->ranges[0], &old->ranges[0], + sizeof(struct extent_range) * idx); + memcpy(&new_->ranges[idx + 1], &old->ranges[idx], + sizeof(struct extent_range) * (old->size - idx)); + new_->ranges[idx] = range; + new_->size = old->size + 1; + free(old); + return new_; + } +} + +static struct extents* +extents_adjoin(struct extents *extents, void *lo_addr, size_t size) { + size_t i; + struct extent_range range = { (uintptr_t)lo_addr, (uintptr_t)lo_addr + size }; + for (i = 0; i < extents->size; i++) { + if (range.hi_addr < extents->ranges[i].lo_addr) { + break; + 
} else if (range.hi_addr == extents->ranges[i].lo_addr) { + extents->ranges[i].lo_addr = range.lo_addr; + return extents; + } else if (range.lo_addr == extents->ranges[i].hi_addr) { + extents->ranges[i].hi_addr = range.hi_addr; + return extents; + } + } + return extents_insert(extents, i, range); +} + +#endif // EXTENTS_H diff --git a/libguile/whippet/src/field-set.h b/libguile/whippet/src/field-set.h new file mode 100644 index 000000000..ff9a68e83 --- /dev/null +++ b/libguile/whippet/src/field-set.h @@ -0,0 +1,229 @@ +#ifndef FIELD_SET_H +#define FIELD_SET_H + +#include +#include +#include + +#include "assert.h" +#include "gc-edge.h" +#include "gc-lock.h" +#include "tracer.h" + +#define GC_EDGE_BUFFER_CAPACITY 510 + +struct gc_edge_buffer { + struct gc_edge_buffer *next; + size_t size; + struct gc_edge edges[GC_EDGE_BUFFER_CAPACITY]; +}; + +// Lock-free. +struct gc_edge_buffer_list { + struct gc_edge_buffer *head; +}; + +// With a lock. +struct gc_edge_buffer_stack { + struct gc_edge_buffer_list list; +}; + +struct gc_field_set { + struct gc_edge_buffer_list full; + struct gc_edge_buffer_stack partly_full; + struct gc_edge_buffer_list empty; + size_t count; + pthread_mutex_t lock; +}; + +struct gc_field_set_writer { + struct gc_edge_buffer *buf; + struct gc_field_set *set; +}; + +static void +gc_edge_buffer_list_push(struct gc_edge_buffer_list *list, + struct gc_edge_buffer *buf) { + GC_ASSERT(!buf->next); + struct gc_edge_buffer *next = + atomic_load_explicit(&list->head, memory_order_relaxed); + do { + buf->next = next; + } while (!atomic_compare_exchange_weak_explicit(&list->head, &next, buf, + memory_order_acq_rel, + memory_order_acquire)); +} + +static struct gc_edge_buffer* +gc_edge_buffer_list_pop(struct gc_edge_buffer_list *list) { + struct gc_edge_buffer *head = + atomic_load_explicit(&list->head, memory_order_acquire); + struct gc_edge_buffer *next; + do { + if (!head) return NULL; + next = head->next; + } while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next, + memory_order_acq_rel, + memory_order_acquire)); + head->next = NULL; + return head; +} + +static void +gc_edge_buffer_stack_push(struct gc_edge_buffer_stack *stack, + struct gc_edge_buffer *buf, + const struct gc_lock *lock) { + GC_ASSERT(!buf->next); + buf->next = stack->list.head; + stack->list.head = buf; +} + +static struct gc_edge_buffer* +gc_edge_buffer_stack_pop(struct gc_edge_buffer_stack *stack, + const struct gc_lock *lock) { + struct gc_edge_buffer *head = stack->list.head; + if (head) { + stack->list.head = head->next; + head->next = NULL; + } + return head; +} + +static void +gc_field_set_init(struct gc_field_set *set) { + memset(set, 0, sizeof(*set)); + pthread_mutex_init(&set->lock, NULL); +} + +static struct gc_edge_buffer* +gc_field_set_acquire_buffer(struct gc_field_set *set) { + struct gc_edge_buffer *ret; + + ret = gc_edge_buffer_list_pop(&set->empty); + if (ret) return ret; + + struct gc_lock lock = gc_lock_acquire(&set->lock); + ret = gc_edge_buffer_stack_pop(&set->partly_full, &lock); + gc_lock_release(&lock); + if (ret) return ret; + + // atomic inc count + ret = malloc(sizeof(*ret)); + if (!ret) { + perror("Failed to allocate remembered set"); + GC_CRASH(); + } + memset(ret, 0, sizeof(*ret)); + return ret; +} + +static void +gc_field_set_release_buffer(struct gc_field_set *set, + struct gc_edge_buffer *buf) { + if (buf->size == GC_EDGE_BUFFER_CAPACITY) { + gc_edge_buffer_list_push(&set->full, buf); + } else { + struct gc_lock lock = gc_lock_acquire(&set->lock); + 
gc_edge_buffer_stack_push(&set->partly_full, buf, &lock); + gc_lock_release(&lock); + } +} + +static void +gc_field_set_add_roots(struct gc_field_set *set, struct gc_tracer *tracer) { + struct gc_edge_buffer *buf; + struct gc_lock lock = gc_lock_acquire(&set->lock); + while ((buf = gc_edge_buffer_stack_pop(&set->partly_full, &lock))) + gc_tracer_add_root(tracer, gc_root_edge_buffer(buf)); + while ((buf = gc_edge_buffer_list_pop(&set->full))) + gc_tracer_add_root(tracer, gc_root_edge_buffer(buf)); + gc_lock_release(&lock); +} + +static void +gc_field_set_clear(struct gc_field_set *set, + void (*forget_edge)(struct gc_edge, struct gc_heap*), + struct gc_heap *heap) { + struct gc_edge_buffer *partly_full = set->partly_full.list.head; + struct gc_edge_buffer *full = set->full.head; + // Clear the full and partly full sets now so that if a collector + // wanted to it could re-add an edge to the remembered set. + set->partly_full.list.head = NULL; + set->full.head = NULL; + struct gc_edge_buffer *buf, *next; + for (buf = partly_full; buf; buf = next) { + next = buf->next; + buf->next = NULL; + if (forget_edge) + for (size_t i = 0; i < buf->size; i++) + forget_edge(buf->edges[i], heap); + buf->size = 0; + gc_edge_buffer_list_push(&set->empty, buf); + } + for (buf = full; buf; buf = next) { + next = buf->next; + buf->next = NULL; + if (forget_edge) + for (size_t i = 0; i < buf->size; i++) + forget_edge(buf->edges[i], heap); + buf->size = 0; + gc_edge_buffer_list_push(&set->empty, buf); + } +} + +static inline void +gc_field_set_visit_edge_buffer(struct gc_field_set *set, + struct gc_edge_buffer *buf, + int (*visit)(struct gc_edge, + struct gc_heap*, + void *data), + struct gc_heap *heap, + void *data) GC_ALWAYS_INLINE; +static inline void +gc_field_set_visit_edge_buffer(struct gc_field_set *set, + struct gc_edge_buffer *buf, + int (*visit)(struct gc_edge, + struct gc_heap*, + void *data), + struct gc_heap *heap, + void *data) { + size_t i = 0; + while (i < buf->size) { + if (visit(buf->edges[i], heap, data)) + i++; + else + buf->edges[i] = buf->edges[--buf->size]; + } + gc_field_set_release_buffer(set, buf); +} + +static void +gc_field_set_writer_release_buffer(struct gc_field_set_writer *writer) { + if (writer->buf) { + gc_field_set_release_buffer(writer->set, writer->buf); + writer->buf = NULL; + } +} + +static void +gc_field_set_writer_init(struct gc_field_set_writer *writer, + struct gc_field_set *set) { + writer->set = set; + writer->buf = NULL; +} + +static void +gc_field_set_writer_add_edge(struct gc_field_set_writer *writer, + struct gc_edge edge) { + struct gc_edge_buffer *buf = writer->buf; + if (GC_UNLIKELY(!buf)) + writer->buf = buf = gc_field_set_acquire_buffer(writer->set); + GC_ASSERT(buf->size < GC_EDGE_BUFFER_CAPACITY); + buf->edges[buf->size++] = edge; + if (GC_UNLIKELY(buf->size == GC_EDGE_BUFFER_CAPACITY)) { + gc_edge_buffer_list_push(&writer->set->full, buf); + writer->buf = NULL; + } +} + +#endif // FIELD_SET_H diff --git a/libguile/whippet/src/freelist.h b/libguile/whippet/src/freelist.h new file mode 100644 index 000000000..6eec6dbac --- /dev/null +++ b/libguile/whippet/src/freelist.h @@ -0,0 +1,31 @@ +#ifndef FREELIST_H +#define FREELIST_H + +// A size-segregated freelist with linear-log buckets Ă  la +// https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/. 
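+//
+// As a purely illustrative instantiation of the DEFINE_FREELIST macro
+// defined below (the names here are hypothetical, not part of this
+// library):
+//
+//   DEFINE_FREELIST(hole_freelist, 16, 2, struct hole*)
+//
+// declares a `struct hole_freelist` with ((16 << 2) + 1) = 65 buckets;
+// hole_freelist_bucket(&f, size) then returns the bucket (list head)
+// for the size class containing `size`, and
+// hole_freelist_bucket_min_val(idx) is presumably the smallest value
+// that maps to bucket `idx`.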
+ +#include "gc-assert.h" +#include "gc-histogram.h" + +#include + +#define DEFINE_FREELIST(name, max_value_bits, precision, node) \ + struct name { node buckets[((max_value_bits) << (precision)) + 1]; }; \ + static inline size_t name##_num_size_classes(void) { \ + return ((max_value_bits) << (precision)) + 1; \ + } \ + static inline uint64_t name##_bucket_min_val(size_t idx) { \ + GC_ASSERT(idx < name##_num_size_classes()); \ + return gc_histogram_bucket_min_val((precision), idx); \ + } \ + static inline void name##_init(struct name *f) { \ + memset(f, 0, sizeof(*f)); \ + } \ + static inline size_t name##_size_class(uint64_t val) { \ + return gc_histogram_bucket((max_value_bits), (precision), val); \ + } \ + static inline node* name##_bucket(struct name *f, uint64_t val) { \ + return &f->buckets[name##_size_class(val)]; \ + } + +#endif // FREELIST_H diff --git a/libguile/whippet/src/gc-align.h b/libguile/whippet/src/gc-align.h new file mode 100644 index 000000000..c0758b1e0 --- /dev/null +++ b/libguile/whippet/src/gc-align.h @@ -0,0 +1,22 @@ +#ifndef GC_ALIGN_H +#define GC_ALIGN_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include + +static inline uintptr_t align_down(uintptr_t addr, size_t align) { + return addr & ~(align - 1); +} +static inline uintptr_t align_up(uintptr_t addr, size_t align) { + return align_down(addr + align - 1, align); +} + +// Poor man's equivalent of std::hardware_destructive_interference_size. +#define AVOID_FALSE_SHARING 128 +#define ALIGNED_TO_AVOID_FALSE_SHARING \ + __attribute__((aligned(AVOID_FALSE_SHARING))) + +#endif // GC_ALIGN_H diff --git a/libguile/whippet/src/gc-ephemeron-internal.h b/libguile/whippet/src/gc-ephemeron-internal.h new file mode 100644 index 000000000..3d34cf188 --- /dev/null +++ b/libguile/whippet/src/gc-ephemeron-internal.h @@ -0,0 +1,55 @@ +#ifndef GC_EPHEMERON_INTERNAL_H +#define GC_EPHEMERON_INTERNAL_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include "gc-ephemeron.h" + +struct gc_pending_ephemerons; + +// API implemented by collector, for use by ephemerons: +GC_INTERNAL int gc_visit_ephemeron_key(struct gc_edge edge, + struct gc_heap *heap); +GC_INTERNAL struct gc_pending_ephemerons* +gc_heap_pending_ephemerons(struct gc_heap *heap); +GC_INTERNAL unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap); + +// API implemented by ephemerons, for use by collector: +GC_INTERNAL struct gc_edge gc_ephemeron_key_edge(struct gc_ephemeron *eph); +GC_INTERNAL struct gc_edge gc_ephemeron_value_edge(struct gc_ephemeron *eph); + +GC_INTERNAL struct gc_pending_ephemerons* +gc_prepare_pending_ephemerons(struct gc_pending_ephemerons *state, + size_t target_size, double slop); + +GC_INTERNAL void +gc_resolve_pending_ephemerons(struct gc_ref obj, struct gc_heap *heap); + +GC_INTERNAL void +gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state, + struct gc_heap *heap, size_t shard, + size_t nshards); + +GC_INTERNAL struct gc_ephemeron* +gc_pop_resolved_ephemerons(struct gc_heap *heap); + +GC_INTERNAL void +gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data); + +GC_INTERNAL void +gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state, + size_t shard, size_t nshards); + +GC_INTERNAL void gc_ephemeron_init_internal(struct gc_heap *heap, + struct gc_ephemeron *ephemeron, + struct gc_ref key, + struct gc_ref value); + +#endif // 
GC_EPHEMERON_INTERNAL_H diff --git a/libguile/whippet/src/gc-ephemeron.c b/libguile/whippet/src/gc-ephemeron.c new file mode 100644 index 000000000..0f1f9720a --- /dev/null +++ b/libguile/whippet/src/gc-ephemeron.c @@ -0,0 +1,583 @@ +#include +#include +#include + +#define GC_IMPL 1 + +#include "address-hash.h" +#include "debug.h" +#include "gc-embedder-api.h" +#include "gc-ephemeron-internal.h" + +// # Overview +// +// An ephemeron is a conjunction consisting of the ephemeron object +// itself, a "key" object, and a "value" object. If the ephemeron and +// the key are live, then the value is kept live and can be looked up +// given the ephemeron object. +// +// Sometimes we write this as E×K⇒V, indicating that you need both E and +// K to get V. We'll use this notation in these comments sometimes. +// +// The key and the value of an ephemeron are never modified, except +// possibly via forwarding during GC. +// +// If the key of an ephemeron ever becomes unreachable, the ephemeron +// object will be marked as dead by the collector, and neither key nor +// value will be accessible. Users can also explicitly mark an +// ephemeron as dead. +// +// Users can build collections of ephemerons by chaining them together. +// If an ephemeron ever becomes dead, the ephemeron will be removed from +// the chain by the garbage collector. +// +// # Tracing algorithm +// +// Tracing ephemerons is somewhat complicated. Tracing the live objects +// in a heap is usually a parallelizable fan-out kind of operation, +// requiring minimal synchronization between tracing worker threads. +// However with ephemerons, each worker thread may need to check if +// there is a pending ephemeron E for an object K, marking the +// associated V for later traversal by the tracer. Doing this without +// introducing excessive global serialization points is the motivation +// for the complications that follow. +// +// From the viewpoint of the garbage collector, an ephemeron E×K⇒V has 4 +// possible states: +// +// - Traced: An E that was already fully traced as of a given GC epoch. +// +// - Claimed: GC discovers E for the first time in a GC epoch +// +// - Pending: K's liveness is unknown +// +// - Resolved: K is live; V needs tracing +// +// The ephemeron state is kept in an atomic variable. The pending and +// resolved states also have associated atomic list link fields as well; +// it doesn't appear possible to coalesce them into a single field +// without introducing serialization. Finally, there is a bit to +// indicate whether a "traced" ephemeron is live or dead, and a field to +// indicate the epoch at which it was last traced. +// +// Here is a diagram of the state transitions: +// +// ,----->Traced<-----. +// , | | . +// , v / . +// | Claimed | +// | ,-----/ \---. | +// | v v | +// Pending--------->Resolved +// +// Ephemerons are born in the traced state, for the current GC epoch. +// +// When the tracer sees an ephemeron E in the traced state it checks the +// epoch. If the epoch is up to date, E stays in the traced state and +// we are done. +// +// Otherwise, E transitions from traced to claimed. The thread that +// claims E is then responsible for resetting E's pending and resolved +// links, updating E's epoch, and tracing E's user-controlled chain +// link. +// +// If the claiming thread sees that E was already marked dead by a +// previous GC, or explicitly by the user, the ephemeron then +// transitions from back to traced, ready for the next epoch. 
+// +// If the claiming thread sees K to already be known to be live, then E +// is added to the global resolved set and E's state becomes resolved. +// +// Otherwise the claiming thread publishes K⇒E to the global pending +// ephemeron table, via the pending link, and E transitions to pending. +// +// A pending ephemeron is a link in a buckets-of-chains concurrent hash +// table. If its K is ever determined to be live, it becomes resolved, +// and is added to a global set of resolved ephemerons. At the end of +// GC, any ephemerons still pending are marked dead, transitioning their +// states to traced. +// +// Note that the claiming thread -- the one that publishes K⇒E to the +// global pending ephemeron table -- needs to re-check that K is still +// untraced after adding K⇒E to the pending table, and move to resolved +// if so. +// +// A resolved ephemeron needs its V to be traced. Incidentally its K +// also needs tracing, to relocate any forwarding pointer. The thread +// that pops an ephemeron from the resolved set is responsible for +// tracing and for moving E's state to traced. +// +// # Concurrency +// +// All operations on ephemerons are wait-free. Sometimes only one +// thread can make progress (for example for an ephemeron in the claimed +// state), but no thread will be stalled waiting on other threads to +// proceed. +// +// There is one interesting (from a concurrency point of view) data +// structure used by the implementation of ephemerons, the singly-linked +// list. Actually there are three of these; one is used as a stack and +// the other two is used as sets. +// +// The resolved set is implemented via a global `struct gc_ephemeron +// *resolved` variable. Resolving an ephemeron does an atomic push to +// this stack, via compare-and-swap (CAS); popping from the stack (also +// via CAS) yields an ephemeron for tracing. Ephemerons are added to +// the resolved set at most once per GC cycle, and the resolved set is +// empty outside of GC. +// +// The operations that are supported on atomic stacks are: +// +// push(LOC, E, OFFSET) -> void +// +// The user-visible chain link and the link for the pending ephemeron +// table are used to build atomic sets. In these you can add an +// ephemeron to the beginning of the list, traverse the list link by +// link to the end (indicated by NULL), and remove any list item. +// Removing a list node proceeds in two phases: one, you mark the node +// for removal, by changing the ephemeron's state; then, possibly on a +// subsequent traversal, any predecessor may forward its link past +// removed nodes. Because node values never change and nodes only go +// from live to dead, the live list tail can always be reached by any +// node, even from dead nodes. +// +// The operations that are supported on these atomic lists: +// +// push(LOC, E, OFFSET) -> void +// pop(LOC, OFFSET) -> ephemeron or null +// follow(LOC, OFFSET, STATE_OFFSET, LIVE_STATE) -> ephemeron or null +// +// These operations are all wait-free. The "push" operation is shared +// between stack and set use cases. "pop" is for stack-like use cases. +// The "follow" operation traverses a list, opportunistically eliding +// nodes that have been marked dead, atomically updating the location +// storing the next item. +// +// There are also accessors on ephemerons to their fields: +// +// key(E) -> value or null +// value(E) -> value or null +// +// These operations retrieve the key and value, respectively, provided +// that the ephemeron is not marked dead. 
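+//
+// As a rough sketch (not part of the implementation below), here is one
+// way a collector might drive this machinery once its main trace
+// reaches a fixpoint, using the internal API from
+// gc-ephemeron-internal.h.  The `visit` callback and the
+// `drain_worklist` hook are placeholders for whatever the collector
+// uses to trace an edge and to empty its grey worklist; the
+// single-shard calls are only for brevity.
+static void
+example_ephemeron_fixpoint(struct gc_heap *heap,
+                           struct gc_pending_ephemerons *pending,
+                           void (*visit)(struct gc_edge edge,
+                                         struct gc_heap *heap,
+                                         void *visit_data),
+                           void (*drain_worklist)(struct gc_heap *heap),
+                           void *visit_data) {
+  for (;;) {
+    // Promote any pending ephemerons whose keys were marked after they
+    // were published.
+    gc_scan_pending_ephemerons(pending, heap, 0, 1);
+    struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
+    if (!resolved)
+      break;
+    // Tracing values can mark further keys, so loop to a fixpoint.
+    gc_trace_resolved_ephemerons(resolved, visit, heap, visit_data);
+    drain_worklist(heap);
+  }
+  // Anything still pending has an unreachable key; the sweep marks it
+  // dead.
+  gc_sweep_pending_ephemerons(pending, 0, 1);
+}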
+ +//////////////////////////////////////////////////////////////////////// +// Concurrent operations on ephemeron lists +//////////////////////////////////////////////////////////////////////// + +static void +ephemeron_list_push(struct gc_ephemeron **loc, + struct gc_ephemeron *head, + struct gc_ephemeron** (*get_next)(struct gc_ephemeron*)) { + struct gc_ephemeron *tail = atomic_load_explicit(loc, memory_order_acquire); + while (1) { + // There must be no concurrent readers of HEAD, a precondition that + // we ensure by only publishing HEAD to LOC at most once per cycle. + // Therefore we can use a normal store for the tail pointer. + *get_next(head) = tail; + if (atomic_compare_exchange_weak(loc, &tail, head)) + break; + } +} + +static struct gc_ephemeron* +ephemeron_list_pop(struct gc_ephemeron **loc, + struct gc_ephemeron** (*get_next)(struct gc_ephemeron*)) { + struct gc_ephemeron *head = atomic_load_explicit(loc, memory_order_acquire); + while (head) { + // Precondition: the result of get_next on an ephemeron is never + // updated concurrently; OK to load non-atomically. + struct gc_ephemeron *tail = *get_next(head); + if (atomic_compare_exchange_weak(loc, &head, tail)) + break; + } + return head; +} + +static struct gc_ephemeron* +ephemeron_list_follow(struct gc_ephemeron **loc, + struct gc_ephemeron** (*get_next)(struct gc_ephemeron*), + int (*is_live)(struct gc_ephemeron*)) { + struct gc_ephemeron *head = atomic_load_explicit(loc, memory_order_acquire); + if (!head) return NULL; + + while (1) { + struct gc_ephemeron *new_head = head; + + // Skip past any dead nodes. + while (new_head && !is_live(new_head)) + new_head = atomic_load_explicit(get_next(new_head), memory_order_acquire); + + if (// If we didn't have to advance past any dead nodes, no need to + // update LOC. + (head == new_head) + // Otherwise if we succeed in updating LOC, we're done. + || atomic_compare_exchange_strong(loc, &head, new_head) + // Someone else managed to advance LOC; that's fine too. + || (head == new_head)) + return new_head; + + // Otherwise we lost a race; loop and retry. 
+ } +} + +//////////////////////////////////////////////////////////////////////// +// The ephemeron object type +//////////////////////////////////////////////////////////////////////// + +#ifndef GC_EMBEDDER_EPHEMERON_HEADER +#error Embedder should define GC_EMBEDDER_EPHEMERON_HEADER +#endif + +enum { + EPHEMERON_STATE_TRACED, + EPHEMERON_STATE_CLAIMED, + EPHEMERON_STATE_PENDING, + EPHEMERON_STATE_RESOLVED, +}; + +struct gc_ephemeron { + GC_EMBEDDER_EPHEMERON_HEADER + uint8_t state; + unsigned epoch; + struct gc_ephemeron *chain; + struct gc_ephemeron *pending; + struct gc_ephemeron *resolved; + struct gc_ref key; + struct gc_ref value; +}; + +size_t gc_ephemeron_size(void) { return sizeof(struct gc_ephemeron); } + +struct gc_edge gc_ephemeron_key_edge(struct gc_ephemeron *e) { + return gc_edge(&e->key); +} +struct gc_edge gc_ephemeron_value_edge(struct gc_ephemeron *e) { + return gc_edge(&e->value); +} + +//////////////////////////////////////////////////////////////////////// +// Operations on the user-controlled chain field +//////////////////////////////////////////////////////////////////////// + +static struct gc_ephemeron** ephemeron_chain(struct gc_ephemeron *e) { + return &e->chain; +} +static int ephemeron_is_dead(struct gc_ephemeron *e) { + return !atomic_load_explicit(&e->key.value, memory_order_acquire); +} +static int ephemeron_is_not_dead(struct gc_ephemeron *e) { + return !ephemeron_is_dead(e); +} + +void gc_ephemeron_chain_push(struct gc_ephemeron **loc, + struct gc_ephemeron *e) { + ephemeron_list_push(loc, e, ephemeron_chain); +} +static struct gc_ephemeron* follow_chain(struct gc_ephemeron **loc) { + return ephemeron_list_follow(loc, ephemeron_chain, ephemeron_is_not_dead); +} +struct gc_ephemeron* gc_ephemeron_chain_head(struct gc_ephemeron **loc) { + return follow_chain(loc); +} +struct gc_ephemeron* gc_ephemeron_chain_next(struct gc_ephemeron *e) { + return follow_chain(ephemeron_chain(e)); +} +void gc_ephemeron_mark_dead(struct gc_ephemeron *e) { + atomic_store_explicit(&e->key.value, 0, memory_order_release); +} + +//////////////////////////////////////////////////////////////////////// +// Operations on the GC-managed pending link +//////////////////////////////////////////////////////////////////////// + +static struct gc_ephemeron** ephemeron_pending(struct gc_ephemeron *e) { + return &e->pending; +} +static uint8_t ephemeron_state(struct gc_ephemeron *e) { + return atomic_load_explicit(&e->state, memory_order_acquire); +} +static int ephemeron_is_pending(struct gc_ephemeron *e) { + return ephemeron_state(e) == EPHEMERON_STATE_PENDING; +} + +static void push_pending(struct gc_ephemeron **loc, struct gc_ephemeron *e) { + ephemeron_list_push(loc, e, ephemeron_pending); +} +static struct gc_ephemeron* follow_pending(struct gc_ephemeron **loc) { + return ephemeron_list_follow(loc, ephemeron_pending, ephemeron_is_pending); +} + +//////////////////////////////////////////////////////////////////////// +// Operations on the GC-managed resolved link +//////////////////////////////////////////////////////////////////////// + +static struct gc_ephemeron** ephemeron_resolved(struct gc_ephemeron *e) { + return &e->resolved; +} +static void push_resolved(struct gc_ephemeron **loc, struct gc_ephemeron *e) { + ephemeron_list_push(loc, e, ephemeron_resolved); +} +static struct gc_ephemeron* pop_resolved(struct gc_ephemeron **loc) { + return ephemeron_list_pop(loc, ephemeron_resolved); +} + +//////////////////////////////////////////////////////////////////////// +// Access to 
the association +//////////////////////////////////////////////////////////////////////// + +struct gc_ref gc_ephemeron_key(struct gc_ephemeron *e) { + return gc_ref(atomic_load_explicit(&e->key.value, memory_order_acquire)); +} + +struct gc_ref gc_ephemeron_value(struct gc_ephemeron *e) { + return ephemeron_is_dead(e) ? gc_ref_null() : e->value; +} + +//////////////////////////////////////////////////////////////////////// +// Tracing ephemerons +//////////////////////////////////////////////////////////////////////// + +struct gc_pending_ephemerons { + struct gc_ephemeron* resolved; + size_t nbuckets; + double scale; + struct gc_ephemeron* buckets[0]; +}; + +static const size_t MIN_PENDING_EPHEMERONS_SIZE = 32; + +static size_t pending_ephemerons_byte_size(size_t nbuckets) { + return sizeof(struct gc_pending_ephemerons) + + sizeof(struct gc_ephemeron*) * nbuckets; +} + +static struct gc_pending_ephemerons* +gc_make_pending_ephemerons(size_t byte_size) { + size_t nbuckets = byte_size / sizeof(struct gc_ephemeron*); + if (nbuckets < MIN_PENDING_EPHEMERONS_SIZE) + nbuckets = MIN_PENDING_EPHEMERONS_SIZE; + + struct gc_pending_ephemerons *ret = + malloc(pending_ephemerons_byte_size(nbuckets)); + if (!ret) + return NULL; + + ret->resolved = NULL; + ret->nbuckets = nbuckets; + ret->scale = nbuckets / pow(2.0, sizeof(uintptr_t) * 8); + for (size_t i = 0; i < nbuckets; i++) + ret->buckets[i] = NULL; + + return ret; +} + +struct gc_pending_ephemerons* +gc_prepare_pending_ephemerons(struct gc_pending_ephemerons *state, + size_t target_byte_size, double slop) { + size_t existing = + state ? pending_ephemerons_byte_size(state->nbuckets) : 0; + slop += 1.0; + if (existing * slop > target_byte_size && existing < target_byte_size * slop) + return state; + + struct gc_pending_ephemerons *new_state = + gc_make_pending_ephemerons(target_byte_size); + + if (!new_state) + return state; + + free(state); + return new_state; +} + +static struct gc_ephemeron** +pending_ephemeron_bucket(struct gc_pending_ephemerons *state, + struct gc_ref ref) { + uintptr_t hash = hash_address(gc_ref_value(ref)); + size_t idx = hash * state->scale; + GC_ASSERT(idx < state->nbuckets); + return &state->buckets[idx]; +} + +static void +add_pending_ephemeron(struct gc_pending_ephemerons *state, + struct gc_ephemeron *e) { + struct gc_ephemeron **bucket = pending_ephemeron_bucket(state, e->key); + atomic_store_explicit(&e->state, EPHEMERON_STATE_PENDING, + memory_order_release); + push_pending(bucket, e); +} + +static void maybe_resolve_ephemeron(struct gc_pending_ephemerons *state, + struct gc_ephemeron *e) { + uint8_t expected = EPHEMERON_STATE_PENDING; + if (atomic_compare_exchange_strong(&e->state, &expected, + EPHEMERON_STATE_RESOLVED)) + push_resolved(&state->resolved, e); +} + +// Precondition: OBJ has already been copied to tospace, but OBJ is a +// fromspace ref. +void gc_resolve_pending_ephemerons(struct gc_ref obj, struct gc_heap *heap) { + struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap); + struct gc_ephemeron **bucket = pending_ephemeron_bucket(state, obj); + for (struct gc_ephemeron *link = follow_pending(bucket); + link; + link = follow_pending(&link->pending)) { + if (gc_ref_value(obj) == gc_ref_value(link->key)) { + gc_visit_ephemeron_key(gc_ephemeron_key_edge(link), heap); + // PENDING -> RESOLVED, if it was pending. 
+ maybe_resolve_ephemeron(state, link); + } + } +} + +void gc_trace_ephemeron(struct gc_ephemeron *e, + void (*visit)(struct gc_edge edge, struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data) { + unsigned epoch = gc_heap_ephemeron_trace_epoch(heap); + uint8_t expected = EPHEMERON_STATE_TRACED; + // TRACED[_] -> CLAIMED[_]. + if (!atomic_compare_exchange_strong(&e->state, &expected, + EPHEMERON_STATE_CLAIMED)) + return; + + + if (e->epoch == epoch) { + // CLAIMED[epoch] -> TRACED[epoch]. + atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED, + memory_order_release); + return; + } + + // CLAIMED[!epoch] -> CLAIMED[epoch]. + e->epoch = epoch; + e->pending = NULL; + e->resolved = NULL; + + // Trace chain successors, eliding any intermediate dead links. Note + // that there is a race between trace-time evacuation of the next link + // in the chain and any mutation of that link pointer by the mutator + // (which can only be to advance the chain forward past dead links). + // Collectors using this API have to eliminate this race, for example + // by not evacuating while the mutator is running. + follow_chain(&e->chain); + visit(gc_edge(&e->chain), heap, trace_data); + + // Similarly there is a race between the mutator marking an ephemeron + // as dead and here; the consequence would be that we treat an + // ephemeron as live when it's not, but only for this cycle. No big + // deal. + if (ephemeron_is_dead(e)) { + // CLAIMED[epoch] -> TRACED[epoch]. + atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED, + memory_order_release); + return; + } + + // If K is live, trace V and we are done. + if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap)) { + visit(gc_ephemeron_value_edge(e), heap, trace_data); + // CLAIMED[epoch] -> TRACED[epoch]. + atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED, + memory_order_release); + return; + } + + // Otherwise K is not yet traced, so we don't know if it is live. + // Publish the ephemeron to a global table. + struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap); + // CLAIMED[epoch] -> PENDING. + add_pending_ephemeron(state, e); + + // Given an ephemeron E×K⇒V, there is a race between marking K and E. + // One thread could go to mark E and see that K is unmarked, so we get + // here. Meanwhile another thread could go to mark K and not see E in + // the global table yet. Therefore after publishing E, we have to + // check the mark on K again. + if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap)) + // K visited by another thread while we published E; PENDING -> + // RESOLVED, if still PENDING. + maybe_resolve_ephemeron(state, e); +} + +void +gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state, + struct gc_heap *heap, size_t shard, + size_t nshards) { + GC_ASSERT(shard < nshards); + size_t start = state->nbuckets * 1.0 * shard / nshards; + size_t end = state->nbuckets * 1.0 * (shard + 1) / nshards; + for (size_t idx = start; idx < end; idx++) { + for (struct gc_ephemeron *e = follow_pending(&state->buckets[idx]); + e; + e = follow_pending(&e->pending)) { + if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap)) + // PENDING -> RESOLVED, if PENDING. 
+ maybe_resolve_ephemeron(state, e); + } + } +} + +struct gc_ephemeron* +gc_pop_resolved_ephemerons(struct gc_heap *heap) { + struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap); + return atomic_exchange(&state->resolved, NULL); +} + +void +gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data) { + for (; resolved; resolved = resolved->resolved) { + visit(gc_ephemeron_value_edge(resolved), heap, trace_data); + // RESOLVED -> TRACED. + atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED, + memory_order_release); + } +} + +void +gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state, + size_t shard, size_t nshards) { + GC_ASSERT(shard < nshards); + size_t start = state->nbuckets * 1.0 * shard / nshards; + size_t end = state->nbuckets * 1.0 * (shard + 1) / nshards; + for (size_t idx = start; idx < end; idx++) { + struct gc_ephemeron **bucket = &state->buckets[idx]; + for (struct gc_ephemeron *e = follow_pending(bucket); + e; + e = follow_pending(&e->pending)) { + // PENDING -> TRACED, but dead. + atomic_store_explicit(&e->key.value, 0, memory_order_release); + atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED, + memory_order_release); + } + atomic_store_explicit(bucket, NULL, memory_order_release); + } +} + +//////////////////////////////////////////////////////////////////////// +// Allocation & initialization +//////////////////////////////////////////////////////////////////////// + +void gc_ephemeron_init_internal(struct gc_heap *heap, + struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { + // Caller responsible for any write barrier, though really the + // assumption is that the ephemeron is younger than the key and the + // value. 
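+  // Note: the epoch below is one less than the current trace epoch, so
+  // the first gc_trace_ephemeron() on this object will not take the
+  // already-traced-this-epoch fast path and will instead do the full
+  // claim-and-trace.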
+ ephemeron->state = EPHEMERON_STATE_TRACED; + ephemeron->epoch = gc_heap_ephemeron_trace_epoch(heap) - 1; + ephemeron->chain = NULL; + ephemeron->pending = NULL; + ephemeron->resolved = NULL; + ephemeron->key = key; + ephemeron->value = value; +} diff --git a/libguile/whippet/src/gc-finalizer-internal.h b/libguile/whippet/src/gc-finalizer-internal.h new file mode 100644 index 000000000..529a087ee --- /dev/null +++ b/libguile/whippet/src/gc-finalizer-internal.h @@ -0,0 +1,65 @@ +#ifndef GC_FINALIZER_INTERNAL_H +#define GC_FINALIZER_INTERNAL_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include "gc-finalizer.h" +#include "root.h" + +struct gc_finalizer_state; + +GC_INTERNAL +struct gc_finalizer_state* gc_make_finalizer_state(void); + +GC_INTERNAL +void gc_finalizer_init_internal(struct gc_finalizer *f, + struct gc_ref object, + struct gc_ref closure); + +GC_INTERNAL +void gc_finalizer_attach_internal(struct gc_finalizer_state *state, + struct gc_finalizer *f, + unsigned priority); + +GC_INTERNAL +void gc_finalizer_externally_activated(struct gc_finalizer *f); + +GC_INTERNAL +void gc_finalizer_externally_fired(struct gc_finalizer_state *state, + struct gc_finalizer *finalizer); + +GC_INTERNAL +struct gc_finalizer* gc_finalizer_state_pop(struct gc_finalizer_state *state); + +GC_INTERNAL +void gc_finalizer_fire(struct gc_finalizer **fired_list_loc, + struct gc_finalizer *finalizer); + +GC_INTERNAL +void gc_finalizer_state_set_callback(struct gc_finalizer_state *state, + gc_finalizer_callback callback); + +GC_INTERNAL +size_t gc_visit_finalizer_roots(struct gc_finalizer_state *state, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data); + +GC_INTERNAL +size_t gc_resolve_finalizers(struct gc_finalizer_state *state, + size_t priority, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data); + +GC_INTERNAL +void gc_notify_finalizers(struct gc_finalizer_state *state, + struct gc_heap *heap); + +#endif // GC_FINALIZER_INTERNAL_H diff --git a/libguile/whippet/src/gc-finalizer.c b/libguile/whippet/src/gc-finalizer.c new file mode 100644 index 000000000..ae795cccf --- /dev/null +++ b/libguile/whippet/src/gc-finalizer.c @@ -0,0 +1,307 @@ +#include +#include +#include +#include + +#define GC_IMPL 1 + +#include "debug.h" +#include "gc-embedder-api.h" +#include "gc-ephemeron-internal.h" // for gc_visit_ephemeron_key +#include "gc-finalizer-internal.h" + +// # Overview +// +// See gc-finalizer.h for a overview of finalizers from the user and +// embedder point of view. +// +// ## Tracing +// +// From the perspecive of the collector implementation, finalizers are +// GC-managed objects, allowing their size to be accounted for within +// the heap size. They get traced during collection, allowing for +// relocation of their object references, and allowing the finalizer +// object itself to be evacuated if appropriate. +// +// The collector holds on to outstanding finalizers in a *finalizer +// state*, which holds one *finalizer table* for each priority. We +// don't need to look up finalizers by object, so we could just hold +// them in a big list, but to facilitate parallelism we slice them +// across some number of shards, where the "next" pointer is part of the +// finalizer object. +// +// There are a number of ways you could imagine integrating finalizers +// into a system. The way Whippet does it goes like this. 
See +// https://wingolog.org/archives/2022/10/31/ephemerons-and-finalizers +// and +// https://wingolog.org/archives/2024/07/22/finalizers-guardians-phantom-references-et-cetera +// for some further discussion. +// +// 1. The collector should begin a cycle by adding all shards from all +// priorities to the root set. When the embedder comes across a +// finalizer (as it will, because we added them to the root set), +// it traces it via gc_trace_finalizer(), which will visit the +// finalizer's closure and its "next" pointer. +// +// 2. After the full trace, and then the fix-point on pending +// ephemerons, for each priority from 0 upwards: +// +// i. Visit each finalizable object in the table. If the object +// was as-yet unvisited, then it is unreachable and thus +// finalizable; the finalizer is added to the global "fired" +// list, and changes state from "attached" to "fired". +// Otherwise it is re-added to the finalizer table. +// +// ii. If any finalizer was added to the fired list, then those +// objects were also added to the grey worklist; run tracing +// again until the grey set is empty, including ephemerons. +// +// 3. Finally, call the finalizer callback if the list of fired finalizers is +// nonempty. +// +// ## Concurrency +// +// The finalizer table is wait-free. It keeps a count of active finalizers, and +// chooses a bucket based on the count modulo the number of buckets. Adding a +// finalizer to the table is an atomic push on a linked list. The table is +// completely rebuilt during the GC pause, redistributing survivor entries +// across the buckets, and pushing all finalizable entries onto the single +// "fired" linked list. +// +// The fired list is also wait-free. As noted above, it is built +// during the pause, and mutators pop items off of it atomically. +// +// ## Generations +// +// It would be ideal if a young generation had its own finalizer table. +// Promoting an object would require promoting its finalizer to the old +// finalizer table. Not yet implemented (but would be nice). + +#ifndef GC_EMBEDDER_FINALIZER_HEADER +#error Embedder should define GC_EMBEDDER_FINALIZER_HEADER +#endif + +enum finalizer_state { + FINALIZER_STATE_INIT = 0, // Finalizer is newborn. + FINALIZER_STATE_ACTIVE, // Finalizer is ours and in the finalizer table. + FINALIZER_STATE_FIRED, // Finalizer is handed back to mutator. +}; + +struct gc_finalizer { + GC_EMBEDDER_FINALIZER_HEADER + enum finalizer_state state; + struct gc_ref object; + struct gc_ref closure; + struct gc_finalizer *next; +}; + +// Enough buckets to parallelize closure marking. No need to look up a +// finalizer for a given object. 
+#define BUCKET_COUNT 32 + +struct gc_finalizer_table { + size_t finalizer_count; + struct gc_finalizer* buckets[BUCKET_COUNT]; +}; + +struct gc_finalizer_state { + gc_finalizer_callback have_finalizers; + struct gc_finalizer *fired; + size_t fired_this_cycle; + size_t table_count; + struct gc_finalizer_table tables[0]; +}; + +// public +size_t gc_finalizer_size(void) { return sizeof(struct gc_finalizer); } +struct gc_ref gc_finalizer_object(struct gc_finalizer *f) { return f->object; } +struct gc_ref gc_finalizer_closure(struct gc_finalizer *f) { return f->closure; } + +// internal +struct gc_finalizer_state* gc_make_finalizer_state(void) { + size_t ntables = gc_finalizer_priority_count(); + size_t size = (sizeof(struct gc_finalizer_state) + + sizeof(struct gc_finalizer_table) * ntables); + struct gc_finalizer_state *ret = malloc(size); + if (!ret) + return NULL; + memset(ret, 0, size); + ret->table_count = ntables; + return ret; +} + +static void finalizer_list_push(struct gc_finalizer **loc, + struct gc_finalizer *head) { + struct gc_finalizer *tail = atomic_load_explicit(loc, memory_order_acquire); + do { + head->next = tail; + } while (!atomic_compare_exchange_weak(loc, &tail, head)); +} + +static struct gc_finalizer* finalizer_list_pop(struct gc_finalizer **loc) { + struct gc_finalizer *head = atomic_load_explicit(loc, memory_order_acquire); + do { + if (!head) return NULL; + } while (!atomic_compare_exchange_weak(loc, &head, head->next)); + head->next = NULL; + return head; +} + +static void add_finalizer_to_table(struct gc_finalizer_table *table, + struct gc_finalizer *f) { + size_t count = atomic_fetch_add_explicit(&table->finalizer_count, 1, + memory_order_relaxed); + struct gc_finalizer **loc = &table->buckets[count % BUCKET_COUNT]; + finalizer_list_push(loc, f); +} + +// internal +void gc_finalizer_init_internal(struct gc_finalizer *f, + struct gc_ref object, + struct gc_ref closure) { + // Caller responsible for any write barrier, though really the + // assumption is that the finalizer is younger than the key and the + // value. + if (f->state != FINALIZER_STATE_INIT) + GC_CRASH(); + GC_ASSERT(gc_ref_is_null(f->object)); + f->object = object; + f->closure = closure; +} + +// internal +void gc_finalizer_attach_internal(struct gc_finalizer_state *state, + struct gc_finalizer *f, + unsigned priority) { + // Caller responsible for any write barrier, though really the + // assumption is that the finalizer is younger than the key and the + // value. 
+ if (f->state != FINALIZER_STATE_INIT) + GC_CRASH(); + if (gc_ref_is_null(f->object)) + GC_CRASH(); + + f->state = FINALIZER_STATE_ACTIVE; + + GC_ASSERT(priority < state->table_count); + add_finalizer_to_table(&state->tables[priority], f); +} + +// internal +struct gc_finalizer* gc_finalizer_state_pop(struct gc_finalizer_state *state) { + return finalizer_list_pop(&state->fired); +} + +static void +add_fired_finalizer(struct gc_finalizer_state *state, + struct gc_finalizer *f) { + if (f->state != FINALIZER_STATE_ACTIVE) + GC_CRASH(); + f->state = FINALIZER_STATE_FIRED; + finalizer_list_push(&state->fired, f); +} + +// internal +void +gc_finalizer_externally_activated(struct gc_finalizer *f) { + if (f->state != FINALIZER_STATE_INIT) + GC_CRASH(); + f->state = FINALIZER_STATE_ACTIVE; +} + +// internal +void +gc_finalizer_externally_fired(struct gc_finalizer_state *state, + struct gc_finalizer *f) { + add_fired_finalizer(state, f); +} + +// internal +size_t gc_visit_finalizer_roots(struct gc_finalizer_state *state, + void (*visit)(struct gc_edge, + struct gc_heap*, + void *), + struct gc_heap *heap, + void *visit_data) { + size_t count = 0; + for (size_t tidx = 0; tidx < state->table_count; tidx++) { + struct gc_finalizer_table *table = &state->tables[tidx]; + if (table->finalizer_count) { + count += table->finalizer_count; + for (size_t bidx = 0; bidx < BUCKET_COUNT; bidx++) + visit(gc_edge(&table->buckets[bidx]), heap, visit_data); + } + } + visit(gc_edge(&state->fired), heap, visit_data); + return count; +} + +// public +void gc_trace_finalizer(struct gc_finalizer *f, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *trace_data) { + if (f->state != FINALIZER_STATE_ACTIVE) + visit(gc_edge(&f->object), heap, trace_data); + visit(gc_edge(&f->closure), heap, trace_data); + visit(gc_edge(&f->next), heap, trace_data); +} + +// Sweeping is currently serial. It could run in parallel but we want to +// resolve all finalizers before shading any additional node. Perhaps we should +// relax this restriction though; if the user attaches two finalizers to the +// same object, it's probably OK to only have one finalizer fire per cycle. + +// internal +size_t gc_resolve_finalizers(struct gc_finalizer_state *state, + size_t priority, + void (*visit)(struct gc_edge edge, + struct gc_heap *heap, + void *visit_data), + struct gc_heap *heap, + void *visit_data) { + GC_ASSERT(priority < state->table_count); + struct gc_finalizer_table *table = &state->tables[priority]; + size_t finalizers_fired = 0; + // Visit each finalizer in the table. If its object was already visited, + // re-add the finalizer to the table. Otherwise enqueue its object edge for + // tracing and mark the finalizer as fired. 
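+  // The table is rebuilt into a scratch copy: surviving finalizers get
+  // redistributed across the buckets, and fired ones simply drop out.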
+ if (table->finalizer_count) { + struct gc_finalizer_table scratch = { 0, }; + for (size_t bidx = 0; bidx < BUCKET_COUNT; bidx++) { + struct gc_finalizer *next; + for (struct gc_finalizer *f = table->buckets[bidx]; f; f = next) { + next = f->next; + f->next = NULL; + struct gc_edge edge = gc_edge(&f->object); + if (gc_visit_ephemeron_key(edge, heap)) { + add_finalizer_to_table(&scratch, f); + } else { + finalizers_fired++; + visit(edge, heap, visit_data); + add_fired_finalizer(state, f); + } + } + } + memcpy(table, &scratch, sizeof(*table)); + } + state->fired_this_cycle += finalizers_fired; + return finalizers_fired; +} + +// internal +void gc_notify_finalizers(struct gc_finalizer_state *state, + struct gc_heap *heap) { + if (state->fired_this_cycle && state->have_finalizers) { + state->have_finalizers(heap, state->fired_this_cycle); + state->fired_this_cycle = 0; + } +} + +// internal +void gc_finalizer_state_set_callback(struct gc_finalizer_state *state, + gc_finalizer_callback callback) { + state->have_finalizers = callback; +} diff --git a/libguile/whippet/src/gc-internal.h b/libguile/whippet/src/gc-internal.h new file mode 100644 index 000000000..715b72a99 --- /dev/null +++ b/libguile/whippet/src/gc-internal.h @@ -0,0 +1,16 @@ +#ifndef GC_INTERNAL_H +#define GC_INTERNAL_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include "gc-ephemeron-internal.h" +#include "gc-finalizer-internal.h" +#include "gc-options-internal.h" + +uint64_t gc_heap_total_bytes_allocated(struct gc_heap *heap); +void gc_mutator_adjust_heap_size(struct gc_mutator *mut, uint64_t new_size); + + +#endif // GC_INTERNAL_H diff --git a/libguile/whippet/src/gc-lock.h b/libguile/whippet/src/gc-lock.h new file mode 100644 index 000000000..89c5f4ac0 --- /dev/null +++ b/libguile/whippet/src/gc-lock.h @@ -0,0 +1,24 @@ +#ifndef GC_LOCK_H +#define GC_LOCK_H + +#include +#include "gc-assert.h" + +struct gc_lock { + pthread_mutex_t *lock; +}; + +static struct gc_lock +gc_lock_acquire(pthread_mutex_t *lock) { + pthread_mutex_lock(lock); + return (struct gc_lock){ lock }; +} + +static void +gc_lock_release(struct gc_lock *lock) { + GC_ASSERT(lock->lock); + pthread_mutex_unlock(lock->lock); + lock->lock = NULL; +} + +#endif // GC_LOCK_H diff --git a/libguile/whippet/src/gc-options-internal.h b/libguile/whippet/src/gc-options-internal.h new file mode 100644 index 000000000..9e9fbca22 --- /dev/null +++ b/libguile/whippet/src/gc-options-internal.h @@ -0,0 +1,32 @@ +#ifndef GC_OPTIONS_INTERNAL_H +#define GC_OPTIONS_INTERNAL_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include "gc-options.h" + +struct gc_common_options { + enum gc_heap_size_policy heap_size_policy; + size_t heap_size; + size_t maximum_heap_size; + double heap_size_multiplier; + double heap_expansiveness; + int parallelism; +}; + +GC_INTERNAL void gc_init_common_options(struct gc_common_options *options); + +GC_INTERNAL int gc_common_option_from_string(const char *str); + +GC_INTERNAL int gc_common_options_set_int(struct gc_common_options *options, + int option, int value); +GC_INTERNAL int gc_common_options_set_size(struct gc_common_options *options, + int option, size_t value); +GC_INTERNAL int gc_common_options_set_double(struct gc_common_options *options, + int option, double value); +GC_INTERNAL int gc_common_options_parse_and_set(struct gc_common_options *options, + int option, const char *value); + +#endif // GC_OPTIONS_INTERNAL_H diff --git a/libguile/whippet/src/gc-options.c 
b/libguile/whippet/src/gc-options.c new file mode 100644 index 000000000..31de02745 --- /dev/null +++ b/libguile/whippet/src/gc-options.c @@ -0,0 +1,198 @@ +#include +#include +#include +#include + +#define GC_IMPL 1 + +#include "gc-options-internal.h" +#include "gc-platform.h" + +// M(UPPER, lower, repr, type, parser, default, min, max) +#define FOR_EACH_INT_GC_OPTION(M) \ + M(HEAP_SIZE_POLICY, heap_size_policy, "heap-size-policy", \ + int, heap_size_policy, GC_HEAP_SIZE_FIXED, GC_HEAP_SIZE_FIXED, \ + GC_HEAP_SIZE_ADAPTIVE) \ + M(PARALLELISM, parallelism, "parallelism", \ + int, int, default_parallelism(), 1, 64) + +#define FOR_EACH_SIZE_GC_OPTION(M) \ + M(HEAP_SIZE, heap_size, "heap-size", \ + size, size, 6 * 1024 * 1024, 0, -1) \ + M(MAXIMUM_HEAP_SIZE, maximum_heap_size, "maximum-heap-size", \ + size, size, 0, 0, -1) + +#define FOR_EACH_DOUBLE_GC_OPTION(M) \ + M(HEAP_SIZE_MULTIPLIER, heap_size_multiplier, "heap-size-multiplier", \ + double, double, 1.75, 1.0, 1e6) \ + M(HEAP_EXPANSIVENESS, heap_expansiveness, "heap-expansiveness", \ + double, double, 1.0, 0.0, 50.0) + +typedef int gc_option_int; +typedef size_t gc_option_size; +typedef double gc_option_double; + +#define FOR_EACH_COMMON_GC_OPTION(M) \ + FOR_EACH_INT_GC_OPTION(M) \ + FOR_EACH_SIZE_GC_OPTION(M) \ + FOR_EACH_DOUBLE_GC_OPTION(M) + +static int clamp_int(int n, int lo, int hi) { + return n < lo ? lo : n > hi ? hi : n; +} +static size_t clamp_size(size_t n, size_t lo, size_t hi) { + return n < lo ? lo : n > hi ? hi : n; +} +static double clamp_double(double n, double lo, double hi) { + return n < lo ? lo : n > hi ? hi : n; +} + +static int default_parallelism(void) { + return clamp_int(gc_platform_processor_count(), 1, 8); +} + +void gc_init_common_options(struct gc_common_options *options) { +#define INIT(UPPER, lower, repr, type, parser, default, min, max) \ + options->lower = default; + FOR_EACH_COMMON_GC_OPTION(INIT) +#undef INIT +} + +int gc_common_option_from_string(const char *str) { +#define GET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \ + if (strcmp(str, repr) == 0) return GC_OPTION_##UPPER; + FOR_EACH_COMMON_GC_OPTION(GET_OPTION) +#undef GET_OPTION + return -1; +} + +#define SET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \ + case GC_OPTION_##UPPER: \ + if (value != clamp_##type(value, min, max)) return 0; \ + options->lower = value; \ + return 1; +#define DEFINE_SETTER(STEM, stem, type) \ + int gc_common_options_set_##stem(struct gc_common_options *options, \ + int option, type value) { \ + switch (option) { \ + FOR_EACH_##STEM##_GC_OPTION(SET_OPTION) \ + default: return 0; \ + } \ + } +DEFINE_SETTER(INT, int, int) +DEFINE_SETTER(SIZE, size, size_t) +DEFINE_SETTER(DOUBLE, double, double) +#undef SET_OPTION +#undef DEFINE_SETTER + +static int parse_size(const char *arg, size_t *val) { + char *end; + long i = strtol(arg, &end, 0); + if (i < 0 || i == LONG_MAX) return 0; + if (end == arg) return 0; + char delim = *end; + if (delim == 'k' || delim == 'K') + ++end, i *= 1024L; + else if (delim == 'm' || delim == 'M') + ++end, i *= 1024L * 1024L; + else if (delim == 'g' || delim == 'G') + ++end, i *= 1024L * 1024L * 1024L; + else if (delim == 't' || delim == 'T') + ++end, i *= 1024L * 1024L * 1024L * 1024L; + + if (*end != '\0') return 0; + *val = i; + return 1; +} + +static int parse_int(const char *arg, int *val) { + char *end; + long i = strtol(arg, &end, 0); + if (i == LONG_MIN || i == LONG_MAX || end == arg || *end) + return 0; + *val = i; + return 1; +} + +static int 
parse_heap_size_policy(const char *arg, int *val) { + if (strcmp(arg, "fixed") == 0) { + *val = GC_HEAP_SIZE_FIXED; + return 1; + } + if (strcmp(arg, "growable") == 0) { + *val = GC_HEAP_SIZE_GROWABLE; + return 1; + } + if (strcmp(arg, "adaptive") == 0) { + *val = GC_HEAP_SIZE_ADAPTIVE; + return 1; + } + return parse_int(arg, val); +} + +static int parse_double(const char *arg, double *val) { + char *end; + double d = strtod(arg, &end); + if (end == arg || *end) + return 0; + *val = d; + return 1; +} + +int gc_common_options_parse_and_set(struct gc_common_options *options, + int option, const char *value) { + switch (option) { +#define SET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \ + case GC_OPTION_##UPPER: { \ + gc_option_##type v; \ + if (!parse_##parser(value, &v)) return 0; \ + return gc_common_options_set_##type(options, option, v); \ + } + FOR_EACH_COMMON_GC_OPTION(SET_OPTION) + default: return 0; + } +} + +static int is_lower(char c) { return 'a' <= c && c <= 'z'; } +static int is_digit(char c) { return '0' <= c && c <= '9'; } +static int is_option(char c) { return is_lower(c) || c == '-'; } +static int is_option_end(char c) { return c == '='; } +static int is_value(char c) { + return is_lower(c) || is_digit(c) || c == '-' || c == '+' || c == '.'; +} +static int is_value_end(char c) { return c == '\0' || c == ','; } +static char* read_token(char *p, int (*is_tok)(char c), int (*is_end)(char c), + char *delim) { + char c; + for (c = *p; is_tok(c); c = *++p); + if (!is_end(c)) return NULL; + *delim = c; + *p = '\0'; + return p + 1; +} +int gc_options_parse_and_set_many(struct gc_options *options, + const char *str) { + if (!*str) return 1; + char *copy = strdup(str); + char *cur = copy; + int ret = 0; + while (1) { + char delim; + char *next = read_token(cur, is_option, is_option_end, &delim); + if (!next) break; + int option = gc_option_from_string(cur); + if (option < 0) break; + + cur = next; + next = read_token(cur, is_value, is_value_end, &delim); + if (!next) break; + if (!gc_options_parse_and_set(options, option, cur)) break; + cur = next; + if (delim == '\0') { + ret = 1; + break; + } + } + free(copy); + return ret; +} diff --git a/libguile/whippet/src/gc-platform-gnu-linux.c b/libguile/whippet/src/gc-platform-gnu-linux.c new file mode 100644 index 000000000..3ace1890d --- /dev/null +++ b/libguile/whippet/src/gc-platform-gnu-linux.c @@ -0,0 +1,211 @@ +// For pthread_getattr_np. +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#define GC_IMPL 1 + +#include "debug.h" +#include "gc-align.h" +#include "gc-assert.h" +#include "gc-inline.h" +#include "gc-platform.h" + +void gc_platform_init(void) { + // Nothing to do. +} + +static uintptr_t fallback_current_thread_stack_base(void) GC_NEVER_INLINE; +static uintptr_t fallback_current_thread_stack_base(void) { + // Sloppily assume that there are very few frames between us and the + // thread entry or main function, and that therefore we haven't + // consumed more than a page of stack; we can then just round up the + // stack pointer to the page boundary. 
+ fprintf(stderr, + "Using fallback strategy to capture stack base for thread %p.\n", + (void*)pthread_self()); + int local; + uintptr_t hot = (uintptr_t)&local; + size_t page_size = getpagesize(); + return (hot + page_size) & ~(page_size - 1); +} + +uintptr_t gc_platform_current_thread_stack_base(void) { + pthread_t me = pthread_self(); + pthread_attr_t attr; + int err = pthread_getattr_np(me, &attr); + if (err) { + errno = err; + // This case can occur for the main thread when running in a + // filesystem without /proc/stat. + perror("Failed to capture stack base via pthread_getattr_np"); + return fallback_current_thread_stack_base(); + } + + void *stack_low_addr; + size_t stack_size; + err = pthread_attr_getstack(&attr, &stack_low_addr, &stack_size); + pthread_attr_destroy(&attr); + if (err) { + // Should never occur. + errno = err; + perror("pthread_attr_getstack"); + return fallback_current_thread_stack_base(); + } + + return (uintptr_t)stack_low_addr + stack_size; +} + +struct visit_data { + void (*f)(uintptr_t start, uintptr_t end, struct gc_heap *heap, void *data); + struct gc_heap *heap; + void *data; +}; + +static int visit_roots(struct dl_phdr_info *info, size_t size, void *data) { + struct visit_data *visit_data = data; + uintptr_t object_addr = info->dlpi_addr; + const char *object_name = info->dlpi_name; + const ElfW(Phdr) *program_headers = info->dlpi_phdr; + size_t program_headers_count = info->dlpi_phnum; + + // From the loader's perspective, an ELF image is broken up into + // "segments", each of which is described by a "program header". + // Treat all writable data segments as potential edges into the + // GC-managed heap. + // + // Note that there are some RELRO segments which are initially + // writable but then remapped read-only. 
BDW-GC will exclude these, + // but we just punt for the time being and treat them as roots + for (size_t i = 0; i < program_headers_count; i++) { + const ElfW(Phdr) *p = &program_headers[i]; + if (p->p_type == PT_LOAD && (p->p_flags & PF_W)) { + uintptr_t start = p->p_vaddr + object_addr; + uintptr_t end = start + p->p_memsz; + DEBUG("found roots for '%s': [%p,%p)\n", object_name, + (void*)start, (void*)end); + visit_data->f(start, end, visit_data->heap, visit_data->data); + } + } + + return 0; +} + +void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start, + uintptr_t end, + struct gc_heap*, + void *data), + struct gc_heap *heap, + void *data) { + struct visit_data visit_data = { f, heap, data }; + dl_iterate_phdr(visit_roots, &visit_data); +} + +int gc_platform_processor_count(void) { + cpu_set_t set; + if (sched_getaffinity(0, sizeof (set), &set) != 0) + return 1; + return CPU_COUNT(&set); +} + +uint64_t gc_platform_monotonic_nanoseconds(void) { + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + GC_CRASH(); + uint64_t s = ts.tv_sec; + uint64_t ns = ts.tv_nsec; + uint64_t ns_per_sec = 1000000000; + return s * ns_per_sec + ns; +} + +size_t gc_platform_page_size(void) { + return getpagesize(); +} + +struct gc_reservation gc_platform_reserve_memory(size_t size, + size_t alignment) { + GC_ASSERT_EQ(size, align_down(size, getpagesize())); + GC_ASSERT_EQ(alignment & (alignment - 1), 0); + GC_ASSERT_EQ(alignment, align_down(alignment, getpagesize())); + + size_t extent = size + alignment; + void *mem = mmap(NULL, extent, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + + if (mem == MAP_FAILED) { + perror("failed to reserve address space"); + GC_CRASH(); + } + + uintptr_t base = (uintptr_t) mem; + uintptr_t end = base + extent; + uintptr_t aligned_base = alignment ? 
align_up(base, alignment) : base; + uintptr_t aligned_end = aligned_base + size; + + if (aligned_base - base) + munmap((void*)base, aligned_base - base); + if (end - aligned_end) + munmap((void*)aligned_end, end - aligned_end); + + return (struct gc_reservation){aligned_base, size}; +} + +void* +gc_platform_acquire_memory_from_reservation(struct gc_reservation reservation, + size_t offset, size_t size) { + GC_ASSERT_EQ(size, align_down(size, getpagesize())); + GC_ASSERT(size <= reservation.size); + GC_ASSERT(offset <= reservation.size - size); + + void *mem = mmap((void*)(reservation.base + offset), size, + PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if (mem == MAP_FAILED) { + perror("mmap failed"); + return NULL; + } + + return mem; +} + +void +gc_platform_release_reservation(struct gc_reservation reservation) { + if (munmap((void*)reservation.base, reservation.size) != 0) + perror("failed to unmap memory"); +} + +void* +gc_platform_acquire_memory(size_t size, size_t alignment) { + struct gc_reservation reservation = + gc_platform_reserve_memory(size, alignment); + return gc_platform_acquire_memory_from_reservation(reservation, 0, size); +} + +void gc_platform_release_memory(void *ptr, size_t size) { + GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize())); + GC_ASSERT_EQ(size, align_down(size, getpagesize())); + if (munmap(ptr, size) != 0) + perror("failed to unmap memory"); +} + +int gc_platform_populate_memory(void *ptr, size_t size) { + GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize())); + GC_ASSERT_EQ(size, align_down(size, getpagesize())); + if (madvise(ptr, size, MADV_WILLNEED) == 0) + return 1; + perror("failed to populate memory"); + return 0; +} + +int gc_platform_discard_memory(void *ptr, size_t size) { + GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize())); + GC_ASSERT_EQ(size, align_down(size, getpagesize())); + if (madvise(ptr, size, MADV_DONTNEED) == 0) + return 1; + perror("failed to discard memory"); + return 0; +} diff --git a/libguile/whippet/src/gc-platform.h b/libguile/whippet/src/gc-platform.h new file mode 100644 index 000000000..b642e8157 --- /dev/null +++ b/libguile/whippet/src/gc-platform.h @@ -0,0 +1,48 @@ +#ifndef GC_PLATFORM_H +#define GC_PLATFORM_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include + +#include "gc-visibility.h" + +struct gc_heap; + +GC_INTERNAL void gc_platform_init(void); +GC_INTERNAL uintptr_t gc_platform_current_thread_stack_base(void); +GC_INTERNAL +void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start, + uintptr_t end, + struct gc_heap *heap, + void *data), + struct gc_heap *heap, + void *data); +GC_INTERNAL int gc_platform_processor_count(void); +GC_INTERNAL uint64_t gc_platform_monotonic_nanoseconds(void); + +GC_INTERNAL size_t gc_platform_page_size(void); + +struct gc_reservation { + uintptr_t base; + size_t size; +}; + +GC_INTERNAL +struct gc_reservation gc_platform_reserve_memory(size_t size, size_t alignment); +GC_INTERNAL +void* +gc_platform_acquire_memory_from_reservation(struct gc_reservation reservation, + size_t offset, size_t size); +GC_INTERNAL +void gc_platform_release_reservation(struct gc_reservation reservation); + +GC_INTERNAL void* gc_platform_acquire_memory(size_t size, size_t alignment); +GC_INTERNAL void gc_platform_release_memory(void *base, size_t size); + +GC_INTERNAL int gc_platform_populate_memory(void *addr, size_t size); +GC_INTERNAL int gc_platform_discard_memory(void *addr, size_t 
size); + +#endif // GC_PLATFORM_H diff --git a/libguile/whippet/src/gc-stack.c b/libguile/whippet/src/gc-stack.c new file mode 100644 index 000000000..318f5757f --- /dev/null +++ b/libguile/whippet/src/gc-stack.c @@ -0,0 +1,92 @@ +// For pthread_getattr_np. +#define _GNU_SOURCE +#include +#include +#include +#include + +#define GC_IMPL 1 + +#include "debug.h" +#include "gc-align.h" +#include "gc-assert.h" +#include "gc-inline.h" +#include "gc-platform.h" +#include "gc-stack.h" + +static uintptr_t current_thread_hot_stack_addr(void) { +#ifdef __GNUC__ + return (uintptr_t)__builtin_frame_address(0); +#else + uintptr_t local; + return (uintptr_t)&local; +#endif +} + +// FIXME: check platform stack growth direction. +#define HOTTER_THAN <= + +static void capture_current_thread_hot_stack_addr(struct gc_stack_addr *addr) { + addr->addr = current_thread_hot_stack_addr(); +} + +static void capture_current_thread_cold_stack_addr(struct gc_stack_addr *addr) { + addr->addr = gc_platform_current_thread_stack_base(); +} + +void gc_stack_init(struct gc_stack *stack, struct gc_stack_addr *base) { + if (base) + stack->cold = *base; + else + capture_current_thread_cold_stack_addr(&stack->cold); + stack->hot = stack->cold; +} + +void gc_stack_capture_hot(struct gc_stack *stack) { + capture_current_thread_hot_stack_addr(&stack->hot); + setjmp(stack->registers); + GC_ASSERT(stack->hot.addr HOTTER_THAN stack->cold.addr); +} + +static void* call_with_stack(void* (*)(struct gc_stack_addr*, void*), + struct gc_stack_addr*, void*) GC_NEVER_INLINE; +static void* call_with_stack(void* (*f)(struct gc_stack_addr *, void *), + struct gc_stack_addr *addr, void *arg) { + return f(addr, arg); +} +void* gc_call_with_stack_addr(void* (*f)(struct gc_stack_addr *base, + void *arg), + void *arg) { + struct gc_stack_addr base; + capture_current_thread_hot_stack_addr(&base); + return call_with_stack(f, &base, arg); +} + +void gc_stack_visit(struct gc_stack *stack, + void (*visit)(uintptr_t low, uintptr_t high, + struct gc_heap *heap, void *data), + struct gc_heap *heap, + void *data) { + { + uintptr_t low = (uintptr_t)stack->registers; + GC_ASSERT(low == align_down(low, sizeof(uintptr_t))); + uintptr_t high = low + sizeof(jmp_buf); + DEBUG("found mutator register roots for %p: [%p,%p)\n", stack, + (void*)low, (void*)high); + visit(low, high, heap, data); + } + + if (0 HOTTER_THAN 1) { + DEBUG("found mutator stack roots for %p: [%p,%p)\n", stack, + (void*)stack->hot.addr, (void*)stack->cold.addr); + visit(align_up(stack->hot.addr, sizeof(uintptr_t)), + align_down(stack->cold.addr, sizeof(uintptr_t)), + heap, data); + } else { + DEBUG("found mutator stack roots for %p: [%p,%p)\n", stack, + (void*)stack->cold.addr, (void*)stack->hot.addr); + visit(align_up(stack->cold.addr, sizeof(uintptr_t)), + align_down(stack->hot.addr, sizeof(uintptr_t)), + heap, data); + } +} diff --git a/libguile/whippet/src/gc-stack.h b/libguile/whippet/src/gc-stack.h new file mode 100644 index 000000000..15df9df6d --- /dev/null +++ b/libguile/whippet/src/gc-stack.h @@ -0,0 +1,33 @@ +#ifndef GC_STACK_H +#define GC_STACK_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include "gc-inline.h" +#include + +struct gc_stack_addr { + uintptr_t addr; +}; + +struct gc_stack { + struct gc_stack_addr cold; + struct gc_stack_addr hot; + jmp_buf registers; +}; + +struct gc_heap; + +GC_INTERNAL void gc_stack_init(struct gc_stack *stack, + struct gc_stack_addr *base); +GC_INTERNAL void gc_stack_capture_hot(struct gc_stack *stack); +GC_INTERNAL 
void gc_stack_visit(struct gc_stack *stack, + void (*visit)(uintptr_t low, uintptr_t high, + struct gc_heap *heap, + void *data), + struct gc_heap *heap, + void *data); + +#endif // GC_STACK_H diff --git a/libguile/whippet/src/gc-trace.h b/libguile/whippet/src/gc-trace.h new file mode 100644 index 000000000..cc1dd2808 --- /dev/null +++ b/libguile/whippet/src/gc-trace.h @@ -0,0 +1,56 @@ +#ifndef GC_TRACE_H +#define GC_TRACE_H + +#ifndef GC_IMPL +#error internal header file, not part of API +#endif + +#include "gc-config.h" +#include "gc-assert.h" +#include "gc-conservative-ref.h" +#include "gc-embedder-api.h" + +static inline int gc_has_mutator_conservative_roots(void) { + return GC_CONSERVATIVE_ROOTS; +} +static inline int gc_mutator_conservative_roots_may_be_interior(void) { + return 1; +} +static inline int gc_has_global_conservative_roots(void) { + return GC_CONSERVATIVE_ROOTS; +} +static inline int gc_has_conservative_intraheap_edges(void) { + return GC_CONSERVATIVE_TRACE; +} + +static inline int gc_has_conservative_roots(void) { + return gc_has_mutator_conservative_roots() || + gc_has_global_conservative_roots(); +} + +enum gc_trace_kind { + GC_TRACE_PRECISELY, + GC_TRACE_NONE, + GC_TRACE_CONSERVATIVELY, + GC_TRACE_EPHEMERON, +}; + +struct gc_trace_plan { + enum gc_trace_kind kind; + size_t size; // For conservative tracing. +}; + +static inline int +gc_conservative_ref_might_be_a_heap_object(struct gc_conservative_ref ref, + int possibly_interior) { + // Assume that the minimum page size is 4096, and that the first page + // will contain no heap objects. + if (gc_conservative_ref_value(ref) < 4096) + return 0; + if (possibly_interior) + return 1; + return gc_is_valid_conservative_ref_displacement + (gc_conservative_ref_value(ref) & (sizeof(uintptr_t) - 1)); +} + +#endif // GC_TRACE_H diff --git a/libguile/whippet/src/gc-tracepoint.c b/libguile/whippet/src/gc-tracepoint.c new file mode 100644 index 000000000..aa8ebc4a1 --- /dev/null +++ b/libguile/whippet/src/gc-tracepoint.c @@ -0,0 +1,6 @@ +#include +#ifdef GC_TRACEPOINT_LTTNG +#define LTTNG_UST_TRACEPOINT_DEFINE +#define LTTNG_UST_TRACEPOINT_CREATE_PROBES +#include "gc-lttng.h" +#endif // GC_TRACEPOINT_LTTNG diff --git a/libguile/whippet/src/growable-heap-sizer.h b/libguile/whippet/src/growable-heap-sizer.h new file mode 100644 index 000000000..49e5ad377 --- /dev/null +++ b/libguile/whippet/src/growable-heap-sizer.h @@ -0,0 +1,59 @@ +#ifndef GROWABLE_HEAP_SIZER_H +#define GROWABLE_HEAP_SIZER_H + +#include +#include +#include + +#include "assert.h" +#include "heap-sizer.h" + +// This is a simple heap-sizing algorithm that will grow the heap if it is +// smaller than a given multiplier of the live data size. It does not shrink +// the heap. 
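+//
+// For example (illustrative numbers, not defaults): with a multiplier of
+// 1.75 and 100 MB of live data at the last collection, the target size is
+// 175 MB; a 128 MB heap would be grown to 175 MB, while a 256 MB heap
+// would be left alone.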
+ +struct gc_growable_heap_sizer { + struct gc_heap *heap; + double multiplier; + pthread_mutex_t lock; +}; + +static void +gc_growable_heap_sizer_set_multiplier(struct gc_growable_heap_sizer *sizer, + double multiplier) { + pthread_mutex_lock(&sizer->lock); + sizer->multiplier = multiplier; + pthread_mutex_unlock(&sizer->lock); +} + +static void +gc_growable_heap_sizer_on_gc(struct gc_growable_heap_sizer *sizer, + size_t heap_size, size_t live_bytes, + uint64_t pause_ns, + void (*set_heap_size)(struct gc_heap*, size_t)) { + pthread_mutex_lock(&sizer->lock); + size_t target_size = live_bytes * sizer->multiplier; + if (target_size > heap_size) + set_heap_size(sizer->heap, target_size); + pthread_mutex_unlock(&sizer->lock); +} + +static struct gc_growable_heap_sizer* +gc_make_growable_heap_sizer(struct gc_heap *heap, double multiplier) { + struct gc_growable_heap_sizer *sizer; + sizer = malloc(sizeof(*sizer)); + if (!sizer) + GC_CRASH(); + memset(sizer, 0, sizeof(*sizer)); + sizer->heap = heap; + sizer->multiplier = multiplier; + pthread_mutex_init(&sizer->lock, NULL); + return sizer; +} + +static void +gc_destroy_growable_heap_sizer(struct gc_growable_heap_sizer *sizer) { + free(sizer); +} + +#endif // GROWABLE_HEAP_SIZER_H diff --git a/libguile/whippet/src/heap-sizer.h b/libguile/whippet/src/heap-sizer.h new file mode 100644 index 000000000..46ef841c8 --- /dev/null +++ b/libguile/whippet/src/heap-sizer.h @@ -0,0 +1,74 @@ +#ifndef HEAP_SIZER_H +#define HEAP_SIZER_H + +#include "gc-api.h" + +#include "gc-options-internal.h" +#include "growable-heap-sizer.h" +#include "adaptive-heap-sizer.h" + +struct gc_heap_sizer { + enum gc_heap_size_policy policy; + union { + struct gc_growable_heap_sizer* growable; + struct gc_adaptive_heap_sizer* adaptive; + }; +}; + +static struct gc_heap_sizer +gc_make_heap_sizer(struct gc_heap *heap, + const struct gc_common_options *options, + uint64_t (*get_allocation_counter_from_thread)(struct gc_heap*), + void (*set_heap_size_from_thread)(struct gc_heap*, size_t), + struct gc_background_thread *thread) { + struct gc_heap_sizer ret = { options->heap_size_policy, }; + switch (options->heap_size_policy) { + case GC_HEAP_SIZE_FIXED: + break; + + case GC_HEAP_SIZE_GROWABLE: + ret.growable = + gc_make_growable_heap_sizer(heap, options->heap_size_multiplier); + break; + + case GC_HEAP_SIZE_ADAPTIVE: + ret.adaptive = + gc_make_adaptive_heap_sizer (heap, options->heap_expansiveness, + get_allocation_counter_from_thread, + set_heap_size_from_thread, + thread); + break; + + default: + GC_CRASH(); + } + return ret; +} + +static void +gc_heap_sizer_on_gc(struct gc_heap_sizer sizer, size_t heap_size, + size_t live_bytes, size_t pause_ns, + void (*set_heap_size)(struct gc_heap*, size_t)) { + switch (sizer.policy) { + case GC_HEAP_SIZE_FIXED: + break; + + case GC_HEAP_SIZE_GROWABLE: + gc_growable_heap_sizer_on_gc(sizer.growable, heap_size, live_bytes, + pause_ns, set_heap_size); + break; + + case GC_HEAP_SIZE_ADAPTIVE: + if (sizer.adaptive->background_task_id < 0) + gc_adaptive_heap_sizer_background_task(sizer.adaptive); + gc_adaptive_heap_sizer_on_gc(sizer.adaptive, live_bytes, pause_ns, + set_heap_size); + break; + + default: + GC_CRASH(); + } +} + + +#endif // HEAP_SIZER_H diff --git a/libguile/whippet/src/large-object-space.h b/libguile/whippet/src/large-object-space.h new file mode 100644 index 000000000..cdd798343 --- /dev/null +++ b/libguile/whippet/src/large-object-space.h @@ -0,0 +1,525 @@ +#ifndef LARGE_OBJECT_SPACE_H +#define LARGE_OBJECT_SPACE_H + +#include 
+#include +#include +#include +#include +#include + +#include "gc-assert.h" +#include "gc-ref.h" +#include "gc-conservative-ref.h" +#include "gc-trace.h" +#include "address-map.h" +#include "address-set.h" +#include "background-thread.h" +#include "freelist.h" + +// A mark-sweep space with generational support. + +struct gc_heap; + +enum large_object_state { + LARGE_OBJECT_NURSERY = 0, + LARGE_OBJECT_MARKED_BIT = 1, + LARGE_OBJECT_MARK_TOGGLE_BIT = 2, + LARGE_OBJECT_MARK_0 = LARGE_OBJECT_MARKED_BIT, + LARGE_OBJECT_MARK_1 = LARGE_OBJECT_MARKED_BIT | LARGE_OBJECT_MARK_TOGGLE_BIT +}; + +struct large_object { + uintptr_t addr; + size_t size; +}; +struct large_object_node; +struct large_object_live_data { + uint8_t mark; + enum gc_trace_kind trace; +}; +struct large_object_dead_data { + uint8_t age; + struct large_object_node **prev; + struct large_object_node *next; +}; +struct large_object_data { + uint8_t is_live; + union { + struct large_object_live_data live; + struct large_object_dead_data dead; + }; +}; + +#define SPLAY_TREE_PREFIX large_object_ +typedef struct large_object large_object_key_span; +typedef uintptr_t large_object_key; +typedef struct large_object_data large_object_value; +static inline int +large_object_compare(uintptr_t addr, struct large_object obj) { + if (addr < obj.addr) return -1; + if (addr - obj.addr < obj.size) return 0; + return 1; +} +static inline uintptr_t +large_object_span_start(struct large_object obj) { + return obj.addr; +} +#include "splay-tree.h" + +DEFINE_FREELIST(large_object_freelist, sizeof(uintptr_t) * 8 - 1, 2, + struct large_object_node*); + +struct large_object_space { + // Lock for object_map, quarantine, nursery, and marked. + pthread_mutex_t lock; + // Lock for object_tree. + pthread_mutex_t object_tree_lock; + // Lock for remembered_edges. + pthread_mutex_t remembered_edges_lock; + // Locking order: You must hold the space lock when taking + // object_tree_lock. Take no other lock while holding + // object_tree_lock. remembered_edges_lock is a leaf; take no locks + // when holding it. + + // The value for a large_object_node's "mark" field indicating a + // marked object; always nonzero, and alternating between two values + // at every major GC. + uint8_t marked; + + // Splay tree of objects, keyed by tuple. Useful when + // looking up object-for-address. + struct large_object_tree object_tree; + + // Hash table of objects, where values are pointers to splay tree + // nodes. Useful when you have the object address and just want to + // check something about it (for example its size). + struct address_map object_map; + + // In generational configurations, we collect all allocations in the + // last cycle into the nursery. + struct address_map nursery; + + // Size-segregated freelist of dead objects. Allocations are first + // served from the quarantine freelist before falling back to the OS + // if needed. Collected objects spend a second or two in quarantine + // before being returned to the OS. This is an optimization to avoid + // mucking about too much with the TLB and so on. + struct large_object_freelist quarantine; + + // Set of edges from lospace that may reference young objects, + // possibly in other spaces. 
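+  // (An "edge" is the address of a field within a lospace object.  The
+  // write barrier calls large_object_space_remember_edge, which records an
+  // address at most once and reports whether it was newly remembered, so
+  // this set also serves to deduplicate barrier hits.)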
+ struct address_set remembered_edges; + + size_t page_size; + size_t page_size_log2; + size_t total_pages; + size_t free_pages; + size_t live_pages_at_last_collection; + size_t pages_freed_by_last_collection; + int synchronous_release; +}; + +static size_t +large_object_space_npages(struct large_object_space *space, size_t bytes) { + return (bytes + space->page_size - 1) >> space->page_size_log2; +} + +static size_t +large_object_space_size_at_last_collection(struct large_object_space *space) { + return space->live_pages_at_last_collection << space->page_size_log2; +} + +static inline int +large_object_space_contains_with_lock(struct large_object_space *space, + struct gc_ref ref) { + return address_map_contains(&space->object_map, gc_ref_value(ref)); +} + +static inline int +large_object_space_contains(struct large_object_space *space, + struct gc_ref ref) { + pthread_mutex_lock(&space->lock); + int ret = large_object_space_contains_with_lock(space, ref); + pthread_mutex_unlock(&space->lock); + return ret; +} + +static inline struct gc_ref +large_object_space_object_containing_edge(struct large_object_space *space, + struct gc_edge edge) { + pthread_mutex_lock(&space->object_tree_lock); + struct large_object_node *node = + large_object_tree_lookup(&space->object_tree, gc_edge_address(edge)); + uintptr_t addr = (node && node->value.is_live) ? node->key.addr : 0; + pthread_mutex_unlock(&space->object_tree_lock); + return gc_ref(addr); +} + +static void +large_object_space_start_gc(struct large_object_space *space, int is_minor_gc) { + // Take the space lock to prevent + // large_object_space_process_quarantine from concurrently mutating + // the object map. + pthread_mutex_lock(&space->lock); + if (!is_minor_gc) { + space->marked ^= LARGE_OBJECT_MARK_TOGGLE_BIT; + space->live_pages_at_last_collection = 0; + } +} + +static inline struct gc_trace_plan +large_object_space_object_trace_plan(struct large_object_space *space, + struct gc_ref ref) { + uintptr_t node_bits = + address_map_lookup(&space->object_map, gc_ref_value(ref), 0); + GC_ASSERT(node_bits); + struct large_object_node *node = (struct large_object_node*) node_bits; + switch (node->value.live.trace) { + case GC_TRACE_PRECISELY: + return (struct gc_trace_plan){ GC_TRACE_PRECISELY, }; + case GC_TRACE_NONE: + return (struct gc_trace_plan){ GC_TRACE_NONE, }; +#if GC_CONSERVATIVE_TRACE + case GC_TRACE_CONSERVATIVELY: { + return (struct gc_trace_plan){ GC_TRACE_CONSERVATIVELY, node->key.size }; + } + // No large ephemerons. 
+#endif + default: + GC_CRASH(); + } +} + +static uint8_t* +large_object_node_mark_loc(struct large_object_node *node) { + GC_ASSERT(node->value.is_live); + return &node->value.live.mark; +} + +static uint8_t +large_object_node_get_mark(struct large_object_node *node) { + return atomic_load_explicit(large_object_node_mark_loc(node), + memory_order_acquire); +} + +static struct large_object_node* +large_object_space_lookup(struct large_object_space *space, struct gc_ref ref) { + return (struct large_object_node*) address_map_lookup(&space->object_map, + gc_ref_value(ref), + 0); +} + +static int +large_object_space_mark(struct large_object_space *space, struct gc_ref ref) { + struct large_object_node *node = large_object_space_lookup(space, ref); + if (!node) + return 0; + GC_ASSERT(node->value.is_live); + + uint8_t *loc = large_object_node_mark_loc(node); + uint8_t mark = atomic_load_explicit(loc, memory_order_relaxed); + do { + if (mark == space->marked) + return 0; + } while (!atomic_compare_exchange_weak_explicit(loc, &mark, space->marked, + memory_order_acq_rel, + memory_order_acquire)); + + size_t pages = node->key.size >> space->page_size_log2; + atomic_fetch_add(&space->live_pages_at_last_collection, pages); + + return 1; +} + +static int +large_object_space_is_marked(struct large_object_space *space, + struct gc_ref ref) { + struct large_object_node *node = large_object_space_lookup(space, ref); + if (!node) + return 0; + GC_ASSERT(node->value.is_live); + + return atomic_load_explicit(large_object_node_mark_loc(node), + memory_order_acquire) == space->marked; +} + +static int +large_object_space_is_survivor(struct large_object_space *space, + struct gc_ref ref) { + GC_ASSERT(large_object_space_contains(space, ref)); + pthread_mutex_lock(&space->lock); + int old = large_object_space_is_marked(space, ref); + pthread_mutex_unlock(&space->lock); + return old; +} + +static int +large_object_space_remember_edge(struct large_object_space *space, + struct gc_ref obj, + struct gc_edge edge) { + GC_ASSERT(large_object_space_contains(space, obj)); + if (!large_object_space_is_survivor(space, obj)) + return 0; + + uintptr_t edge_addr = gc_edge_address(edge); + int remembered = 0; + pthread_mutex_lock(&space->remembered_edges_lock); + if (!address_set_contains(&space->remembered_edges, edge_addr)) { + address_set_add(&space->remembered_edges, edge_addr); + remembered = 1; + } + pthread_mutex_unlock(&space->remembered_edges_lock); + return remembered; +} + +static void +large_object_space_forget_edge(struct large_object_space *space, + struct gc_edge edge) { + uintptr_t edge_addr = gc_edge_address(edge); + pthread_mutex_lock(&space->remembered_edges_lock); + GC_ASSERT(address_set_contains(&space->remembered_edges, edge_addr)); + address_set_remove(&space->remembered_edges, edge_addr); + pthread_mutex_unlock(&space->remembered_edges_lock); +} + +static void +large_object_space_clear_remembered_edges(struct large_object_space *space) { + address_set_clear(&space->remembered_edges); +} + +static void +large_object_space_add_to_freelist(struct large_object_space *space, + struct large_object_node *node) { + node->value.is_live = 0; + struct large_object_dead_data *data = &node->value.dead; + memset(data, 0, sizeof(*data)); + data->age = 0; + struct large_object_node **bucket = + large_object_freelist_bucket(&space->quarantine, node->key.size); + data->next = *bucket; + if (data->next) + data->next->value.dead.prev = &data->next; + data->prev = bucket; + *bucket = node; +} + +static void 
+large_object_space_remove_from_freelist(struct large_object_space *space, + struct large_object_node *node) { + GC_ASSERT(!node->value.is_live); + struct large_object_dead_data *dead = &node->value.dead; + GC_ASSERT(dead->prev); + if (dead->next) + dead->next->value.dead.prev = dead->prev; + *dead->prev = dead->next; + dead->prev = NULL; + dead->next = NULL; +} + +static void +large_object_space_sweep_one(uintptr_t addr, uintptr_t node_bits, + void *data) { + struct large_object_space *space = data; + struct large_object_node *node = (struct large_object_node*) node_bits; + if (!node->value.is_live) + return; + GC_ASSERT(node->value.is_live); + uint8_t mark = atomic_load_explicit(large_object_node_mark_loc(node), + memory_order_acquire); + if (mark != space->marked) + large_object_space_add_to_freelist(space, node); +} + +static void +large_object_space_process_quarantine(void *data) { + struct large_object_space *space = data; + pthread_mutex_lock(&space->lock); + pthread_mutex_lock(&space->object_tree_lock); + for (size_t idx = 0; idx < large_object_freelist_num_size_classes(); idx++) { + struct large_object_node **link = &space->quarantine.buckets[idx]; + for (struct large_object_node *node = *link; node; node = *link) { + GC_ASSERT(!node->value.is_live); + if (++node->value.dead.age < 2) { + link = &node->value.dead.next; + } else { + struct large_object obj = node->key; + large_object_space_remove_from_freelist(space, node); + address_map_remove(&space->object_map, obj.addr); + large_object_tree_remove(&space->object_tree, obj.addr); + gc_platform_release_memory((void*)obj.addr, obj.size); + } + } + } + pthread_mutex_unlock(&space->object_tree_lock); + pthread_mutex_unlock(&space->lock); +} + +static void +large_object_space_finish_gc(struct large_object_space *space, + int is_minor_gc) { + if (GC_GENERATIONAL) { + address_map_for_each(is_minor_gc ? &space->nursery : &space->object_map, + large_object_space_sweep_one, + space); + address_map_clear(&space->nursery); + } else { + address_map_for_each(&space->object_map, + large_object_space_sweep_one, + space); + } + size_t free_pages = + space->total_pages - space->live_pages_at_last_collection; + space->pages_freed_by_last_collection = free_pages - space->free_pages; + space->free_pages = free_pages; + pthread_mutex_unlock(&space->lock); + if (space->synchronous_release) + large_object_space_process_quarantine(space); +} + +static void +large_object_space_add_to_allocation_counter(struct large_object_space *space, + uint64_t *counter) { + size_t pages = space->total_pages - space->free_pages; + pages -= space->live_pages_at_last_collection; + *counter += pages << space->page_size_log2; +} + +static inline struct gc_ref +large_object_space_mark_conservative_ref(struct large_object_space *space, + struct gc_conservative_ref ref, + int possibly_interior) { + uintptr_t addr = gc_conservative_ref_value(ref); + + if (!possibly_interior) { + // Addr not aligned on page boundary? Not a large object. + // Otherwise strip the displacement to obtain the true base address. 
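+    // For illustration (addresses invented), with 4 KiB pages: a
+    // non-interior ref of 0x203008 has displacement 0x8; if the embedder
+    // declares 8 a valid displacement, the candidate base is 0x203000 and
+    // is looked up in the object map below.  A displacement the embedder
+    // does not allow causes the ref to be rejected outright.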
+ uintptr_t displacement = addr & (space->page_size - 1); + if (!gc_is_valid_conservative_ref_displacement(displacement)) + return gc_ref_null(); + addr -= displacement; + } + + struct large_object_node *node; + if (possibly_interior) { + pthread_mutex_lock(&space->object_tree_lock); + node = large_object_tree_lookup(&space->object_tree, addr); + pthread_mutex_unlock(&space->object_tree_lock); + } else { + node = large_object_space_lookup(space, gc_ref(addr)); + } + + if (node && node->value.is_live && + large_object_space_mark(space, gc_ref(node->key.addr))) + return gc_ref(node->key.addr); + + return gc_ref_null(); +} + +static void* +large_object_space_alloc(struct large_object_space *space, size_t npages, + enum gc_trace_kind trace) { + void *ret = NULL; + pthread_mutex_lock(&space->lock); + + size_t size = npages << space->page_size_log2; + for (size_t idx = large_object_freelist_size_class(size); + idx < large_object_freelist_num_size_classes(); + idx++) { + struct large_object_node *node = space->quarantine.buckets[idx]; + while (node && node->key.size < size) + node = node->value.dead.next; + if (node) { + // We found a suitable hole in quarantine. Unlink it from the + // freelist. + large_object_space_remove_from_freelist(space, node); + + // Mark the hole as live. + node->value.is_live = 1; + memset(&node->value.live, 0, sizeof(node->value.live)); + node->value.live.mark = LARGE_OBJECT_NURSERY; + node->value.live.trace = trace; + + // If the hole is actually too big, trim its tail. + if (node->key.size > size) { + struct large_object tail = {node->key.addr + size, node->key.size - size}; + struct large_object_data tail_value = {0,}; + node->key.size = size; + pthread_mutex_lock(&space->object_tree_lock); + struct large_object_node *tail_node = + large_object_tree_insert(&space->object_tree, tail, tail_value); + pthread_mutex_unlock(&space->object_tree_lock); + uintptr_t tail_node_bits = (uintptr_t)tail_node; + address_map_add(&space->object_map, tail_node->key.addr, + tail_node_bits); + large_object_space_add_to_freelist(space, tail_node); + } + + // Add the object to the nursery. + if (GC_GENERATIONAL) + address_map_add(&space->nursery, node->key.addr, (uintptr_t)node); + + space->free_pages -= npages; + ret = (void*)node->key.addr; + memset(ret, 0, size); + break; + } + } + + // If we didn't find anything in the quarantine, get fresh pages from the OS. 
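+  // (Note that the quarantine path above zeroes reused memory explicitly
+  // with memset, whereas this path relies on fresh anonymous mappings being
+  // zero-filled already; the alignment argument of 0 requests nothing
+  // beyond page alignment.)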
+ if (!ret) { + ret = gc_platform_acquire_memory(size, 0); + if (ret) { + uintptr_t addr = (uintptr_t)ret; + struct large_object k = { addr, size }; + struct large_object_data v = {0,}; + v.is_live = 1; + v.live.mark = LARGE_OBJECT_NURSERY; + v.live.trace = trace; + + pthread_mutex_lock(&space->object_tree_lock); + struct large_object_node *node = + large_object_tree_insert(&space->object_tree, k, v); + uintptr_t node_bits = (uintptr_t)node; + address_map_add(&space->object_map, addr, node_bits); + space->total_pages += npages; + pthread_mutex_unlock(&space->object_tree_lock); + } + } + + pthread_mutex_unlock(&space->lock); + return ret; +} + +static int +large_object_space_init(struct large_object_space *space, + struct gc_heap *heap, + struct gc_background_thread *thread) { + memset(space, 0, sizeof(*space)); + pthread_mutex_init(&space->lock, NULL); + pthread_mutex_init(&space->object_tree_lock, NULL); + pthread_mutex_init(&space->remembered_edges_lock, NULL); + + space->page_size = getpagesize(); + space->page_size_log2 = __builtin_ctz(space->page_size); + + space->marked = LARGE_OBJECT_MARK_0; + + large_object_tree_init(&space->object_tree); + address_map_init(&space->object_map); + address_map_init(&space->nursery); + large_object_freelist_init(&space->quarantine); + + address_set_init(&space->remembered_edges); + + if (thread) + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START, + large_object_space_process_quarantine, + space); + else + space->synchronous_release = 1; + + return 1; +} + +#endif // LARGE_OBJECT_SPACE_H diff --git a/libguile/whippet/src/local-worklist.h b/libguile/whippet/src/local-worklist.h new file mode 100644 index 000000000..8dcd3e20d --- /dev/null +++ b/libguile/whippet/src/local-worklist.h @@ -0,0 +1,59 @@ +#ifndef LOCAL_WORKLIST_H +#define LOCAL_WORKLIST_H + +#include "assert.h" + +#define LOCAL_WORKLIST_SIZE 1024 +#define LOCAL_WORKLIST_MASK (LOCAL_WORKLIST_SIZE - 1) +#define LOCAL_WORKLIST_SHARE_AMOUNT (LOCAL_WORKLIST_SIZE * 3 / 4) +struct local_worklist { + size_t read; + size_t write; + struct gc_ref data[LOCAL_WORKLIST_SIZE]; +}; + +static inline void +local_worklist_init(struct local_worklist *q) { + q->read = q->write = 0; +} +static inline void +local_worklist_poison(struct local_worklist *q) { + q->read = 0; q->write = LOCAL_WORKLIST_SIZE; +} +static inline size_t +local_worklist_size(struct local_worklist *q) { + return q->write - q->read; +} +static inline int +local_worklist_empty(struct local_worklist *q) { + return local_worklist_size(q) == 0; +} +static inline int +local_worklist_full(struct local_worklist *q) { + return local_worklist_size(q) >= LOCAL_WORKLIST_SIZE; +} +static inline void +local_worklist_push(struct local_worklist *q, struct gc_ref v) { + ASSERT(!local_worklist_full(q)); + q->data[q->write++ & LOCAL_WORKLIST_MASK] = v; +} +static inline struct gc_ref +local_worklist_pop(struct local_worklist *q) { + ASSERT(!local_worklist_empty(q)); + return q->data[q->read++ & LOCAL_WORKLIST_MASK]; +} + +static inline size_t +local_worklist_pop_many(struct local_worklist *q, struct gc_ref **objv, + size_t limit) { + size_t avail = local_worklist_size(q); + size_t read = q->read & LOCAL_WORKLIST_MASK; + size_t contig = LOCAL_WORKLIST_SIZE - read; + if (contig < avail) avail = contig; + if (limit < avail) avail = limit; + *objv = q->data + read; + q->read += avail; + return avail; +} + +#endif // LOCAL_WORKLIST_H diff --git a/libguile/whippet/src/mmc.c b/libguile/whippet/src/mmc.c new file mode 100644 index 000000000..661b7084b --- 
/dev/null +++ b/libguile/whippet/src/mmc.c @@ -0,0 +1,1266 @@ +#include +#include +#include +#include +#include + +#include "gc-api.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "background-thread.h" +#include "debug.h" +#include "field-set.h" +#include "gc-align.h" +#include "gc-inline.h" +#include "gc-platform.h" +#include "gc-stack.h" +#include "gc-trace.h" +#include "gc-tracepoint.h" +#include "heap-sizer.h" +#include "large-object-space.h" +#include "nofl-space.h" +#if GC_PARALLEL +#include "parallel-tracer.h" +#else +#include "serial-tracer.h" +#endif +#include "spin.h" +#include "mmc-attrs.h" + +#define LARGE_OBJECT_THRESHOLD 8192 + +struct gc_heap { + struct nofl_space nofl_space; + struct large_object_space large_object_space; + struct gc_extern_space *extern_space; + struct gc_field_set remembered_set; + size_t large_object_pages; + pthread_mutex_t lock; + pthread_cond_t collector_cond; + pthread_cond_t mutator_cond; + size_t size; + size_t total_allocated_bytes_at_last_gc; + size_t size_at_last_gc; + int collecting; + int check_pending_ephemerons; + struct gc_pending_ephemerons *pending_ephemerons; + struct gc_finalizer_state *finalizer_state; + enum gc_collection_kind gc_kind; + size_t mutator_count; + size_t paused_mutator_count; + size_t inactive_mutator_count; + struct gc_heap_roots *roots; + struct gc_mutator *mutators; + long count; + struct gc_tracer tracer; + double fragmentation_low_threshold; + double fragmentation_high_threshold; + double minor_gc_yield_threshold; + double major_gc_yield_threshold; + double minimum_major_gc_yield_threshold; + double pending_ephemerons_size_factor; + double pending_ephemerons_size_slop; + struct gc_background_thread *background_thread; + struct gc_heap_sizer sizer; + struct gc_event_listener event_listener; + void *event_listener_data; +}; + +#define HEAP_EVENT(heap, event, ...) do { \ + (heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) +#define MUTATOR_EVENT(mut, event, ...) 
do { \ + (mut)->heap->event_listener.event((mut)->event_listener_data, \ + ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) + +struct gc_mutator { + struct nofl_allocator allocator; + struct gc_field_set_writer logger; + struct gc_heap *heap; + struct gc_stack stack; + struct gc_mutator_roots *roots; + void *event_listener_data; + struct gc_mutator *next; + struct gc_mutator *prev; +}; + +struct gc_trace_worker_data { + struct nofl_allocator allocator; +}; + +static inline struct nofl_space* +heap_nofl_space(struct gc_heap *heap) { + return &heap->nofl_space; +} +static inline struct large_object_space* +heap_large_object_space(struct gc_heap *heap) { + return &heap->large_object_space; +} +static inline struct gc_extern_space* +heap_extern_space(struct gc_heap *heap) { + return heap->extern_space; +} +static inline struct gc_heap* +mutator_heap(struct gc_mutator *mutator) { + return mutator->heap; +} + +struct gc_heap* gc_mutator_heap(struct gc_mutator *mutator) { + return mutator_heap(mutator); +} +uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap) { + GC_CRASH(); +} +uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap) { + GC_CRASH(); +} + +static void +gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data), + struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker) { + struct gc_trace_worker_data data; + nofl_allocator_reset(&data.allocator); + f(tracer, heap, worker, &data); + nofl_allocator_finish(&data.allocator, heap_nofl_space(heap)); +} + +static inline int +do_trace(struct gc_heap *heap, struct gc_edge edge, struct gc_ref ref, + struct gc_trace_worker_data *data) { + if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) + return nofl_space_evacuate_or_mark_object(heap_nofl_space(heap), edge, ref, + &data->allocator); + else if (large_object_space_contains_with_lock(heap_large_object_space(heap), ref)) + return large_object_space_mark(heap_large_object_space(heap), ref); + else + return gc_extern_space_visit(heap_extern_space(heap), edge, ref); +} + +static inline int +trace_edge(struct gc_heap *heap, struct gc_edge edge, + struct gc_trace_worker_data *data) { + struct gc_ref ref = gc_edge_ref(edge); + if (gc_ref_is_null(ref) || gc_ref_is_immediate(ref)) + return 0; + + int is_new = do_trace(heap, edge, ref, data); + + if (is_new && + GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons, + memory_order_relaxed))) + gc_resolve_pending_ephemerons(ref, heap); + + return is_new; +} + +int +gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { + struct gc_ref ref = gc_edge_ref(edge); + GC_ASSERT(!gc_ref_is_null(ref)); + if (gc_ref_is_immediate(ref)) + return 1; + GC_ASSERT(gc_ref_is_heap_object(ref)); + struct nofl_space *nofl_space = heap_nofl_space(heap); + if (GC_LIKELY(nofl_space_contains(nofl_space, ref))) + return nofl_space_forward_or_mark_if_traced(nofl_space, edge, ref); + + struct large_object_space *lospace = heap_large_object_space(heap); + if (large_object_space_contains_with_lock(lospace, ref)) + return large_object_space_is_marked(lospace, ref); + + GC_CRASH(); +} + +static int +mutators_are_stopping(struct gc_heap *heap) { + return atomic_load_explicit(&heap->collecting, memory_order_relaxed); +} + +static inline void +heap_lock(struct gc_heap *heap) { + pthread_mutex_lock(&heap->lock); +} +static inline void +heap_unlock(struct gc_heap *heap) { 
+ pthread_mutex_unlock(&heap->lock); +} + +// with heap lock +static inline int +all_mutators_stopped(struct gc_heap *heap) { + return heap->mutator_count == + heap->paused_mutator_count + heap->inactive_mutator_count; +} + +static void +add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + mut->heap = heap; + mut->event_listener_data = + heap->event_listener.mutator_added(heap->event_listener_data); + nofl_allocator_reset(&mut->allocator); + gc_field_set_writer_init(&mut->logger, &heap->remembered_set); + heap_lock(heap); + // We have no roots. If there is a GC currently in progress, we have + // nothing to add. Just wait until it's done. + while (mutators_are_stopping(heap)) + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + mut->next = mut->prev = NULL; + struct gc_mutator *tail = heap->mutators; + if (tail) { + mut->next = tail; + tail->prev = mut; + } + heap->mutators = mut; + heap->mutator_count++; + heap_unlock(heap); +} + +static void +remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); + MUTATOR_EVENT(mut, mutator_removed); + mut->heap = NULL; + heap_lock(heap); + heap->mutator_count--; + if (mut->next) + mut->next->prev = mut->prev; + if (mut->prev) + mut->prev->next = mut->next; + else + heap->mutators = mut->next; + // We have no roots. If there is a GC stop currently in progress, + // maybe tell the controller it can continue. + if (mutators_are_stopping(heap) && all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + heap_unlock(heap); +} + +void +gc_mutator_set_roots(struct gc_mutator *mut, struct gc_mutator_roots *roots) { + mut->roots = roots; +} +void +gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { + heap->roots = roots; +} +void +gc_heap_set_extern_space(struct gc_heap *heap, struct gc_extern_space *space) { + heap->extern_space = space; +} + +static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap, + void *trace_data) GC_ALWAYS_INLINE; +static inline void +tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { + struct gc_trace_worker *worker = trace_data; + if (trace_edge(heap, edge, gc_trace_worker_data(worker))) + gc_trace_worker_enqueue(worker, gc_edge_ref(edge)); +} + +static inline int +trace_remembered_edge(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { + tracer_visit(edge, heap, trace_data); + // Keep the edge in the remembered set; we clear these in bulk later. 
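+  // (Presumably a zero return would tell gc_field_set_visit_edge_buffer to
+  // drop the edge; as it is, every remembered edge is retained until the
+  // bulk clear in clear_remembered_set.)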
+ return 1; +} + +static inline struct gc_ref +do_trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, + int possibly_interior) { + if (!gc_conservative_ref_might_be_a_heap_object(ref, possibly_interior)) + return gc_ref_null(); + + struct nofl_space *nofl_space = heap_nofl_space(heap); + if (GC_LIKELY(nofl_space_contains_conservative_ref(nofl_space, ref))) + return nofl_space_mark_conservative_ref(nofl_space, ref, possibly_interior); + + struct large_object_space *lospace = heap_large_object_space(heap); + return large_object_space_mark_conservative_ref(lospace, ref, + possibly_interior); +} + +static inline struct gc_ref +trace_conservative_ref(struct gc_heap *heap, struct gc_conservative_ref ref, + int possibly_interior) { + struct gc_ref ret = do_trace_conservative_ref(heap, ref, possibly_interior); + if (!gc_ref_is_null(ret)) { + if (GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons, + memory_order_relaxed))) + gc_resolve_pending_ephemerons(ret, heap); + } + + return ret; +} + +static inline void +tracer_trace_conservative_ref(struct gc_conservative_ref ref, + struct gc_heap *heap, + struct gc_trace_worker *worker, + int possibly_interior) { + struct gc_ref resolved = trace_conservative_ref(heap, ref, possibly_interior); + if (!gc_ref_is_null(resolved)) + gc_trace_worker_enqueue(worker, resolved); +} + +static inline struct gc_conservative_ref +load_conservative_ref(uintptr_t addr) { + GC_ASSERT((addr & (sizeof(uintptr_t) - 1)) == 0); + uintptr_t val; + memcpy(&val, (char*)addr, sizeof(uintptr_t)); + return gc_conservative_ref(val); +} + +static inline void +trace_conservative_edges(uintptr_t low, uintptr_t high, int possibly_interior, + struct gc_heap *heap, struct gc_trace_worker *worker) { + GC_ASSERT(low == align_down(low, sizeof(uintptr_t))); + GC_ASSERT(high == align_down(high, sizeof(uintptr_t))); + for (uintptr_t addr = low; addr < high; addr += sizeof(uintptr_t)) + tracer_trace_conservative_ref(load_conservative_ref(addr), heap, worker, + possibly_interior); +} + +static inline struct gc_trace_plan +trace_plan(struct gc_heap *heap, struct gc_ref ref) { + if (GC_LIKELY(nofl_space_contains(heap_nofl_space(heap), ref))) { + return nofl_space_object_trace_plan(heap_nofl_space(heap), ref); + } else { + return large_object_space_object_trace_plan(heap_large_object_space(heap), + ref); + } +} + +static inline void +trace_one(struct gc_ref ref, struct gc_heap *heap, + struct gc_trace_worker *worker) { + struct gc_trace_plan plan = trace_plan(heap, ref); + switch (plan.kind) { + case GC_TRACE_PRECISELY: + gc_trace_object(ref, tracer_visit, heap, worker, NULL); + break; + case GC_TRACE_NONE: + break; + case GC_TRACE_CONSERVATIVELY: { + // Intraheap edges are not interior. 
+ uintptr_t addr = gc_ref_value(ref); + int possibly_interior = 0; + trace_conservative_edges(addr, addr + plan.size, possibly_interior, + heap, worker); + break; + } + case GC_TRACE_EPHEMERON: + gc_trace_ephemeron(gc_ref_heap_object(ref), tracer_visit, heap, + worker); + break; + default: + GC_CRASH(); + } +} + +static inline void +trace_root(struct gc_root root, struct gc_heap *heap, + struct gc_trace_worker *worker) { + switch (root.kind) { + case GC_ROOT_KIND_HEAP: + gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker); + break; + case GC_ROOT_KIND_MUTATOR: + gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker); + break; + case GC_ROOT_KIND_CONSERVATIVE_EDGES: + trace_conservative_edges(root.range.lo_addr, root.range.hi_addr, 0, + heap, worker); + break; + case GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES: + trace_conservative_edges(root.range.lo_addr, root.range.hi_addr, 1, + heap, worker); + break; + case GC_ROOT_KIND_RESOLVED_EPHEMERONS: + gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit, + heap, worker); + break; + case GC_ROOT_KIND_EDGE: + tracer_visit(root.edge, heap, worker); + break; + case GC_ROOT_KIND_EDGE_BUFFER: + gc_field_set_visit_edge_buffer(&heap->remembered_set, root.edge_buffer, + trace_remembered_edge, heap, worker); + break; + default: + GC_CRASH(); + } +} + +static void +request_mutators_to_stop(struct gc_heap *heap) { + GC_ASSERT(!mutators_are_stopping(heap)); + atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed); +} + +static void +allow_mutators_to_continue(struct gc_heap *heap) { + GC_ASSERT(mutators_are_stopping(heap)); + GC_ASSERT(all_mutators_stopped(heap)); + heap->paused_mutator_count--; + atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed); + GC_ASSERT(!mutators_are_stopping(heap)); + pthread_cond_broadcast(&heap->mutator_cond); +} + +static void +heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { + size_t previous = heap->large_object_pages; + heap->large_object_pages = npages; + GC_ASSERT(npages <= previous); + size_t bytes = (previous - npages) << + heap_large_object_space(heap)->page_size_log2; + // If heap size is fixed, we won't need to allocate any more nofl blocks, as + // nothing uses paged-out blocks except large object allocation. But if the + // heap can grow, growth can consume nofl-space blocks that were paged out to + // allow for lospace allocations, which means that here we may need to + // allocate additional slabs. 
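+  // For example (illustrative numbers): if the previous cycle used 75
+  // lospace pages and only 50 survive this collection, the bytes backing
+  // the 25 freed pages are returned to the nofl space here.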
+ nofl_space_expand(heap_nofl_space(heap), bytes); +} + +static void +wait_for_mutators_to_stop(struct gc_heap *heap) { + heap->paused_mutator_count++; + while (!all_mutators_stopped(heap)) + pthread_cond_wait(&heap->collector_cond, &heap->lock); +} + +static enum gc_collection_kind +pause_mutator_for_collection(struct gc_heap *heap, + struct gc_mutator *mut) GC_NEVER_INLINE; +static enum gc_collection_kind +pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) { + GC_ASSERT(mutators_are_stopping(heap)); + GC_ASSERT(!all_mutators_stopped(heap)); + MUTATOR_EVENT(mut, mutator_stopping); + MUTATOR_EVENT(mut, mutator_stopped); + heap->paused_mutator_count++; + enum gc_collection_kind collection_kind = heap->gc_kind; + if (all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + + do + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + while (mutators_are_stopping(heap)); + heap->paused_mutator_count--; + + MUTATOR_EVENT(mut, mutator_restarted); + return collection_kind; +} + +static void +resize_heap(struct gc_heap *heap, size_t new_size) { + if (new_size == heap->size) + return; + DEBUG("------ resizing heap\n"); + DEBUG("------ old heap size: %zu bytes\n", heap->size); + DEBUG("------ new heap size: %zu bytes\n", new_size); + if (new_size < heap->size) + nofl_space_shrink(heap_nofl_space(heap), heap->size - new_size); + else + nofl_space_expand(heap_nofl_space(heap), new_size - heap->size); + + heap->size = new_size; + HEAP_EVENT(heap, heap_resized, new_size); +} + +static double +heap_last_gc_yield(struct gc_heap *heap) { + size_t live_size = + nofl_space_live_size_at_last_collection(heap_nofl_space(heap)) + + large_object_space_size_at_last_collection(heap_large_object_space(heap)); + + if (live_size > heap->size_at_last_gc) + return 0; + return 1.0 - ((double) live_size) / heap->size_at_last_gc; +} + +static double +heap_fragmentation(struct gc_heap *heap) { + struct nofl_space *nofl_space = heap_nofl_space(heap); + size_t fragmentation = nofl_space_fragmentation(nofl_space); + return ((double)fragmentation) / heap->size; +} + +static size_t +heap_estimate_live_data_after_gc(struct gc_heap *heap, + size_t last_live_bytes, + double last_yield) { + size_t bytes = + nofl_space_estimate_live_bytes_after_gc(heap_nofl_space(heap), + last_yield) + + large_object_space_size_at_last_collection(heap_large_object_space(heap)); + if (bytes < last_live_bytes) + return last_live_bytes; + return bytes; +} + +static void +detect_out_of_memory(struct gc_heap *heap, uintptr_t allocation_since_last_gc) { + if (heap->sizer.policy != GC_HEAP_SIZE_FIXED) + return; + + if (allocation_since_last_gc > nofl_space_fragmentation(heap_nofl_space(heap))) + return; + + if (heap->gc_kind == GC_COLLECTION_MINOR) + return; + + // No allocation since last gc: out of memory. 
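+  // (More precisely: the heap size is fixed, this is not a minor
+  // collection, and the bytes allocated since the last collection do not
+  // even exceed the nofl space's current fragmentation, so collecting
+  // again is unlikely to make progress.)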
+ fprintf(stderr, "ran out of space, heap size %zu\n", heap->size); + GC_CRASH(); +} + +static double +clamp_major_gc_yield_threshold(struct gc_heap *heap, double threshold) { + if (threshold < heap->minimum_major_gc_yield_threshold) + threshold = heap->minimum_major_gc_yield_threshold; + double one_block = NOFL_BLOCK_SIZE * 1.0 / heap->size; + if (threshold < one_block) + threshold = one_block; + return threshold; +} + +static enum gc_collection_kind +determine_collection_kind(struct gc_heap *heap, + enum gc_collection_kind requested) { + struct nofl_space *nofl_space = heap_nofl_space(heap); + enum gc_collection_kind previous_gc_kind = atomic_load(&heap->gc_kind); + enum gc_collection_kind gc_kind; + double yield = heap_last_gc_yield(heap); + double fragmentation = heap_fragmentation(heap); + ssize_t pending = atomic_load_explicit(&nofl_space->pending_unavailable_bytes, + memory_order_acquire); + + if (heap->count == 0) { + DEBUG("first collection is always major\n"); + gc_kind = GC_COLLECTION_MAJOR; + } else if (requested != GC_COLLECTION_ANY) { + DEBUG("user specifically requested collection kind %d\n", (int)requested); + gc_kind = requested; + } else if (pending > 0) { + DEBUG("evacuating due to need to reclaim %zd bytes\n", pending); + // During the last cycle, a large allocation could not find enough + // free blocks, and we decided not to expand the heap. Let's do an + // evacuating major collection to maximize the free block yield. + gc_kind = GC_COLLECTION_COMPACTING; + } else if (previous_gc_kind == GC_COLLECTION_COMPACTING + && fragmentation >= heap->fragmentation_low_threshold) { + DEBUG("continuing evacuation due to fragmentation %.2f%% > %.2f%%\n", + fragmentation * 100., + heap->fragmentation_low_threshold * 100.); + // For some reason, we already decided to compact in the past, + // and fragmentation hasn't yet fallen below a low-water-mark. + // Keep going. + gc_kind = GC_COLLECTION_COMPACTING; + } else if (fragmentation > heap->fragmentation_high_threshold) { + // Switch to evacuation mode if the heap is too fragmented. + DEBUG("triggering compaction due to fragmentation %.2f%% > %.2f%%\n", + fragmentation * 100., + heap->fragmentation_high_threshold * 100.); + gc_kind = GC_COLLECTION_COMPACTING; + } else if (previous_gc_kind == GC_COLLECTION_COMPACTING) { + // We were evacuating, but we're good now. Go back to minor + // collections. + DEBUG("returning to in-place collection, fragmentation %.2f%% < %.2f%%\n", + fragmentation * 100., + heap->fragmentation_low_threshold * 100.); + gc_kind = GC_GENERATIONAL ? GC_COLLECTION_MINOR : GC_COLLECTION_MAJOR; + } else if (!GC_GENERATIONAL) { + DEBUG("keeping on with major in-place GC\n"); + GC_ASSERT(previous_gc_kind == GC_COLLECTION_MAJOR); + gc_kind = GC_COLLECTION_MAJOR; + } else if (previous_gc_kind != GC_COLLECTION_MINOR) { + DEBUG("returning to minor collection\n"); + // Go back to minor collections. + gc_kind = GC_COLLECTION_MINOR; + } else if (yield < heap->major_gc_yield_threshold) { + DEBUG("collection yield too low, triggering major collection\n"); + // Nursery is getting tight; trigger a major GC. + gc_kind = GC_COLLECTION_MAJOR; + } else { + DEBUG("keeping on with minor GC\n"); + // Nursery has adequate space; keep trucking with minor GCs. + GC_ASSERT(previous_gc_kind == GC_COLLECTION_MINOR); + gc_kind = GC_COLLECTION_MINOR; + } + + if (gc_has_conservative_intraheap_edges() && + gc_kind == GC_COLLECTION_COMPACTING) { + DEBUG("welp. 
conservative heap scanning, no evacuation for you\n"); + gc_kind = GC_COLLECTION_MAJOR; + } + + // If this is the first in a series of minor collections, reset the + // threshold at which we should do a major GC. + if (gc_kind == GC_COLLECTION_MINOR && + previous_gc_kind != GC_COLLECTION_MINOR) { + double yield = heap_last_gc_yield(heap); + double threshold = yield * heap->minor_gc_yield_threshold; + double clamped = clamp_major_gc_yield_threshold(heap, threshold); + heap->major_gc_yield_threshold = clamped; + DEBUG("first minor collection at yield %.2f%%, threshold %.2f%%\n", + yield * 100., clamped * 100.); + } + + atomic_store(&heap->gc_kind, gc_kind); + return gc_kind; +} + +static void +enqueue_conservative_roots(uintptr_t low, uintptr_t high, + struct gc_heap *heap, void *data) { + int *possibly_interior = data; + gc_tracer_add_root(&heap->tracer, + gc_root_conservative_edges(low, high, *possibly_interior)); +} + +static int +enqueue_mutator_conservative_roots(struct gc_heap *heap) { + if (gc_has_mutator_conservative_roots()) { + int possibly_interior = gc_mutator_conservative_roots_may_be_interior(); + for (struct gc_mutator *mut = heap->mutators; + mut; + mut = mut->next) + gc_stack_visit(&mut->stack, enqueue_conservative_roots, heap, + &possibly_interior); + return 1; + } + return 0; +} + +static int +enqueue_global_conservative_roots(struct gc_heap *heap) { + if (gc_has_global_conservative_roots()) { + int possibly_interior = 0; + gc_platform_visit_global_conservative_roots + (enqueue_conservative_roots, heap, &possibly_interior); + return 1; + } + return 0; +} + +static int +enqueue_pinned_roots(struct gc_heap *heap) { + GC_ASSERT(!heap_nofl_space(heap)->evacuating); + int has_pinned_roots = enqueue_mutator_conservative_roots(heap); + has_pinned_roots |= enqueue_global_conservative_roots(heap); + return has_pinned_roots; +} + +static void +enqueue_root_edge(struct gc_edge edge, struct gc_heap *heap, void *unused) { + gc_tracer_add_root(&heap->tracer, gc_root_edge(edge)); +} + +static void +enqueue_generational_roots(struct gc_heap *heap, + enum gc_collection_kind gc_kind) { + if (!GC_GENERATIONAL) return; + if (gc_kind == GC_COLLECTION_MINOR) + gc_field_set_add_roots(&heap->remembered_set, &heap->tracer); +} + +static inline void +forget_remembered_edge(struct gc_edge edge, struct gc_heap *heap) { + struct nofl_space *space = heap_nofl_space(heap); + if (nofl_space_contains_edge(space, edge)) + nofl_space_forget_edge(space, edge); + // Otherwise the edge is in the lospace, whose remembered edges are + // cleared in bulk. 
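+  // (See clear_remembered_set below: lospace edges are dropped wholesale
+  // via large_object_space_clear_remembered_edges, so only nofl edges need
+  // to be forgotten one at a time here.)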
+} + +static void +clear_remembered_set(struct gc_heap *heap) { + if (!GC_GENERATIONAL) return; + gc_field_set_clear(&heap->remembered_set, forget_remembered_edge, heap); + large_object_space_clear_remembered_edges(heap_large_object_space(heap)); +} + +static void +enqueue_relocatable_roots(struct gc_heap *heap, + enum gc_collection_kind gc_kind) { + for (struct gc_mutator *mut = heap->mutators; + mut; + mut = mut->next) { + if (mut->roots) + gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut)); + } + if (heap->roots) + gc_tracer_add_root(&heap->tracer, gc_root_heap(heap)); + gc_visit_finalizer_roots(heap->finalizer_state, enqueue_root_edge, heap, NULL); + enqueue_generational_roots(heap, gc_kind); +} + +static void +resolve_ephemerons_lazily(struct gc_heap *heap) { + atomic_store_explicit(&heap->check_pending_ephemerons, 0, + memory_order_release); +} + +static void +resolve_ephemerons_eagerly(struct gc_heap *heap) { + atomic_store_explicit(&heap->check_pending_ephemerons, 1, + memory_order_release); + gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); +} + +static void +trace_resolved_ephemerons(struct gc_heap *heap) { + for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); + resolved; + resolved = gc_pop_resolved_ephemerons(heap)) { + gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved)); + gc_tracer_trace(&heap->tracer); + } +} + +static void +resolve_finalizers(struct gc_heap *heap) { + for (size_t priority = 0; + priority < gc_finalizer_priority_count(); + priority++) { + if (gc_resolve_finalizers(heap->finalizer_state, priority, + enqueue_root_edge, heap, NULL)) { + gc_tracer_trace(&heap->tracer); + trace_resolved_ephemerons(heap); + } + } + gc_notify_finalizers(heap->finalizer_state, heap); +} + +static void +sweep_ephemerons(struct gc_heap *heap) { + return gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1); +} + +static void collect(struct gc_mutator *mut, + enum gc_collection_kind requested_kind, + int requested_by_user) GC_NEVER_INLINE; +static void +collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind, + int requested_by_user) { + struct gc_heap *heap = mutator_heap(mut); + struct nofl_space *nofl_space = heap_nofl_space(heap); + struct large_object_space *lospace = heap_large_object_space(heap); + struct gc_extern_space *exspace = heap_extern_space(heap); + uint64_t start_ns = gc_platform_monotonic_nanoseconds(); + MUTATOR_EVENT(mut, mutator_cause_gc); + DEBUG("start collect #%ld:\n", heap->count); + HEAP_EVENT(heap, requesting_stop); + request_mutators_to_stop(heap); + nofl_finish_sweeping(&mut->allocator, nofl_space); + HEAP_EVENT(heap, waiting_for_stop); + wait_for_mutators_to_stop(heap); + HEAP_EVENT(heap, mutators_stopped); + uint64_t allocation_counter = 0; + nofl_space_add_to_allocation_counter(nofl_space, &allocation_counter); + large_object_space_add_to_allocation_counter(lospace, &allocation_counter); + heap->total_allocated_bytes_at_last_gc += allocation_counter; + if (!requested_by_user) + detect_out_of_memory(heap, allocation_counter); + enum gc_collection_kind gc_kind = + determine_collection_kind(heap, requested_kind); + int is_minor = gc_kind == GC_COLLECTION_MINOR; + HEAP_EVENT(heap, prepare_gc, gc_kind); + nofl_space_prepare_gc(nofl_space, gc_kind); + large_object_space_start_gc(lospace, is_minor); + gc_extern_space_start_gc(exspace, is_minor); + resolve_ephemerons_lazily(heap); + gc_tracer_prepare(&heap->tracer); + double yield = heap_last_gc_yield(heap); + double fragmentation 
= heap_fragmentation(heap); + size_t live_bytes = heap->size * (1.0 - yield); + HEAP_EVENT(heap, live_data_size, live_bytes); + DEBUG("last gc yield: %f; fragmentation: %f\n", yield, fragmentation); + // Eagerly trace pinned roots if we are going to relocate objects. + if (enqueue_pinned_roots(heap) && gc_kind == GC_COLLECTION_COMPACTING) + gc_tracer_trace_roots(&heap->tracer); + // Process the rest of the roots in parallel. This heap event should probably + // be removed, as there is no clear cutoff time. + HEAP_EVENT(heap, roots_traced); + enqueue_relocatable_roots(heap, gc_kind); + nofl_space_start_gc(nofl_space, gc_kind); + gc_tracer_trace(&heap->tracer); + HEAP_EVENT(heap, heap_traced); + resolve_ephemerons_eagerly(heap); + trace_resolved_ephemerons(heap); + HEAP_EVENT(heap, ephemerons_traced); + resolve_finalizers(heap); + HEAP_EVENT(heap, finalizers_traced); + sweep_ephemerons(heap); + gc_tracer_release(&heap->tracer); + clear_remembered_set(heap); + nofl_space_finish_gc(nofl_space, gc_kind); + large_object_space_finish_gc(lospace, is_minor); + gc_extern_space_finish_gc(exspace, is_minor); + heap->count++; + heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection); + uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns; + size_t live_bytes_estimate = + heap_estimate_live_data_after_gc(heap, live_bytes, yield); + DEBUG("--- total live bytes estimate: %zu\n", live_bytes_estimate); + gc_heap_sizer_on_gc(heap->sizer, heap->size, live_bytes_estimate, pause_ns, + resize_heap); + heap->size_at_last_gc = heap->size; + HEAP_EVENT(heap, restarting_mutators); + allow_mutators_to_continue(heap); +} + +static void +trigger_collection(struct gc_mutator *mut, + enum gc_collection_kind requested_kind, + int requested_by_user) { + struct gc_heap *heap = mutator_heap(mut); + int prev_kind = -1; + gc_stack_capture_hot(&mut->stack); + nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); + heap_lock(heap); + while (mutators_are_stopping(heap)) + prev_kind = pause_mutator_for_collection(heap, mut); + if (prev_kind < (int)requested_kind) + collect(mut, requested_kind, requested_by_user); + heap_unlock(heap); +} + +void +gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) { + trigger_collection(mut, kind, 1); +} + +int* +gc_safepoint_flag_loc(struct gc_mutator *mut) { + return &mutator_heap(mut)->collecting; +} + +void +gc_safepoint_slow(struct gc_mutator *mut) { + struct gc_heap *heap = mutator_heap(mut); + gc_stack_capture_hot(&mut->stack); + nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); + heap_lock(heap); + while (mutators_are_stopping(mutator_heap(mut))) + pause_mutator_for_collection(heap, mut); + heap_unlock(heap); +} + +static enum gc_trace_kind +compute_trace_kind(enum gc_allocation_kind kind) { + if (GC_CONSERVATIVE_TRACE) { + switch (kind) { + case GC_ALLOCATION_TAGGED: + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + return GC_TRACE_CONSERVATIVELY; + case GC_ALLOCATION_TAGGED_POINTERLESS: + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return GC_TRACE_NONE; + default: + GC_CRASH(); + }; + } else { + switch (kind) { + case GC_ALLOCATION_TAGGED: + return GC_TRACE_PRECISELY; + case GC_ALLOCATION_TAGGED_POINTERLESS: + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + return GC_TRACE_NONE; + case GC_ALLOCATION_UNTAGGED_CONSERVATIVE: + default: + GC_CRASH(); + }; + } +} + +static void* 
+allocate_large(struct gc_mutator *mut, size_t size, + enum gc_trace_kind kind) { + struct gc_heap *heap = mutator_heap(mut); + struct nofl_space *nofl_space = heap_nofl_space(heap); + struct large_object_space *lospace = heap_large_object_space(heap); + + size_t npages = large_object_space_npages(lospace, size); + + nofl_space_request_release_memory(nofl_space, + npages << lospace->page_size_log2); + + while (!nofl_space_shrink(nofl_space, 0)) + trigger_collection(mut, GC_COLLECTION_COMPACTING, 0); + atomic_fetch_add(&heap->large_object_pages, npages); + + void *ret = large_object_space_alloc(lospace, npages, kind); + + if (!ret) { + perror("weird: we have the space but mmap didn't work"); + GC_CRASH(); + } + + return ret; +} + +static void +collect_for_small_allocation(void *mut) { + trigger_collection(mut, GC_COLLECTION_ANY, 0); +} + +void* +gc_allocate_slow(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + GC_ASSERT(size > 0); // allocating 0 bytes would be silly + + if (size > gc_allocator_large_threshold()) + return allocate_large(mut, size, compute_trace_kind(kind)); + + return gc_ref_heap_object(nofl_allocate(&mut->allocator, + heap_nofl_space(mutator_heap(mut)), + size, collect_for_small_allocation, + mut, kind)); +} + +void +gc_pin_object(struct gc_mutator *mut, struct gc_ref ref) { + struct nofl_space *nofl = heap_nofl_space(mutator_heap(mut)); + if (nofl_space_contains(nofl, ref)) + nofl_space_pin_object(nofl, ref); + // Otherwise if it's a large or external object, it won't move. +} + +int +gc_object_is_old_generation_slow(struct gc_mutator *mut, struct gc_ref obj) { + if (!GC_GENERATIONAL) + return 0; + + struct gc_heap *heap = mutator_heap(mut); + struct nofl_space *nofl_space = heap_nofl_space(heap); + if (nofl_space_contains(nofl_space, obj)) + return nofl_space_is_survivor(nofl_space, obj); + + struct large_object_space *lospace = heap_large_object_space(heap); + if (large_object_space_contains(lospace, obj)) + return large_object_space_is_survivor(lospace, obj); + + return 0; +} + +void +gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) { + GC_ASSERT(!gc_ref_is_null(new_val)); + if (!GC_GENERATIONAL) return; + if (gc_object_is_old_generation_slow(mut, new_val)) + return; + struct gc_heap *heap = mutator_heap(mut); + if ((obj_size <= gc_allocator_large_threshold()) + ? nofl_space_remember_edge(heap_nofl_space(heap), obj, edge) + : large_object_space_remember_edge(heap_large_object_space(heap), + obj, edge)) + gc_field_set_writer_add_edge(&mut->logger, edge); +} + +struct gc_ephemeron* +gc_allocate_ephemeron(struct gc_mutator *mut) { + struct gc_ref ret = + gc_ref_from_heap_object(gc_allocate(mut, gc_ephemeron_size(), + GC_ALLOCATION_TAGGED)); + nofl_space_set_ephemeron_flag(ret); + return gc_ref_heap_object(ret); +} + +void +gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { + gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value); + // No write barrier: we require that the ephemeron be newer than the + // key or the value. 
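+  // Because the values being stored are at least as old as the
+  // ephemeron itself, this store cannot create an old-to-new edge, so
+  // there is nothing for the remembered set to record.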
+} + +struct gc_pending_ephemerons * +gc_heap_pending_ephemerons(struct gc_heap *heap) { + return heap->pending_ephemerons; +} + +unsigned +gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { + return heap->count; +} + +struct gc_finalizer* +gc_allocate_finalizer(struct gc_mutator *mut) { + return gc_allocate(mut, gc_finalizer_size(), GC_ALLOCATION_TAGGED); +} + +void +gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, + unsigned priority, struct gc_ref object, + struct gc_ref closure) { + gc_finalizer_init_internal(finalizer, object, closure); + gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state, + finalizer, priority); + // No write barrier. +} + +struct gc_finalizer* +gc_pop_finalizable(struct gc_mutator *mut) { + return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state); +} + +void +gc_set_finalizer_callback(struct gc_heap *heap, + gc_finalizer_callback callback) { + gc_finalizer_state_set_callback(heap->finalizer_state, callback); +} + +static int +heap_prepare_pending_ephemerons(struct gc_heap *heap) { + struct gc_pending_ephemerons *cur = heap->pending_ephemerons; + size_t target = heap->size * heap->pending_ephemerons_size_factor; + double slop = heap->pending_ephemerons_size_slop; + + heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop); + + return !!heap->pending_ephemerons; +} + +struct gc_options { + struct gc_common_options common; +}; + +int +gc_option_from_string(const char *str) { + return gc_common_option_from_string(str); +} + +struct gc_options* +gc_allocate_options(void) { + struct gc_options *ret = malloc(sizeof(struct gc_options)); + gc_init_common_options(&ret->common); + return ret; +} + +int +gc_options_set_int(struct gc_options *options, int option, int value) { + return gc_common_options_set_int(&options->common, option, value); +} + +int +gc_options_set_size(struct gc_options *options, int option, + size_t value) { + return gc_common_options_set_size(&options->common, option, value); +} + +int +gc_options_set_double(struct gc_options *options, int option, + double value) { + return gc_common_options_set_double(&options->common, option, value); +} + +int +gc_options_parse_and_set(struct gc_options *options, int option, + const char *value) { + return gc_common_options_parse_and_set(&options->common, option, value); +} + +// with heap lock +static uint64_t allocation_counter(struct gc_heap *heap) { + uint64_t ret = heap->total_allocated_bytes_at_last_gc; + nofl_space_add_to_allocation_counter(heap_nofl_space(heap), &ret); + large_object_space_add_to_allocation_counter(heap_large_object_space(heap), + &ret); + return ret; +} + +uint64_t gc_allocation_counter(struct gc_heap *heap) { + pthread_mutex_lock(&heap->lock); + uint64_t ret = allocation_counter(heap); + pthread_mutex_unlock(&heap->lock); + return ret; +} + +static uint64_t allocation_counter_from_thread(struct gc_heap *heap) { + if (pthread_mutex_trylock(&heap->lock)) return 0; + uint64_t ret = allocation_counter(heap); + pthread_mutex_unlock(&heap->lock); + return ret; +} + +static void set_heap_size_from_thread(struct gc_heap *heap, size_t size) { + if (pthread_mutex_trylock(&heap->lock)) return; + resize_heap(heap, size); + pthread_mutex_unlock(&heap->lock); +} + +static int +heap_init(struct gc_heap *heap, const struct gc_options *options) { + // *heap is already initialized to 0. 
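+  // (The caller allocates the heap with calloc, so all fields start
+  // out zeroed; only the nonzero defaults below need to be set.)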
+ + gc_field_set_init(&heap->remembered_set); + pthread_mutex_init(&heap->lock, NULL); + pthread_cond_init(&heap->mutator_cond, NULL); + pthread_cond_init(&heap->collector_cond, NULL); + heap->size = heap->size_at_last_gc = options->common.heap_size; + + if (!gc_tracer_init(&heap->tracer, heap, options->common.parallelism)) + GC_CRASH(); + + heap->pending_ephemerons_size_factor = 0.005; + heap->pending_ephemerons_size_slop = 0.5; + heap->fragmentation_low_threshold = 0.05; + heap->fragmentation_high_threshold = 0.10; + heap->minor_gc_yield_threshold = 0.30; + heap->minimum_major_gc_yield_threshold = 0.05; + heap->major_gc_yield_threshold = + clamp_major_gc_yield_threshold(heap, heap->minor_gc_yield_threshold); + + if (!heap_prepare_pending_ephemerons(heap)) + GC_CRASH(); + + heap->finalizer_state = gc_make_finalizer_state(); + if (!heap->finalizer_state) + GC_CRASH(); + + heap->background_thread = gc_make_background_thread(); + heap->sizer = gc_make_heap_sizer(heap, &options->common, + allocation_counter_from_thread, + set_heap_size_from_thread, + heap->background_thread); + + return 1; +} + +int +gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, + struct gc_heap **heap, struct gc_mutator **mut, + struct gc_event_listener event_listener, + void *event_listener_data) { + GC_ASSERT_EQ(gc_allocator_small_granule_size(), NOFL_GRANULE_SIZE); + GC_ASSERT_EQ(gc_allocator_large_threshold(), LARGE_OBJECT_THRESHOLD); + GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), + offsetof(struct nofl_allocator, alloc)); + GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), + offsetof(struct nofl_allocator, sweep)); + GC_ASSERT_EQ(gc_allocator_alloc_table_alignment(), NOFL_SLAB_SIZE); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_TAGGED), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_PRECISELY); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_TAGGED_POINTERLESS), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_NONE); + if (GC_CONSERVATIVE_TRACE) { + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_UNTAGGED_CONSERVATIVE), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY); + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_UNTAGGED_POINTERLESS), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_NONE); + } else { + GC_ASSERT_EQ(gc_allocator_alloc_table_begin_pattern(GC_ALLOCATION_UNTAGGED_POINTERLESS), + NOFL_METADATA_BYTE_YOUNG | NOFL_METADATA_BYTE_TRACE_NONE | + NOFL_METADATA_BYTE_PINNED); + } + GC_ASSERT_EQ(gc_allocator_alloc_table_end_pattern(), NOFL_METADATA_BYTE_END); + if (GC_GENERATIONAL) { + GC_ASSERT_EQ(gc_write_barrier_field_table_alignment(), NOFL_SLAB_SIZE); + GC_ASSERT_EQ(gc_write_barrier_field_fields_per_byte(), + NOFL_GRANULE_SIZE / sizeof(uintptr_t)); + GC_ASSERT_EQ(gc_write_barrier_field_first_bit_pattern(), + NOFL_METADATA_BYTE_LOGGED_0); + } + + *heap = calloc(1, sizeof(struct gc_heap)); + if (!*heap) GC_CRASH(); + + if (!heap_init(*heap, options)) + GC_CRASH(); + + (*heap)->event_listener = event_listener; + (*heap)->event_listener_data = event_listener_data; + HEAP_EVENT(*heap, init, (*heap)->size); + + struct nofl_space *space = heap_nofl_space(*heap); + if (!nofl_space_init(space, (*heap)->size, + options->common.parallelism != 1, + (*heap)->fragmentation_low_threshold, + (*heap)->background_thread)) { + free(*heap); + *heap = NULL; + return 0; + } + + if (!large_object_space_init(heap_large_object_space(*heap), *heap, + 
(*heap)->background_thread)) + GC_CRASH(); + + *mut = calloc(1, sizeof(struct gc_mutator)); + if (!*mut) GC_CRASH(); + gc_stack_init(&(*mut)->stack, stack_base); + add_mutator(*heap, *mut); + + gc_background_thread_start((*heap)->background_thread); + + return 1; +} + +struct gc_mutator* +gc_init_for_thread(struct gc_stack_addr *stack_base, + struct gc_heap *heap) { + struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator)); + if (!ret) + GC_CRASH(); + gc_stack_init(&ret->stack, stack_base); + add_mutator(heap, ret); + return ret; +} + +void +gc_finish_for_thread(struct gc_mutator *mut) { + remove_mutator(mutator_heap(mut), mut); + free(mut); +} + +static void +deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + GC_ASSERT(mut->next == NULL); + nofl_allocator_finish(&mut->allocator, heap_nofl_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(&mut->logger); + heap_lock(heap); + heap->inactive_mutator_count++; + gc_stack_capture_hot(&mut->stack); + if (all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + heap_unlock(heap); +} + +static void +reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + heap_lock(heap); + while (mutators_are_stopping(heap)) + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + heap->inactive_mutator_count--; + heap_unlock(heap); +} + +void* +gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*), void *data) { + struct gc_heap *heap = mutator_heap(mut); + deactivate_mutator(heap, mut); + void *ret = f(data); + reactivate_mutator(heap, mut); + return ret; +} diff --git a/libguile/whippet/src/nofl-space.h b/libguile/whippet/src/nofl-space.h new file mode 100644 index 000000000..2668232c6 --- /dev/null +++ b/libguile/whippet/src/nofl-space.h @@ -0,0 +1,1944 @@ +#ifndef NOFL_SPACE_H +#define NOFL_SPACE_H + +#include +#include +#include +#include + +#include "gc-api.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "assert.h" +#include "debug.h" +#include "extents.h" +#include "gc-align.h" +#include "gc-attrs.h" +#include "gc-inline.h" +#include "gc-lock.h" +#include "gc-platform.h" +#include "spin.h" +#include "swar.h" + +// This is the nofl space! It is a mark space which doesn't use +// free-lists to allocate, and which can evacuate objects if +// fragmentation is too high, inspired by Immix. Nofl stands for "no +// free-list", but also "novel", in the sense that it hasn't been tried +// before. 
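+//
+// A sketch of the geometry defined by the constants below: objects are
+// carved out of 16-byte granules, granules are grouped into 64 kB
+// blocks, and blocks are grouped into 4 MB slabs.  Each granule has
+// one metadata byte, so a block needs 4 kB of metadata; a slab's worth
+// of metadata is packed into the slab's leading blocks rather than
+// being stored next to the objects themselves.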
+ +#define NOFL_GRANULE_SIZE 16 +#define NOFL_GRANULE_SIZE_LOG_2 4 +#define NOFL_MEDIUM_OBJECT_THRESHOLD 256 +#define NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD 16 + +STATIC_ASSERT_EQ(NOFL_GRANULE_SIZE, 1 << NOFL_GRANULE_SIZE_LOG_2); +STATIC_ASSERT_EQ(NOFL_MEDIUM_OBJECT_THRESHOLD, + NOFL_MEDIUM_OBJECT_GRANULE_THRESHOLD * NOFL_GRANULE_SIZE); + +#define NOFL_SLAB_SIZE (4 * 1024 * 1024) +#define NOFL_BLOCK_SIZE (64 * 1024) +#define NOFL_METADATA_BYTES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) +#define NOFL_BLOCKS_PER_SLAB (NOFL_SLAB_SIZE / NOFL_BLOCK_SIZE) +#define NOFL_META_BLOCKS_PER_SLAB (NOFL_METADATA_BYTES_PER_BLOCK * NOFL_BLOCKS_PER_SLAB / NOFL_BLOCK_SIZE) +#define NOFL_NONMETA_BLOCKS_PER_SLAB (NOFL_BLOCKS_PER_SLAB - NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_METADATA_BYTES_PER_SLAB (NOFL_NONMETA_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK) +#define NOFL_SLACK_METADATA_BYTES_PER_SLAB (NOFL_META_BLOCKS_PER_SLAB * NOFL_METADATA_BYTES_PER_BLOCK) +#define NOFL_VESTIGIAL_BYTES_PER_BLOCK (NOFL_SLACK_METADATA_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) +#define NOFL_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_NONMETA_BLOCKS_PER_SLAB) +#define NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB (NOFL_VESTIGIAL_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_SUMMARY_BYTES_PER_BLOCK (NOFL_SLACK_VESTIGIAL_BYTES_PER_SLAB / NOFL_BLOCKS_PER_SLAB) +#define NOFL_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NONMETA_BLOCKS_PER_SLAB) +#define NOFL_SLACK_SUMMARY_BYTES_PER_SLAB (NOFL_SUMMARY_BYTES_PER_BLOCK * NOFL_META_BLOCKS_PER_SLAB) +#define NOFL_HEADER_BYTES_PER_SLAB NOFL_SLACK_SUMMARY_BYTES_PER_SLAB + +struct nofl_slab; + +struct nofl_slab_header { + union { + struct { + uint8_t block_marks[NOFL_BLOCKS_PER_SLAB]; + }; + uint8_t padding[NOFL_HEADER_BYTES_PER_SLAB]; + }; +}; +STATIC_ASSERT_EQ(sizeof(struct nofl_slab_header), NOFL_HEADER_BYTES_PER_SLAB); + +// Sometimes we want to put a block on a singly-linked list. For that +// there's a pointer reserved in the block summary. But because the +// pointer is aligned (32kB on 32-bit, 64kB on 64-bit), we can portably +// hide up to 15 flags in the low bits. These flags are accessed +// non-atomically, in two situations: one, when a block is not on a +// list, which guarantees that no other thread can access it; or when no +// pushing or popping is happening, for example during an evacuation +// cycle. +enum nofl_block_summary_flag { + NOFL_BLOCK_EVACUATE = 0x1, + NOFL_BLOCK_ZERO = 0x2, + NOFL_BLOCK_UNAVAILABLE = 0x4, + NOFL_BLOCK_PAGED_OUT = 0x8, + NOFL_BLOCK_FLAG_UNUSED_3 = 0x8, + NOFL_BLOCK_FLAG_UNUSED_4 = 0x10, + NOFL_BLOCK_FLAG_UNUSED_5 = 0x20, + NOFL_BLOCK_FLAG_UNUSED_6 = 0x40, + NOFL_BLOCK_FLAG_UNUSED_7 = 0x80, + NOFL_BLOCK_FLAG_UNUSED_8 = 0x100, + NOFL_BLOCK_FLAG_UNUSED_9 = 0x200, + NOFL_BLOCK_FLAG_UNUSED_10 = 0x400, + NOFL_BLOCK_FLAG_UNUSED_11 = 0x800, + NOFL_BLOCK_FLAG_UNUSED_12 = 0x1000, + NOFL_BLOCK_FLAG_UNUSED_13 = 0x2000, + NOFL_BLOCK_FLAG_UNUSED_14 = 0x4000, +}; + +struct nofl_block_summary { + union { + struct { + // Counters related to previous collection: how many holes there + // were, and how much space they had. + uint16_t hole_count; + uint16_t hole_granules; + // Counters related to allocation since previous collection: + // wasted space due to fragmentation. Also used by blocks on the + // "partly full" list, which have zero holes_with_fragmentation + // but nonzero fragmentation_granules. + uint16_t holes_with_fragmentation; + uint16_t fragmentation_granules; + // Next pointer, and flags in low bits. 
See comment above + // regarding enum nofl_block_summary_flag. + uintptr_t next_and_flags; + }; + uint8_t padding[NOFL_SUMMARY_BYTES_PER_BLOCK]; + }; +}; +STATIC_ASSERT_EQ(sizeof(struct nofl_block_summary), + NOFL_SUMMARY_BYTES_PER_BLOCK); + +struct nofl_block { + char data[NOFL_BLOCK_SIZE]; +}; + +struct nofl_block_ref { + struct nofl_block_summary *summary; + uintptr_t addr; +}; + +struct nofl_slab { + struct nofl_slab_header header; + struct nofl_block_summary summaries[NOFL_NONMETA_BLOCKS_PER_SLAB]; + uint8_t unused[NOFL_VESTIGIAL_BYTES_PER_SLAB]; + uint8_t metadata[NOFL_METADATA_BYTES_PER_SLAB]; + struct nofl_block blocks[NOFL_NONMETA_BLOCKS_PER_SLAB]; +}; +STATIC_ASSERT_EQ(sizeof(struct nofl_slab), NOFL_SLAB_SIZE); + +// Lock-free block list, which either only has threads removing items +// from it or only has threads adding items to it -- i.e., adding and +// removing items don't happen concurrently. +struct nofl_block_list { + size_t count; + uintptr_t blocks; +}; + +// A block list that has concurrent threads adding and removing items +// from it. +struct nofl_block_stack { + struct nofl_block_list list; +}; + +#define NOFL_PAGE_OUT_QUEUE_SIZE 4 + +struct nofl_space { + uint8_t current_mark; + uint8_t survivor_mark; + uint8_t evacuating; + struct extents *extents; + size_t heap_size; + uint8_t last_collection_was_minor; + struct nofl_block_stack empty; + struct nofl_block_stack paged_out[NOFL_PAGE_OUT_QUEUE_SIZE]; + struct nofl_block_list to_sweep; + struct nofl_block_stack partly_full; + struct nofl_block_list full; + struct nofl_block_list promoted; + struct nofl_block_list old; + struct nofl_block_list evacuation_targets; + pthread_mutex_t lock; + double evacuation_minimum_reserve; + double evacuation_reserve; + double promotion_threshold; + ssize_t pending_unavailable_bytes; // atomically + struct nofl_slab **slabs; + size_t nslabs; + uintptr_t old_generation_granules; // atomically + uintptr_t survivor_granules_at_last_collection; // atomically + uintptr_t allocated_granules_since_last_collection; // atomically + uintptr_t fragmentation_granules_since_last_collection; // atomically +}; + +struct nofl_allocator { + uintptr_t alloc; + uintptr_t sweep; + struct nofl_block_ref block; +}; + +#if GC_CONSERVATIVE_TRACE && GC_CONCURRENT_TRACE +// There are just not enough bits in the mark table. +#error Unsupported configuration +#endif + +// Each granule has one mark byte stored in a side table. A granule's +// mark state is a whole byte instead of a bit to facilitate parallel +// marking. (Parallel markers are allowed to race.) We also use this +// byte to compute object extent, via a bit flag indicating +// end-of-object. +// +// Because we want to allow for conservative roots, we need to know +// whether an address indicates an object or not. That means that when +// an object is allocated, it has to set a bit, somewhere. We use the +// metadata byte for this purpose, setting the "young" mark. +// +// The "young" mark's name might make you think about generational +// collection, and indeed all objects collected in a minor collection +// will have this bit set. However, the nofl space never needs to check +// for the young mark; if it weren't for the need to identify +// conservative roots, we wouldn't need a young mark at all. Perhaps in +// an all-precise system, we would be able to avoid the overhead of +// initializing mark byte upon each fresh allocation. 
+// +// When an object becomes dead after a GC, it will still have a mark set +// -- maybe the young mark, or maybe a survivor mark. The sweeper has +// to clear these marks before the next collection. If we add +// concurrent marking, we will also be marking "live" objects, updating +// their mark bits. So there are three and possibly four object states +// concurrently observable: young, dead, survivor, and marked. (We +// don't currently have concurrent marking, though.) We store this +// state in the low 3 bits of the byte. After each major collection, +// the dead, survivor, and marked states rotate. +// +// It can be useful to support "raw" allocations, most often +// pointerless, but for compatibility with BDW-GC, sometimes +// conservatively-traced tagless data. We reserve one or two bits for +// the "kind" of the allocation: either a normal object traceable via +// `gc_trace_object`, a pointerless untagged allocation that doesn't +// need tracing, an allocation that should be traced conservatively, or +// an ephemeron. The latter two states are only used when conservative +// tracing is enabled. +// +// An object can be pinned, preventing it from being evacuated during +// collection. Pinning does not keep the object alive; if it is +// otherwise unreachable, it will be collected. To pin an object, a +// running mutator can set the pinned bit, using atomic +// compare-and-swap. This bit overlaps the "trace conservatively" and +// "ephemeron" trace kinds, but that's OK because we don't use the +// pinned bit in those cases, as all objects are implicitly pinned. +// +// For generational collectors, the nofl space supports a field-logging +// write barrier. The two logging bits correspond to the two words in a +// granule. When a field is written to, the write barrier should check +// the logged bit; if it is unset, it should try to atomically set the +// bit, and if that works, then we record the field location as a +// generational root, adding it to a sequential-store buffer. 
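+//
+// Summarizing the enum below: bits 0-2 of the metadata byte hold the
+// young/mark state, bits 3-4 hold the trace kind (with bit 4 doubling
+// as the pinned flag when the trace kind leaves it free), bit 5 flags
+// the last granule of an object, and bits 6-7 are the two field
+// logging bits used by the generational write barrier.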
+enum nofl_metadata_byte { + NOFL_METADATA_BYTE_NONE = 0, + NOFL_METADATA_BYTE_YOUNG = 1, + NOFL_METADATA_BYTE_MARK_0 = 2, + NOFL_METADATA_BYTE_MARK_1 = 3, + NOFL_METADATA_BYTE_MARK_2 = 4, + NOFL_METADATA_BYTE_MARK_MASK = 7, + NOFL_METADATA_BYTE_TRACE_PRECISELY = 0, + NOFL_METADATA_BYTE_TRACE_NONE = 8, + NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY = 16, + NOFL_METADATA_BYTE_TRACE_EPHEMERON = 24, + NOFL_METADATA_BYTE_TRACE_KIND_MASK = 0|8|16|24, + NOFL_METADATA_BYTE_PINNED = 16, + NOFL_METADATA_BYTE_END = 32, + NOFL_METADATA_BYTE_LOGGED_0 = 64, + NOFL_METADATA_BYTE_LOGGED_1 = 128, +}; + +STATIC_ASSERT_EQ(0, + NOFL_METADATA_BYTE_TRACE_PRECISELY&NOFL_METADATA_BYTE_PINNED); +STATIC_ASSERT_EQ(0, + NOFL_METADATA_BYTE_TRACE_NONE&NOFL_METADATA_BYTE_PINNED); + +static uint8_t +nofl_advance_current_mark(uint8_t mark) { + switch (mark) { + case NOFL_METADATA_BYTE_MARK_0: + return NOFL_METADATA_BYTE_MARK_1; + case NOFL_METADATA_BYTE_MARK_1: + return NOFL_METADATA_BYTE_MARK_2; + case NOFL_METADATA_BYTE_MARK_2: + return NOFL_METADATA_BYTE_MARK_0; + default: + GC_CRASH(); + } +} + +static struct gc_lock +nofl_space_lock(struct nofl_space *space) { + return gc_lock_acquire(&space->lock); +} + +static struct nofl_slab* +nofl_object_slab(void *obj) { + uintptr_t addr = (uintptr_t) obj; + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + return (struct nofl_slab*) base; +} + +static uint8_t* +nofl_metadata_byte_for_addr(uintptr_t addr) { + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + uintptr_t granule = (addr & (NOFL_SLAB_SIZE - 1)) >> NOFL_GRANULE_SIZE_LOG_2; + return (uint8_t*) (base + granule); +} + +static uint8_t* +nofl_metadata_byte_for_object(struct gc_ref ref) { + return nofl_metadata_byte_for_addr(gc_ref_value(ref)); +} + +static uint8_t* +nofl_block_mark_loc(uintptr_t addr) { + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + struct nofl_slab *slab = (struct nofl_slab *) base; + unsigned block_idx = (addr / NOFL_BLOCK_SIZE) % NOFL_BLOCKS_PER_SLAB; + return &slab->header.block_marks[block_idx]; +} + +static int +nofl_block_is_marked(uintptr_t addr) { + return atomic_load_explicit(nofl_block_mark_loc(addr), memory_order_relaxed); +} + +static void +nofl_block_set_mark(uintptr_t addr) { + uint8_t *loc = nofl_block_mark_loc(addr); + if (!atomic_load_explicit(loc, memory_order_relaxed)) + atomic_store_explicit(loc, 1, memory_order_relaxed); +} + +#define NOFL_GRANULES_PER_BLOCK (NOFL_BLOCK_SIZE / NOFL_GRANULE_SIZE) + +static struct nofl_block_summary* +nofl_block_summary_for_addr(uintptr_t addr) { + uintptr_t base = align_down(addr, NOFL_SLAB_SIZE); + uintptr_t block = (addr & (NOFL_SLAB_SIZE - 1)) / NOFL_BLOCK_SIZE; + return (struct nofl_block_summary*) + (base + block * sizeof(struct nofl_block_summary)); +} + +static uintptr_t +nofl_block_summary_has_flag(struct nofl_block_summary *summary, + enum nofl_block_summary_flag flag) { + return (summary->next_and_flags & flag) == flag; +} + +static void +nofl_block_summary_set_flag(struct nofl_block_summary *summary, + enum nofl_block_summary_flag flag) { + summary->next_and_flags |= flag; +} + +static void +nofl_block_summary_clear_flag(struct nofl_block_summary *summary, + enum nofl_block_summary_flag flag) { + summary->next_and_flags &= ~(uintptr_t)flag; +} + +static uintptr_t +nofl_block_summary_next(struct nofl_block_summary *summary) { + return align_down(summary->next_and_flags, NOFL_BLOCK_SIZE); +} + +static void +nofl_block_summary_set_next(struct nofl_block_summary *summary, + uintptr_t next) { + GC_ASSERT((next & (NOFL_BLOCK_SIZE - 1)) 
== 0); + summary->next_and_flags = + (summary->next_and_flags & (NOFL_BLOCK_SIZE - 1)) | next; +} + +static struct nofl_block_ref +nofl_block_for_addr(uintptr_t addr) { + return (struct nofl_block_ref) { + nofl_block_summary_for_addr(addr), + align_down(addr, NOFL_BLOCK_SIZE) + }; +} + +static struct nofl_block_ref +nofl_block_null(void) { + return (struct nofl_block_ref) { NULL, 0 }; +} + +static int +nofl_block_is_null(struct nofl_block_ref block) { + return block.summary == NULL; +} + +static uintptr_t +nofl_block_has_flag(struct nofl_block_ref block, uintptr_t flags) { + GC_ASSERT(!nofl_block_is_null(block)); + return nofl_block_summary_has_flag(block.summary, flags); +} + +static void +nofl_block_set_flag(struct nofl_block_ref block, uintptr_t flags) { + GC_ASSERT(!nofl_block_is_null(block)); + nofl_block_summary_set_flag(block.summary, flags); +} + +static void +nofl_block_clear_flag(struct nofl_block_ref block, uintptr_t flags) { + GC_ASSERT(!nofl_block_is_null(block)); + nofl_block_summary_clear_flag(block.summary, flags); +} + +static struct nofl_block_ref +nofl_block_next(struct nofl_block_ref block) { + GC_ASSERT(!nofl_block_is_null(block)); + return nofl_block_for_addr(nofl_block_summary_next(block.summary)); +} + +static void +nofl_block_set_next(struct nofl_block_ref head, struct nofl_block_ref tail) { + GC_ASSERT(!nofl_block_is_null(head)); + nofl_block_summary_set_next(head.summary, tail.addr); +} + +static int +nofl_allocator_has_block(struct nofl_allocator *alloc) { + return !nofl_block_is_null(alloc->block); +} + +static struct nofl_block_ref +nofl_block_head(struct nofl_block_list *list) { + uintptr_t head = atomic_load_explicit(&list->blocks, memory_order_acquire); + if (!head) + return nofl_block_null(); + return (struct nofl_block_ref){ nofl_block_summary_for_addr(head), head }; +} + +static int +nofl_block_compare_and_exchange(struct nofl_block_list *list, + struct nofl_block_ref *expected, + struct nofl_block_ref desired) { + if (atomic_compare_exchange_weak_explicit(&list->blocks, + &expected->addr, + desired.addr, + memory_order_acq_rel, + memory_order_acquire)) + return 1; + + expected->summary = nofl_block_summary_for_addr(expected->addr); + return 0; +} + +static void +nofl_block_list_push(struct nofl_block_list *list, + struct nofl_block_ref block) { + atomic_fetch_add_explicit(&list->count, 1, memory_order_acq_rel); + GC_ASSERT(nofl_block_is_null(nofl_block_next(block))); + struct nofl_block_ref next = nofl_block_head(list); + do { + nofl_block_set_next(block, next); + } while (!nofl_block_compare_and_exchange(list, &next, block)); +} + +static struct nofl_block_ref +nofl_block_list_pop(struct nofl_block_list *list) { + struct nofl_block_ref head = nofl_block_head(list); + struct nofl_block_ref next; + do { + if (nofl_block_is_null(head)) + return nofl_block_null(); + next = nofl_block_next(head); + } while (!nofl_block_compare_and_exchange(list, &head, next)); + nofl_block_set_next(head, nofl_block_null()); + atomic_fetch_sub_explicit(&list->count, 1, memory_order_acq_rel); + return head; +} + +static void +nofl_block_stack_push(struct nofl_block_stack *stack, + struct nofl_block_ref block, + const struct gc_lock *lock) { + struct nofl_block_list *list = &stack->list; + list->count++; + GC_ASSERT(nofl_block_is_null(nofl_block_next(block))); + struct nofl_block_ref next = nofl_block_head(list); + nofl_block_set_next(block, next); + list->blocks = block.addr; +} + +static struct nofl_block_ref +nofl_block_stack_pop(struct nofl_block_stack *stack, + const 
struct gc_lock *lock) { + struct nofl_block_list *list = &stack->list; + struct nofl_block_ref head = nofl_block_head(list); + if (!nofl_block_is_null(head)) { + list->count--; + list->blocks = nofl_block_next(head).addr; + nofl_block_set_next(head, nofl_block_null()); + } + return head; +} + +static size_t +nofl_block_count(struct nofl_block_list *list) { + return atomic_load_explicit(&list->count, memory_order_acquire); +} + +static void +nofl_push_unavailable_block(struct nofl_space *space, + struct nofl_block_ref block, + const struct gc_lock *lock) { + nofl_block_set_flag(block, NOFL_BLOCK_UNAVAILABLE); + nofl_block_stack_push(nofl_block_has_flag(block, NOFL_BLOCK_PAGED_OUT) + ? &space->paged_out[NOFL_PAGE_OUT_QUEUE_SIZE-1] + : &space->paged_out[0], + block, lock); +} + +static struct nofl_block_ref +nofl_pop_unavailable_block(struct nofl_space *space, + const struct gc_lock *lock) { + for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) { + struct nofl_block_ref block = + nofl_block_stack_pop(&space->paged_out[age], lock); + if (!nofl_block_is_null(block)) { + nofl_block_clear_flag(block, NOFL_BLOCK_UNAVAILABLE); + return block; + } + } + return nofl_block_null(); +} + +static void +nofl_push_empty_block(struct nofl_space *space, + struct nofl_block_ref block, + const struct gc_lock *lock) { + nofl_block_stack_push(&space->empty, block, lock); +} + +static struct nofl_block_ref +nofl_pop_empty_block_with_lock(struct nofl_space *space, + const struct gc_lock *lock) { + return nofl_block_stack_pop(&space->empty, lock); +} + +static struct nofl_block_ref +nofl_pop_empty_block(struct nofl_space *space) { + struct gc_lock lock = nofl_space_lock(space); + struct nofl_block_ref ret = nofl_pop_empty_block_with_lock(space, &lock); + gc_lock_release(&lock); + return ret; +} + +static size_t +nofl_active_block_count(struct nofl_space *space) { + size_t total = space->nslabs * NOFL_NONMETA_BLOCKS_PER_SLAB; + size_t unavailable = 0; + for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) + unavailable += nofl_block_count(&space->paged_out[age].list); + GC_ASSERT(unavailable <= total); + return total - unavailable; +} + +static int +nofl_maybe_push_evacuation_target(struct nofl_space *space, + struct nofl_block_ref block, + double reserve) { + size_t targets = nofl_block_count(&space->evacuation_targets); + size_t active = nofl_active_block_count(space); + if (targets >= active * reserve) + return 0; + + nofl_block_list_push(&space->evacuation_targets, block); + return 1; +} + +static int +nofl_push_evacuation_target_if_needed(struct nofl_space *space, + struct nofl_block_ref block) { + return nofl_maybe_push_evacuation_target(space, block, + space->evacuation_minimum_reserve); +} + +static int +nofl_push_evacuation_target_if_possible(struct nofl_space *space, + struct nofl_block_ref block) { + return nofl_maybe_push_evacuation_target(space, block, + space->evacuation_reserve); +} + +static inline void +nofl_clear_memory(uintptr_t addr, size_t size) { + memset((char*)addr, 0, size); +} + +static size_t +nofl_space_live_object_granules(uint8_t *metadata) { + return scan_for_byte_with_bits(metadata, -1, NOFL_METADATA_BYTE_END) + 1; +} + +static void +nofl_allocator_reset(struct nofl_allocator *alloc) { + alloc->alloc = alloc->sweep = 0; + alloc->block = nofl_block_null(); +} + +static int +nofl_should_promote_block(struct nofl_space *space, + struct nofl_block_ref block) { + // If this block has mostly survivors, we can promote it to the old + // generation. 
Old-generation blocks won't be used for allocation + // until after the next full GC. + if (!GC_GENERATIONAL) return 0; + size_t threshold = NOFL_GRANULES_PER_BLOCK * space->promotion_threshold; + return block.summary->hole_granules < threshold; +} + +static void +nofl_allocator_release_full_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(nofl_allocator_has_block(alloc)); + struct nofl_block_ref block = alloc->block; + GC_ASSERT(alloc->alloc == alloc->sweep); + atomic_fetch_add(&space->allocated_granules_since_last_collection, + block.summary->hole_granules); + atomic_fetch_add(&space->survivor_granules_at_last_collection, + NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules); + atomic_fetch_add(&space->fragmentation_granules_since_last_collection, + block.summary->fragmentation_granules); + + if (nofl_should_promote_block(space, block)) + nofl_block_list_push(&space->promoted, block); + else + nofl_block_list_push(&space->full, block); + + nofl_allocator_reset(alloc); +} + +static void +nofl_allocator_release_full_evacuation_target(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(nofl_allocator_has_block(alloc)); + struct nofl_block_ref block = alloc->block; + GC_ASSERT(alloc->alloc > block.addr); + GC_ASSERT(alloc->sweep == block.addr + NOFL_BLOCK_SIZE); + size_t hole_size = alloc->sweep - alloc->alloc; + // FIXME: Check how this affects statistics. + GC_ASSERT_EQ(block.summary->hole_count, 1); + GC_ASSERT_EQ(block.summary->hole_granules, NOFL_GRANULES_PER_BLOCK); + atomic_fetch_add(&space->old_generation_granules, + NOFL_GRANULES_PER_BLOCK); + if (hole_size) { + hole_size >>= NOFL_GRANULE_SIZE_LOG_2; + block.summary->holes_with_fragmentation = 1; + block.summary->fragmentation_granules = hole_size / NOFL_GRANULE_SIZE; + } else { + GC_ASSERT_EQ(block.summary->fragmentation_granules, 0); + GC_ASSERT_EQ(block.summary->holes_with_fragmentation, 0); + } + nofl_block_list_push(&space->old, block); + nofl_allocator_reset(alloc); +} + +static void +nofl_allocator_release_partly_full_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + // A block can go on the partly full list if it has exactly one + // hole, located at the end of the block. 
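+  // The size of that trailing hole is stashed in the summary's
+  // fragmentation_granules so that a later call to
+  // nofl_allocator_acquire_partly_full_block can resume allocating
+  // into it.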
+ GC_ASSERT(nofl_allocator_has_block(alloc)); + struct nofl_block_ref block = alloc->block; + GC_ASSERT(alloc->alloc > block.addr); + GC_ASSERT(alloc->sweep == block.addr + NOFL_BLOCK_SIZE); + size_t hole_size = alloc->sweep - alloc->alloc; + GC_ASSERT(hole_size); + block.summary->fragmentation_granules = hole_size / NOFL_GRANULE_SIZE; + struct gc_lock lock = nofl_space_lock(space); + nofl_block_stack_push(&space->partly_full, block, &lock); + gc_lock_release(&lock); + nofl_allocator_reset(alloc); +} + +static size_t +nofl_allocator_acquire_partly_full_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + struct gc_lock lock = nofl_space_lock(space); + struct nofl_block_ref block = nofl_block_stack_pop(&space->partly_full, + &lock); + gc_lock_release(&lock); + if (nofl_block_is_null(block)) + return 0; + GC_ASSERT_EQ(block.summary->holes_with_fragmentation, 0); + alloc->block = block; + alloc->sweep = block.addr + NOFL_BLOCK_SIZE; + size_t hole_granules = block.summary->fragmentation_granules; + block.summary->fragmentation_granules = 0; + alloc->alloc = alloc->sweep - (hole_granules << NOFL_GRANULE_SIZE_LOG_2); + return hole_granules; +} + +static size_t +nofl_allocator_acquire_empty_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + struct nofl_block_ref block = nofl_pop_empty_block(space); + if (nofl_block_is_null(block)) + return 0; + block.summary->hole_count = 1; + block.summary->hole_granules = NOFL_GRANULES_PER_BLOCK; + block.summary->holes_with_fragmentation = 0; + block.summary->fragmentation_granules = 0; + alloc->block = block; + alloc->alloc = block.addr; + alloc->sweep = block.addr + NOFL_BLOCK_SIZE; + if (nofl_block_has_flag(block, NOFL_BLOCK_ZERO)) + nofl_block_clear_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + else + nofl_clear_memory(block.addr, NOFL_BLOCK_SIZE); + return NOFL_GRANULES_PER_BLOCK; +} + +static size_t +nofl_allocator_acquire_evacuation_target(struct nofl_allocator* alloc, + struct nofl_space *space) { + size_t granules = nofl_allocator_acquire_partly_full_block(alloc, space); + if (granules) + return granules; + return nofl_allocator_acquire_empty_block(alloc, space); +} + +static void +nofl_allocator_finish_hole(struct nofl_allocator *alloc) { + size_t granules = (alloc->sweep - alloc->alloc) / NOFL_GRANULE_SIZE; + if (granules) { + alloc->block.summary->holes_with_fragmentation++; + alloc->block.summary->fragmentation_granules += granules; + alloc->alloc = alloc->sweep; + } +} + +static inline int +nofl_metadata_byte_has_mark(uint8_t byte, uint8_t marked) { + return (byte & NOFL_METADATA_BYTE_MARK_MASK) == marked; +} + +static inline int +nofl_metadata_byte_is_young_or_has_mark(uint8_t byte, uint8_t marked) { + return (nofl_metadata_byte_has_mark(byte, NOFL_METADATA_BYTE_YOUNG) + || nofl_metadata_byte_has_mark(byte, marked)); +} + +// Sweep some heap to reclaim free space, advancing alloc->alloc and +// alloc->sweep. Return the size of the hole in granules, or 0 if we +// reached the end of the block. 
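+// The caller must have finished the previous hole already, i.e.
+// alloc->alloc == alloc->sweep on entry.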
+static size_t +nofl_allocator_next_hole_in_block(struct nofl_allocator *alloc, + uint8_t survivor_mark) { + GC_ASSERT(nofl_allocator_has_block(alloc)); + GC_ASSERT_EQ(alloc->alloc, alloc->sweep); + uintptr_t sweep = alloc->sweep; + uintptr_t limit = alloc->block.addr + NOFL_BLOCK_SIZE; + + if (sweep == limit) + return 0; + + GC_ASSERT((sweep & (NOFL_GRANULE_SIZE - 1)) == 0); + uint8_t* metadata = nofl_metadata_byte_for_addr(sweep); + size_t limit_granules = (limit - sweep) >> NOFL_GRANULE_SIZE_LOG_2; + + // Except for when we first get a block, alloc->sweep is positioned + // right after a hole, which can point to either the end of the + // block or to a live object. Assume that a live object is more + // common. + while (limit_granules && + nofl_metadata_byte_has_mark(metadata[0], survivor_mark)) { + // Object survived collection; skip over it and continue sweeping. + size_t object_granules = nofl_space_live_object_granules(metadata); + sweep += object_granules * NOFL_GRANULE_SIZE; + limit_granules -= object_granules; + metadata += object_granules; + } + if (!limit_granules) { + GC_ASSERT_EQ(sweep, limit); + alloc->alloc = alloc->sweep = limit; + return 0; + } + + size_t hole_granules = scan_for_byte_with_tag(metadata, limit_granules, + NOFL_METADATA_BYTE_MARK_MASK, + survivor_mark); + size_t free_bytes = hole_granules * NOFL_GRANULE_SIZE; + GC_ASSERT(hole_granules); + GC_ASSERT(hole_granules <= limit_granules); + + memset(metadata, 0, hole_granules); + memset((char*)sweep, 0, free_bytes); + + alloc->block.summary->hole_count++; + GC_ASSERT(hole_granules <= + NOFL_GRANULES_PER_BLOCK - alloc->block.summary->hole_granules); + alloc->block.summary->hole_granules += hole_granules; + + alloc->alloc = sweep; + alloc->sweep = sweep + free_bytes; + return hole_granules; +} + +static void +nofl_allocator_finish_sweeping_in_block(struct nofl_allocator *alloc, + uint8_t survivor_mark) { + do { + nofl_allocator_finish_hole(alloc); + } while (nofl_allocator_next_hole_in_block(alloc, survivor_mark)); +} + +static void +nofl_allocator_release_block(struct nofl_allocator *alloc, + struct nofl_space *space) { + GC_ASSERT(nofl_allocator_has_block(alloc)); + if (alloc->alloc < alloc->sweep && + alloc->sweep == alloc->block.addr + NOFL_BLOCK_SIZE && + alloc->block.summary->holes_with_fragmentation == 0) { + nofl_allocator_release_partly_full_block(alloc, space); + } else if (space->evacuating) { + nofl_allocator_release_full_evacuation_target(alloc, space); + } else { + nofl_allocator_finish_sweeping_in_block(alloc, space->survivor_mark); + nofl_allocator_release_full_block(alloc, space); + } +} + +static void +nofl_allocator_finish(struct nofl_allocator *alloc, struct nofl_space *space) { + if (nofl_allocator_has_block(alloc)) + nofl_allocator_release_block(alloc, space); +} + +static int +nofl_allocator_acquire_block_to_sweep(struct nofl_allocator *alloc, + struct nofl_space *space) { + struct nofl_block_ref block = nofl_block_list_pop(&space->to_sweep); + if (nofl_block_is_null(block)) + return 0; + alloc->block = block; + alloc->alloc = alloc->sweep = block.addr; + return 1; +} + +static size_t +nofl_allocator_next_hole(struct nofl_allocator *alloc, + struct nofl_space *space) { + nofl_allocator_finish_hole(alloc); + + // Sweep current block for a hole. 
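+  // If it is exhausted, fall back to sweeping blocks from the to_sweep
+  // list, then to acquiring a partly-full block, and finally to taking
+  // an empty block; a return of 0 tells the caller to trigger
+  // collection.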
+ if (nofl_allocator_has_block(alloc)) { + size_t granules = + nofl_allocator_next_hole_in_block(alloc, space->survivor_mark); + if (granules) + return granules; + else + nofl_allocator_release_full_block(alloc, space); + GC_ASSERT(!nofl_allocator_has_block(alloc)); + } + + while (nofl_allocator_acquire_block_to_sweep(alloc, space)) { + // This block was marked in the last GC and needs sweeping. + // As we sweep we'll want to record how many bytes were live + // at the last collection. As we allocate we'll record how + // many granules were wasted because of fragmentation. + alloc->block.summary->hole_count = 0; + alloc->block.summary->hole_granules = 0; + alloc->block.summary->holes_with_fragmentation = 0; + alloc->block.summary->fragmentation_granules = 0; + size_t granules = + nofl_allocator_next_hole_in_block(alloc, space->survivor_mark); + if (granules) + return granules; + nofl_allocator_release_full_block(alloc, space); + } + + { + size_t granules = nofl_allocator_acquire_partly_full_block(alloc, space); + if (granules) + return granules; + } + + // We are done sweeping for blocks. Now take from the empties list. + if (nofl_allocator_acquire_empty_block(alloc, space)) + return NOFL_GRANULES_PER_BLOCK; + + // Couldn't acquire another block; return 0 to cause collection. + return 0; +} + +static struct gc_ref +nofl_allocate(struct nofl_allocator *alloc, struct nofl_space *space, + size_t size, void (*gc)(void*), void *gc_data, + enum gc_allocation_kind kind) { + GC_ASSERT(size > 0); + GC_ASSERT(size <= gc_allocator_large_threshold()); + size = align_up(size, NOFL_GRANULE_SIZE); + + if (alloc->alloc + size > alloc->sweep) { + size_t granules = size >> NOFL_GRANULE_SIZE_LOG_2; + while (1) { + size_t hole = nofl_allocator_next_hole(alloc, space); + if (hole >= granules) { + break; + } + if (!hole) + gc(gc_data); + } + } + + struct gc_ref ret = gc_ref(alloc->alloc); + alloc->alloc += size; + gc_update_alloc_table(ret, size, kind); + return ret; +} + +static struct gc_ref +nofl_evacuation_allocate(struct nofl_allocator* alloc, struct nofl_space *space, + size_t granules) { + size_t avail = (alloc->sweep - alloc->alloc) >> NOFL_GRANULE_SIZE_LOG_2; + while (avail < granules) { + if (nofl_allocator_has_block(alloc)) + // No need to finish the hole, these mark bytes are zero. + nofl_allocator_release_full_evacuation_target(alloc, space); + avail = nofl_allocator_acquire_evacuation_target(alloc, space); + if (!avail) + return gc_ref_null(); + } + + struct gc_ref ret = gc_ref(alloc->alloc); + alloc->alloc += granules * NOFL_GRANULE_SIZE; + // Caller is responsible for updating alloc table. + return ret; +} + +// Another thread is triggering GC. Before we stop, finish clearing the +// dead mark bytes for the mutator's block, and release the block. 
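+// Finishing sweeping is just a matter of draining
+// nofl_allocator_next_hole until it returns zero.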
+static void +nofl_finish_sweeping(struct nofl_allocator *alloc, + struct nofl_space *space) { + while (nofl_allocator_next_hole(alloc, space)) {} +} + +static inline int +nofl_is_ephemeron(struct gc_ref ref) { + uint8_t meta = *nofl_metadata_byte_for_addr(gc_ref_value(ref)); + uint8_t kind = meta & NOFL_METADATA_BYTE_TRACE_KIND_MASK; + return kind == NOFL_METADATA_BYTE_TRACE_EPHEMERON; +} + +static void +nofl_space_set_ephemeron_flag(struct gc_ref ref) { + if (gc_has_conservative_intraheap_edges()) { + uint8_t *metadata = nofl_metadata_byte_for_addr(gc_ref_value(ref)); + uint8_t byte = *metadata & ~NOFL_METADATA_BYTE_TRACE_KIND_MASK; + *metadata = byte | NOFL_METADATA_BYTE_TRACE_EPHEMERON; + } +} + +struct gc_trace_worker; + +static inline int +nofl_space_contains_address(struct nofl_space *space, uintptr_t addr) { + return extents_contain_addr(space->extents, addr); +} + +static inline int +nofl_space_contains_conservative_ref(struct nofl_space *space, + struct gc_conservative_ref ref) { + return nofl_space_contains_address(space, gc_conservative_ref_value(ref)); +} + +static inline int +nofl_space_contains(struct nofl_space *space, struct gc_ref ref) { + return nofl_space_contains_address(space, gc_ref_value(ref)); +} + +static inline int +nofl_space_contains_edge(struct nofl_space *space, struct gc_edge edge) { + return nofl_space_contains_address(space, gc_edge_address(edge)); +} + +static inline int +nofl_space_is_survivor(struct nofl_space *space, struct gc_ref ref) { + uint8_t *metadata = nofl_metadata_byte_for_object(ref); + uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed); + return nofl_metadata_byte_has_mark(byte, space->survivor_mark); +} + +static uint8_t* +nofl_field_logged_byte(struct gc_edge edge) { + return nofl_metadata_byte_for_addr(gc_edge_address(edge)); +} + +static uint8_t +nofl_field_logged_bit(struct gc_edge edge) { + GC_ASSERT_EQ(sizeof(uintptr_t) * 2, NOFL_GRANULE_SIZE); + size_t field = gc_edge_address(edge) / sizeof(uintptr_t); + return NOFL_METADATA_BYTE_LOGGED_0 << (field % 2); +} + +static int +nofl_space_remember_edge(struct nofl_space *space, struct gc_ref obj, + struct gc_edge edge) { + GC_ASSERT(nofl_space_contains(space, obj)); + if (!GC_GENERATIONAL) return 0; + if (!nofl_space_is_survivor(space, obj)) + return 0; + uint8_t* loc = nofl_field_logged_byte(edge); + uint8_t bit = nofl_field_logged_bit(edge); + uint8_t byte = atomic_load_explicit(loc, memory_order_acquire); + do { + if (byte & bit) return 0; + } while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit, + memory_order_acq_rel, + memory_order_acquire)); + return 1; +} + +static void +nofl_space_forget_edge(struct nofl_space *space, struct gc_edge edge) { + GC_ASSERT(nofl_space_contains_edge(space, edge)); + GC_ASSERT(GC_GENERATIONAL); + uint8_t* loc = nofl_field_logged_byte(edge); + if (GC_DEBUG) { + pthread_mutex_lock(&space->lock); + uint8_t bit = nofl_field_logged_bit(edge); + GC_ASSERT(*loc & bit); + *loc &= ~bit; + pthread_mutex_unlock(&space->lock); + } else { + // In release mode, race to clear both bits at once. 
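+    // A cleared logged bit just means that the next write to that
+    // field will take the write barrier's slow path and log it again,
+    // so losing the sibling field's bit in this race is acceptable.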
+ uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); + byte &= ~(NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1); + atomic_store_explicit(loc, byte, memory_order_relaxed); + } +} + +static void +nofl_space_reset_statistics(struct nofl_space *space) { + space->survivor_granules_at_last_collection = 0; + space->allocated_granules_since_last_collection = 0; + space->fragmentation_granules_since_last_collection = 0; +} + +static size_t +nofl_space_live_size_at_last_collection(struct nofl_space *space) { + size_t granules = space->old_generation_granules + + space->survivor_granules_at_last_collection; + return granules * NOFL_GRANULE_SIZE; +} + +static void +nofl_space_add_to_allocation_counter(struct nofl_space *space, + uint64_t *counter) { + *counter += + atomic_load_explicit(&space->allocated_granules_since_last_collection, + memory_order_relaxed) * NOFL_GRANULE_SIZE; +} + +static size_t +nofl_space_estimate_live_bytes_after_gc(struct nofl_space *space, + double last_yield) +{ + // The nofl space mostly traces via marking, and as such doesn't precisely + // know the live data size until after sweeping. But it is important to + // promptly compute the live size so that we can grow the heap if + // appropriate. Therefore sometimes we will estimate the live data size + // instead of measuring it precisely. + size_t bytes = 0; + bytes += nofl_block_count(&space->full) * NOFL_BLOCK_SIZE; + bytes += nofl_block_count(&space->partly_full.list) * NOFL_BLOCK_SIZE / 2; + GC_ASSERT_EQ(nofl_block_count(&space->promoted), 0); + bytes += space->old_generation_granules * NOFL_GRANULE_SIZE; + bytes += + nofl_block_count(&space->to_sweep) * NOFL_BLOCK_SIZE * (1 - last_yield); + + DEBUG("--- nofl estimate before adjustment: %zu\n", bytes); +/* + // Assume that if we have pending unavailable bytes after GC that there is a + // large object waiting to be allocated, and that probably it survives this GC + // cycle. + bytes += atomic_load_explicit(&space->pending_unavailable_bytes, + memory_order_acquire); + DEBUG("--- nofl estimate after adjustment: %zu\n", bytes); +*/ + return bytes; +} + +static size_t +nofl_space_evacuation_reserve_bytes(struct nofl_space *space) { + return nofl_block_count(&space->evacuation_targets) * NOFL_BLOCK_SIZE; +} + +static size_t +nofl_space_fragmentation(struct nofl_space *space) { + size_t young = space->fragmentation_granules_since_last_collection; + GC_ASSERT(nofl_block_count(&space->old) * NOFL_GRANULES_PER_BLOCK >= + space->old_generation_granules); + size_t old = nofl_block_count(&space->old) * NOFL_GRANULES_PER_BLOCK - + space->old_generation_granules; + return (young + old) * NOFL_GRANULE_SIZE; +} + +static void +nofl_space_prepare_evacuation(struct nofl_space *space) { + GC_ASSERT(!space->evacuating); + struct nofl_block_ref block; + struct gc_lock lock = nofl_space_lock(space); + while (!nofl_block_is_null + (block = nofl_block_list_pop(&space->evacuation_targets))) + nofl_push_empty_block(space, block, &lock); + gc_lock_release(&lock); + // Blocks are either to_sweep, empty, or unavailable. 
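+  // The assertions below check this: the partly-full, full, promoted,
+  // old, and evacuation-target lists must all be empty at this point.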
+ GC_ASSERT_EQ(nofl_block_count(&space->partly_full.list), 0); + GC_ASSERT_EQ(nofl_block_count(&space->full), 0); + GC_ASSERT_EQ(nofl_block_count(&space->promoted), 0); + GC_ASSERT_EQ(nofl_block_count(&space->old), 0); + GC_ASSERT_EQ(nofl_block_count(&space->evacuation_targets), 0); + size_t target_blocks = nofl_block_count(&space->empty.list); + DEBUG("evacuation target block count: %zu\n", target_blocks); + + if (target_blocks == 0) { + DEBUG("no evacuation target blocks, not evacuating this round\n"); + return; + } + + // Put the mutator into evacuation mode, collecting up to 50% of free + // space as evacuation blocks. + space->evacuation_reserve = 0.5; + space->evacuating = 1; + + size_t target_granules = target_blocks * NOFL_GRANULES_PER_BLOCK; + // Compute histogram where domain is the number of granules in a block + // that survived the last collection, aggregated into 33 buckets, and + // range is number of blocks in that bucket. (Bucket 0 is for blocks + // that were found to be completely empty; such blocks may be on the + // evacuation target list.) + const size_t bucket_count = 33; + size_t histogram[33] = {0,}; + size_t bucket_size = NOFL_GRANULES_PER_BLOCK / 32; + for (struct nofl_block_ref b = nofl_block_for_addr(space->to_sweep.blocks); + !nofl_block_is_null(b); + b = nofl_block_next(b)) { + size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->hole_granules; + size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; + histogram[bucket]++; + } + + // Now select a number of blocks that is likely to fill the space in + // the target blocks. Prefer candidate blocks with fewer survivors + // from the last GC, to increase expected free block yield. + for (size_t bucket = 0; bucket < bucket_count; bucket++) { + size_t bucket_granules = bucket * bucket_size * histogram[bucket]; + if (bucket_granules <= target_granules) { + target_granules -= bucket_granules; + } else { + histogram[bucket] = target_granules / (bucket_size * bucket); + target_granules = 0; + } + } + + // Having selected the number of blocks, now we set the evacuation + // candidate flag on all blocks that have live objects. + for (struct nofl_block_ref b = nofl_block_for_addr(space->to_sweep.blocks); + !nofl_block_is_null(b); + b = nofl_block_next(b)) { + size_t survivor_granules = NOFL_GRANULES_PER_BLOCK - b.summary->hole_granules; + size_t bucket = (survivor_granules + bucket_size - 1) / bucket_size; + if (histogram[bucket]) { + nofl_block_set_flag(b, NOFL_BLOCK_EVACUATE); + histogram[bucket]--; + } else { + nofl_block_clear_flag(b, NOFL_BLOCK_EVACUATE); + } + } +} + +static void +nofl_space_clear_block_marks(struct nofl_space *space) { + for (size_t s = 0; s < space->nslabs; s++) { + struct nofl_slab *slab = space->slabs[s]; + memset(slab->header.block_marks, 0, sizeof(slab->header.block_marks)); + } +} + +static void +nofl_space_prepare_gc(struct nofl_space *space, enum gc_collection_kind kind) { + int is_minor = kind == GC_COLLECTION_MINOR; + if (!is_minor) { + space->current_mark = nofl_advance_current_mark(space->current_mark); + nofl_space_clear_block_marks(space); + } +} + +static void +nofl_space_start_gc(struct nofl_space *space, enum gc_collection_kind gc_kind) { + GC_ASSERT_EQ(nofl_block_count(&space->to_sweep), 0); + + // Any block that was the target of allocation in the last cycle will need to + // be swept next cycle. 
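+  // On a minor collection, promoted and old blocks keep their state;
+  // on any other collection kind they too go onto the to_sweep list
+  // and the old-generation granule count is reset.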
+ struct nofl_block_ref block; + while (!nofl_block_is_null + (block = nofl_block_list_pop(&space->partly_full.list))) + nofl_block_list_push(&space->to_sweep, block); + while (!nofl_block_is_null(block = nofl_block_list_pop(&space->full))) + nofl_block_list_push(&space->to_sweep, block); + + if (gc_kind != GC_COLLECTION_MINOR) { + while (!nofl_block_is_null(block = nofl_block_list_pop(&space->promoted))) + nofl_block_list_push(&space->to_sweep, block); + while (!nofl_block_is_null(block = nofl_block_list_pop(&space->old))) + nofl_block_list_push(&space->to_sweep, block); + space->old_generation_granules = 0; + } + + if (gc_kind == GC_COLLECTION_COMPACTING) + nofl_space_prepare_evacuation(space); +} + +static void +nofl_space_finish_evacuation(struct nofl_space *space, + const struct gc_lock *lock) { + // When evacuation began, the evacuation reserve was moved to the + // empties list. Now that evacuation is finished, attempt to + // repopulate the reserve. + GC_ASSERT(space->evacuating); + space->evacuating = 0; + size_t active = nofl_active_block_count(space); + size_t reserve = space->evacuation_minimum_reserve * active; + GC_ASSERT(nofl_block_count(&space->evacuation_targets) == 0); + while (reserve--) { + struct nofl_block_ref block = nofl_pop_empty_block_with_lock(space, lock); + if (nofl_block_is_null(block)) break; + nofl_block_list_push(&space->evacuation_targets, block); + } +} + +static void +nofl_space_promote_blocks(struct nofl_space *space) { + struct nofl_block_ref block; + while (!nofl_block_is_null(block = nofl_block_list_pop(&space->promoted))) { + block.summary->hole_count = 0; + block.summary->hole_granules = 0; + block.summary->holes_with_fragmentation = 0; + block.summary->fragmentation_granules = 0; + struct nofl_allocator alloc = { block.addr, block.addr, block }; + nofl_allocator_finish_sweeping_in_block(&alloc, space->current_mark); + atomic_fetch_add(&space->old_generation_granules, + NOFL_GRANULES_PER_BLOCK - block.summary->hole_granules); + nofl_block_list_push(&space->old, block); + } +} + +static inline size_t +nofl_size_to_granules(size_t size) { + return (size + NOFL_GRANULE_SIZE - 1) >> NOFL_GRANULE_SIZE_LOG_2; +} + +static void +nofl_space_verify_sweepable_blocks(struct nofl_space *space, + struct nofl_block_list *list) +{ + if (GC_CONSERVATIVE_TRACE) + // No intrinsic way to measure object size, only the extrinsic + // metadata bytes. + return; + for (struct nofl_block_ref b = nofl_block_for_addr(list->blocks); + !nofl_block_is_null(b); + b = nofl_block_next(b)) { + // Iterate objects in the block, verifying that the END bytes correspond to + // the measured object size. 
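+    // Objects carrying the current mark are checked and skipped over a
+    // whole object at a time; any other byte advances a single granule.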
+ uintptr_t addr = b.addr; + uintptr_t limit = addr + NOFL_BLOCK_SIZE; + uint8_t *meta = nofl_metadata_byte_for_addr(b.addr); + while (addr < limit) { + if (nofl_metadata_byte_has_mark(meta[0], space->current_mark)) { + struct gc_ref obj = gc_ref(addr); + size_t obj_bytes; + gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes); + size_t granules = nofl_size_to_granules(obj_bytes); + GC_ASSERT(granules); + for (size_t granule = 0; granule < granules - 1; granule++) + GC_ASSERT(!(meta[granule] & NOFL_METADATA_BYTE_END)); + GC_ASSERT(meta[granules - 1] & NOFL_METADATA_BYTE_END); + meta += granules; + addr += granules * NOFL_GRANULE_SIZE; + } else { + meta++; + addr += NOFL_GRANULE_SIZE; + } + } + GC_ASSERT(addr == limit); + } +} + +static void +nofl_space_verify_swept_blocks(struct nofl_space *space, + struct nofl_block_list *list) { + if (GC_CONSERVATIVE_TRACE) + // No intrinsic way to measure object size, only the extrinsic + // metadata bytes. + return; + for (struct nofl_block_ref b = nofl_block_for_addr(list->blocks); + !nofl_block_is_null(b); + b = nofl_block_next(b)) { + // Iterate objects in the block, verifying that the END bytes correspond to + // the measured object size. + uintptr_t addr = b.addr; + uintptr_t limit = addr + NOFL_BLOCK_SIZE; + uint8_t *meta = nofl_metadata_byte_for_addr(addr); + while (addr < limit) { + if (meta[0]) { + GC_ASSERT(nofl_metadata_byte_has_mark(meta[0], space->current_mark)); + struct gc_ref obj = gc_ref(addr); + size_t obj_bytes; + gc_trace_object(obj, NULL, NULL, NULL, &obj_bytes); + size_t granules = nofl_size_to_granules(obj_bytes); + GC_ASSERT(granules); + for (size_t granule = 0; granule < granules - 1; granule++) + GC_ASSERT(!(meta[granule] & NOFL_METADATA_BYTE_END)); + GC_ASSERT(meta[granules - 1] & NOFL_METADATA_BYTE_END); + meta += granules; + addr += granules * NOFL_GRANULE_SIZE; + } else { + meta++; + addr += NOFL_GRANULE_SIZE; + } + } + GC_ASSERT(addr == limit); + } +} + +static void +nofl_space_verify_empty_blocks(struct nofl_space *space, + struct nofl_block_list *list, + int paged_in) { + for (struct nofl_block_ref b = nofl_block_for_addr(list->blocks); + !nofl_block_is_null(b); + b = nofl_block_next(b)) { + // Iterate objects in the block, verifying that the END bytes correspond to + // the measured object size. + uintptr_t addr = b.addr; + uintptr_t limit = addr + NOFL_BLOCK_SIZE; + uint8_t *meta = nofl_metadata_byte_for_addr(addr); + while (addr < limit) { + GC_ASSERT_EQ(*meta, 0); + if (paged_in && nofl_block_has_flag(b, NOFL_BLOCK_ZERO)) { + char zeroes[NOFL_GRANULE_SIZE] = { 0, }; + GC_ASSERT_EQ(memcmp((char*)addr, zeroes, NOFL_GRANULE_SIZE), 0); + } + meta++; + addr += NOFL_GRANULE_SIZE; + } + GC_ASSERT(addr == limit); + } +} + +static void +nofl_space_verify_before_restart(struct nofl_space *space) { + nofl_space_verify_sweepable_blocks(space, &space->to_sweep); + nofl_space_verify_sweepable_blocks(space, &space->promoted); + // If there are full or partly full blocks, they were filled during + // evacuation. 
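+  // Such blocks should hold only objects carrying the current mark,
+  // which is what nofl_space_verify_swept_blocks checks.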
+ nofl_space_verify_swept_blocks(space, &space->partly_full.list); + nofl_space_verify_swept_blocks(space, &space->full); + nofl_space_verify_swept_blocks(space, &space->old); + nofl_space_verify_empty_blocks(space, &space->empty.list, 1); + for (int age = 0; age < NOFL_PAGE_OUT_QUEUE_SIZE; age++) + nofl_space_verify_empty_blocks(space, &space->paged_out[age].list, 0); + // GC_ASSERT(space->last_collection_was_minor || !nofl_block_count(&space->old)); +} + +static void +nofl_space_finish_gc(struct nofl_space *space, + enum gc_collection_kind gc_kind) { + space->last_collection_was_minor = (gc_kind == GC_COLLECTION_MINOR); + struct gc_lock lock = nofl_space_lock(space); + if (space->evacuating) + nofl_space_finish_evacuation(space, &lock); + else { + space->evacuation_reserve = space->evacuation_minimum_reserve; + // If we were evacuating and preferentially allocated empty blocks + // to the evacuation reserve, return those blocks to the empty set + // for allocation by the mutator. + size_t active = nofl_active_block_count(space); + size_t target = space->evacuation_minimum_reserve * active; + size_t reserve = nofl_block_count(&space->evacuation_targets); + while (reserve-- > target) + nofl_push_empty_block(space, + nofl_block_list_pop(&space->evacuation_targets), + &lock); + } + + { + struct nofl_block_list to_sweep = {0,}; + struct nofl_block_ref block; + while (!nofl_block_is_null(block = nofl_block_list_pop(&space->to_sweep))) { + if (nofl_block_is_marked(block.addr)) { + nofl_block_list_push(&to_sweep, block); + } else { + // Block is empty. + memset(nofl_metadata_byte_for_addr(block.addr), 0, + NOFL_GRANULES_PER_BLOCK); + if (!nofl_push_evacuation_target_if_possible(space, block)) + nofl_push_empty_block(space, block, &lock); + } + } + atomic_store_explicit(&space->to_sweep.count, to_sweep.count, + memory_order_release); + atomic_store_explicit(&space->to_sweep.blocks, to_sweep.blocks, + memory_order_release); + } + + // FIXME: Promote concurrently instead of during the pause. 
+ gc_lock_release(&lock); + nofl_space_promote_blocks(space); + nofl_space_reset_statistics(space); + space->survivor_mark = space->current_mark; + if (GC_DEBUG) + nofl_space_verify_before_restart(space); +} + +static ssize_t +nofl_space_request_release_memory(struct nofl_space *space, size_t bytes) { + return atomic_fetch_add(&space->pending_unavailable_bytes, bytes) + bytes; +} + +static ssize_t +nofl_space_maybe_reacquire_memory(struct nofl_space *space, size_t bytes) { + ssize_t pending = + atomic_fetch_sub(&space->pending_unavailable_bytes, bytes) - bytes; + struct gc_lock lock = nofl_space_lock(space); + while (pending + NOFL_BLOCK_SIZE <= 0) { + struct nofl_block_ref block = nofl_pop_unavailable_block(space, &lock); + if (nofl_block_is_null(block)) break; + if (!nofl_push_evacuation_target_if_needed(space, block)) + nofl_push_empty_block(space, block, &lock); + pending = atomic_fetch_add(&space->pending_unavailable_bytes, NOFL_BLOCK_SIZE) + + NOFL_BLOCK_SIZE; + } + gc_lock_release(&lock); + return pending; +} + +static inline int +nofl_space_should_evacuate(struct nofl_space *space, uint8_t metadata_byte, + struct gc_ref obj) { + if (gc_has_conservative_intraheap_edges()) + return 0; + if (!space->evacuating) + return 0; + if (metadata_byte & NOFL_METADATA_BYTE_PINNED) + return 0; + return nofl_block_has_flag(nofl_block_for_addr(gc_ref_value(obj)), + NOFL_BLOCK_EVACUATE); +} + +static inline int +nofl_space_set_mark_relaxed(struct nofl_space *space, uint8_t *metadata, + uint8_t byte) { + uint8_t mask = NOFL_METADATA_BYTE_MARK_MASK; + atomic_store_explicit(metadata, + (byte & ~mask) | space->current_mark, + memory_order_relaxed); + return 1; +} + +static inline int +nofl_space_set_mark(struct nofl_space *space, uint8_t *metadata, uint8_t byte) { + uint8_t mask = NOFL_METADATA_BYTE_MARK_MASK; + atomic_store_explicit(metadata, + (byte & ~mask) | space->current_mark, + memory_order_release); + return 1; +} + +static inline int +nofl_space_set_nonempty_mark(struct nofl_space *space, uint8_t *metadata, + uint8_t byte, struct gc_ref ref) { + // FIXME: Check that relaxed atomics are actually worth it. + nofl_space_set_mark_relaxed(space, metadata, byte); + nofl_block_set_mark(gc_ref_value(ref)); + return 1; +} + +static inline void +nofl_space_pin_object(struct nofl_space *space, struct gc_ref ref) { + // For the heap-conservative configuration, all objects are pinned, and we use + // the pinned bit instead to identify an object's trace kind. + if (gc_has_conservative_intraheap_edges()) + return; + uint8_t *metadata = nofl_metadata_byte_for_object(ref); + uint8_t byte = atomic_load_explicit(metadata, memory_order_relaxed); + if (byte & NOFL_METADATA_BYTE_PINNED) + return; + uint8_t new_byte; + do { + new_byte = byte | NOFL_METADATA_BYTE_PINNED; + } while (!atomic_compare_exchange_weak_explicit(metadata, &byte, new_byte, + memory_order_acq_rel, + memory_order_acquire)); +} + +static inline uint8_t +clear_logged_bits_in_evacuated_object(uint8_t head, uint8_t *metadata, + size_t count) { + // On a major collection, it could be that we evacuate an object that + // has one or more fields in the old-to-new remembered set. Because + // the young generation is empty after a major collection, we know the + // old-to-new remembered set will be empty also. To clear the + // remembered set, we call gc_field_set_clear, which will end up + // visiting all remembered edges and clearing their logged bits. 
But
+  // that doesn't work for evacuated objects, because their edges move:
+  // gc_field_set_clear will frob the pre-evacuation metadata bytes of
+  // the object. So here we explicitly clear logged bits for evacuated
+  // objects. That the bits for the pre-evacuation location are also
+  // frobbed by gc_field_set_clear doesn't cause a problem, as that
+  // memory will be swept and cleared later.
+  //
+  // This concern doesn't apply to minor collections: there we will
+  // never evacuate an object in the remembered set, because old objects
+  // aren't traced during a minor collection.
+  uint8_t mask = NOFL_METADATA_BYTE_LOGGED_0 | NOFL_METADATA_BYTE_LOGGED_1;
+  for (size_t i = 1; i < count; i++) {
+    if (metadata[i] & mask)
+      metadata[i] &= ~mask;
+  }
+  return head & ~mask;
+}
+
+static inline int
+nofl_space_evacuate(struct nofl_space *space, uint8_t *metadata, uint8_t byte,
+                    struct gc_edge edge,
+                    struct gc_ref old_ref,
+                    struct nofl_allocator *evacuate) {
+  struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref);
+
+  if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED)
+    gc_atomic_forward_acquire(&fwd);
+
+  switch (fwd.state) {
+  case GC_FORWARDING_STATE_NOT_FORWARDED:
+  default:
+    // Impossible.
+    GC_CRASH();
+  case GC_FORWARDING_STATE_ACQUIRED: {
+    // We claimed the object successfully.
+
+    // First check again if someone else tried to evacuate this object and ended
+    // up marking in place instead.
+    byte = atomic_load_explicit(metadata, memory_order_acquire);
+    if (nofl_metadata_byte_has_mark(byte, space->current_mark)) {
+      // Indeed, already marked in place.
+      gc_atomic_forward_abort(&fwd);
+      return 0;
+    }
+
+    // Otherwise, we try to evacuate.
+    size_t object_granules = nofl_space_live_object_granules(metadata);
+    struct gc_ref new_ref = nofl_evacuation_allocate(evacuate, space,
+                                                     object_granules);
+    if (!gc_ref_is_null(new_ref)) {
+      // Whee, it works! Copy object contents before committing, as we don't
+      // know what part of the object (if any) will be overwritten by the
+      // commit.
+      memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref),
+             object_granules * NOFL_GRANULE_SIZE);
+      gc_atomic_forward_commit(&fwd, new_ref);
+      // Now update extent metadata, and indicate to the caller that
+      // the object's fields need to be traced.
+      uint8_t *new_metadata = nofl_metadata_byte_for_object(new_ref);
+      memcpy(new_metadata + 1, metadata + 1, object_granules - 1);
+      if (GC_GENERATIONAL)
+        byte = clear_logged_bits_in_evacuated_object(byte, new_metadata,
+                                                     object_granules);
+      gc_edge_update(edge, new_ref);
+      return nofl_space_set_nonempty_mark(space, new_metadata, byte,
+                                          new_ref);
+    } else {
+      // Well shucks; allocation failed. Mark in place and then release the
+      // object.
+      nofl_space_set_mark(space, metadata, byte);
+      nofl_block_set_mark(gc_ref_value(old_ref));
+      gc_atomic_forward_abort(&fwd);
+      return 1;
+    }
+    break;
+  }
+  case GC_FORWARDING_STATE_BUSY:
+    // Someone else claimed this object first. Spin until new address
+    // known, or evacuation aborts.
+    for (size_t spin_count = 0;; spin_count++) {
+      if (gc_atomic_forward_retry_busy(&fwd))
+        break;
+      yield_for_spin(spin_count);
+    }
+    if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED)
+      // Remote evacuation aborted; remote will mark and enqueue.
+      return 0;
+    ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED);
+    // Fall through.
+  case GC_FORWARDING_STATE_FORWARDED:
+    // The object has been evacuated already. Update the edge;
+    // whoever forwarded the object will make sure it's eventually
+    // traced.
+    gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd)));
+    return 0;
+  }
+}
+
+static inline int
+nofl_space_evacuate_or_mark_object(struct nofl_space *space,
+                                   struct gc_edge edge,
+                                   struct gc_ref old_ref,
+                                   struct nofl_allocator *evacuate) {
+  uint8_t *metadata = nofl_metadata_byte_for_object(old_ref);
+  uint8_t byte = *metadata;
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
+    return 0;
+
+  if (nofl_space_should_evacuate(space, byte, old_ref))
+    return nofl_space_evacuate(space, metadata, byte, edge, old_ref,
+                               evacuate);
+
+  return nofl_space_set_nonempty_mark(space, metadata, byte, old_ref);
+}
+
+static inline int
+nofl_space_forward_if_evacuated(struct nofl_space *space,
+                                struct gc_edge edge,
+                                struct gc_ref ref) {
+  struct gc_atomic_forward fwd = gc_atomic_forward_begin(ref);
+  switch (fwd.state) {
+  case GC_FORWARDING_STATE_NOT_FORWARDED:
+    return 0;
+  case GC_FORWARDING_STATE_BUSY:
+    // Someone else claimed this object first. Spin until new address
+    // known, or evacuation aborts.
+    for (size_t spin_count = 0;; spin_count++) {
+      if (gc_atomic_forward_retry_busy(&fwd))
+        break;
+      yield_for_spin(spin_count);
+    }
+    if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED)
+      // Remote evacuation aborted; remote will mark and enqueue.
+      return 1;
+    ASSERT(fwd.state == GC_FORWARDING_STATE_FORWARDED);
+    // Fall through.
+  case GC_FORWARDING_STATE_FORWARDED:
+    gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd)));
+    return 1;
+  default:
+    GC_CRASH();
+  }
+}
+
+static int
+nofl_space_forward_or_mark_if_traced(struct nofl_space *space,
+                                     struct gc_edge edge,
+                                     struct gc_ref ref) {
+  uint8_t *metadata = nofl_metadata_byte_for_object(ref);
+  uint8_t byte = *metadata;
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
+    return 1;
+
+  if (!nofl_space_should_evacuate(space, byte, ref))
+    return 0;
+
+  return nofl_space_forward_if_evacuated(space, edge, ref);
+}
+
+static inline struct gc_ref
+nofl_space_mark_conservative_ref(struct nofl_space *space,
+                                 struct gc_conservative_ref ref,
+                                 int possibly_interior) {
+  uintptr_t addr = gc_conservative_ref_value(ref);
+
+  if (possibly_interior) {
+    addr = align_down(addr, NOFL_GRANULE_SIZE);
+  } else {
+    // Addr not an aligned granule? Not an object.
+    uintptr_t displacement = addr & (NOFL_GRANULE_SIZE - 1);
+    if (!gc_is_valid_conservative_ref_displacement(displacement))
+      return gc_ref_null();
+    addr -= displacement;
+  }
+
+  // Addr in meta block? Not an object.
+  if ((addr & (NOFL_SLAB_SIZE - 1)) < NOFL_META_BLOCKS_PER_SLAB * NOFL_BLOCK_SIZE)
+    return gc_ref_null();
+
+  // Addr in block that has been paged out? Not an object.
+  if (nofl_block_has_flag(nofl_block_for_addr(addr), NOFL_BLOCK_UNAVAILABLE))
+    return gc_ref_null();
+
+  uint8_t *loc = nofl_metadata_byte_for_addr(addr);
+  uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed);
+
+  // Already marked object? Nothing to do.
+  if (nofl_metadata_byte_has_mark(byte, space->current_mark))
+    return gc_ref_null();
+
+  // Addr is not the start of an unmarked object? Search backwards if
+  // we have interior pointers, otherwise not an object.
+  if (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark)) {
+    if (!possibly_interior)
+      return gc_ref_null();
+
+    uintptr_t block_base = align_down(addr, NOFL_BLOCK_SIZE);
+    uint8_t *loc_base = nofl_metadata_byte_for_addr(block_base);
+    do {
+      // Searched past block? Not an object.
+ if (loc-- == loc_base) + return gc_ref_null(); + + byte = atomic_load_explicit(loc, memory_order_relaxed); + + // Ran into the end of some other allocation? Not an object, then. + if (byte & NOFL_METADATA_BYTE_END) + return gc_ref_null(); + // Object already marked? Nothing to do. + if (nofl_metadata_byte_has_mark(byte, space->current_mark)) + return gc_ref_null(); + + // Continue until we find object start. + } while (!nofl_metadata_byte_is_young_or_has_mark(byte, space->survivor_mark)); + + // Found object start, and object is unmarked; adjust addr. + addr = block_base + (loc - loc_base) * NOFL_GRANULE_SIZE; + } + + nofl_space_set_nonempty_mark(space, loc, byte, gc_ref(addr)); + + return gc_ref(addr); +} + +static inline size_t +nofl_space_object_size(struct nofl_space *space, struct gc_ref ref) { + uint8_t *loc = nofl_metadata_byte_for_object(ref); + size_t granules = nofl_space_live_object_granules(loc); + return granules * NOFL_GRANULE_SIZE; +} + +static inline enum gc_trace_kind +nofl_metadata_byte_trace_kind(uint8_t byte) +{ + switch (byte & NOFL_METADATA_BYTE_TRACE_KIND_MASK) { + case NOFL_METADATA_BYTE_TRACE_PRECISELY: + return GC_TRACE_PRECISELY; + case NOFL_METADATA_BYTE_TRACE_NONE: + return GC_TRACE_NONE; +#if GC_CONSERVATIVE_TRACE + case NOFL_METADATA_BYTE_TRACE_CONSERVATIVELY: + return GC_TRACE_CONSERVATIVELY; + case NOFL_METADATA_BYTE_TRACE_EPHEMERON: + return GC_TRACE_EPHEMERON; +#endif + default: + GC_CRASH(); + } +} +static inline struct gc_trace_plan +nofl_space_object_trace_plan(struct nofl_space *space, struct gc_ref ref) { + uint8_t *loc = nofl_metadata_byte_for_object(ref); + uint8_t byte = atomic_load_explicit(loc, memory_order_relaxed); + enum gc_trace_kind kind = nofl_metadata_byte_trace_kind(byte); + switch (kind) { + case GC_TRACE_PRECISELY: + case GC_TRACE_NONE: + return (struct gc_trace_plan){ kind, }; +#if GC_CONSERVATIVE_TRACE + case GC_TRACE_CONSERVATIVELY: { + size_t granules = nofl_space_live_object_granules(loc); + return (struct gc_trace_plan){ kind, granules * NOFL_GRANULE_SIZE }; + } + case GC_TRACE_EPHEMERON: + return (struct gc_trace_plan){ kind, }; +#endif + default: + GC_CRASH(); + } +} + +static struct nofl_slab* +nofl_allocate_slabs(size_t nslabs) { + return gc_platform_acquire_memory(nslabs * NOFL_SLAB_SIZE, NOFL_SLAB_SIZE); +} + +static void +nofl_space_add_slabs(struct nofl_space *space, struct nofl_slab *slabs, + size_t nslabs) { + size_t old_size = space->nslabs * sizeof(struct nofl_slab*); + size_t additional_size = nslabs * sizeof(struct nofl_slab*); + space->extents = extents_adjoin(space->extents, slabs, + nslabs * sizeof(struct nofl_slab)); + space->slabs = realloc(space->slabs, old_size + additional_size); + if (!space->slabs) + GC_CRASH(); + while (nslabs--) + space->slabs[space->nslabs++] = slabs++; +} + +static int +nofl_space_shrink(struct nofl_space *space, size_t bytes) { + ssize_t pending = nofl_space_request_release_memory(space, bytes); + struct gc_lock lock = nofl_space_lock(space); + + // First try to shrink by unmapping previously-identified empty blocks. + while (pending > 0) { + struct nofl_block_ref block = nofl_pop_empty_block_with_lock(space, &lock); + if (nofl_block_is_null(block)) + break; + nofl_push_unavailable_block(space, block, &lock); + pending = atomic_fetch_sub(&space->pending_unavailable_bytes, + NOFL_BLOCK_SIZE); + pending -= NOFL_BLOCK_SIZE; + } + + // If we still need to shrink, steal from the evacuation reserve, if it's more + // than the minimum. 
Not racy: evacuation target lists are built during eager + // lazy sweep, which is mutually exclusive with consumption, itself either + // during trace, synchronously from gc_heap_sizer_on_gc, or async but subject + // to the heap lock. + if (pending > 0) { + size_t active = nofl_active_block_count(space); + size_t target = space->evacuation_minimum_reserve * active; + ssize_t avail = nofl_block_count(&space->evacuation_targets); + while (avail > target && pending > 0) { + struct nofl_block_ref block = + nofl_block_list_pop(&space->evacuation_targets); + GC_ASSERT(!nofl_block_is_null(block)); + nofl_push_unavailable_block(space, block, &lock); + pending = atomic_fetch_sub(&space->pending_unavailable_bytes, + NOFL_BLOCK_SIZE); + pending -= NOFL_BLOCK_SIZE; + } + } + + gc_lock_release(&lock); + + // It still may be the case we need to page out more blocks. Only evacuation + // can help us then! + return pending <= 0; +} + +static void +nofl_space_expand(struct nofl_space *space, size_t bytes) { + double overhead = ((double)NOFL_META_BLOCKS_PER_SLAB) / NOFL_BLOCKS_PER_SLAB; + ssize_t to_acquire = -nofl_space_maybe_reacquire_memory(space, bytes); + if (to_acquire < NOFL_BLOCK_SIZE) return; + to_acquire *= (1 + overhead); + size_t reserved = align_up(to_acquire, NOFL_SLAB_SIZE); + size_t nslabs = reserved / NOFL_SLAB_SIZE; + struct nofl_slab *slabs = nofl_allocate_slabs(nslabs); + nofl_space_add_slabs(space, slabs, nslabs); + + struct gc_lock lock = nofl_space_lock(space); + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = 0; idx < NOFL_NONMETA_BLOCKS_PER_SLAB; idx++) { + uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data; + struct nofl_block_ref block = nofl_block_for_addr(addr); + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + nofl_push_unavailable_block(space, block, &lock); + } + } + gc_lock_release(&lock); + nofl_space_maybe_reacquire_memory(space, 0); +} + +static void +nofl_space_advance_page_out_queue(void *data) { + // When the nofl space goes to return a block to the OS, it goes on the head + // of the page-out queue. Every second, the background thread will age the + // queue, moving all blocks from index 0 to index 1, and so on. When a block + // reaches the end of the queue it is paged out (and stays at the end of the + // queue). In this task, invoked by the background thread, we age queue + // items, except that we don't page out yet, as it could be that some other + // background task will need to pull pages back in. + struct nofl_space *space = data; + struct gc_lock lock = nofl_space_lock(space); + for (int age = NOFL_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) { + struct nofl_block_ref block = + nofl_block_stack_pop(&space->paged_out[age], &lock); + if (nofl_block_is_null(block)) + break; + nofl_block_stack_push(&space->paged_out[age+1], block, &lock); + } + gc_lock_release(&lock); +} + +static void +nofl_space_page_out_blocks(void *data) { + // This task is invoked by the background thread after other tasks. It + // actually pages out blocks that reached the end of the queue. 
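+  // Blocks are popped from the penultimate queue bucket, their memory is
+  // discarded via gc_platform_discard_memory, and they are parked in the
+  // final bucket.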
+ struct nofl_space *space = data; + struct gc_lock lock = nofl_space_lock(space); + int age = NOFL_PAGE_OUT_QUEUE_SIZE - 2; + while (1) { + struct nofl_block_ref block = + nofl_block_stack_pop(&space->paged_out[age], &lock); + if (nofl_block_is_null(block)) + break; + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + gc_platform_discard_memory((void*)block.addr, NOFL_BLOCK_SIZE); + nofl_block_stack_push(&space->paged_out[age + 1], block, &lock); + } + gc_lock_release(&lock); +} + +static int +nofl_space_init(struct nofl_space *space, size_t size, int atomic, + double promotion_threshold, + struct gc_background_thread *thread) { + size = align_up(size, NOFL_BLOCK_SIZE); + size_t reserved = align_up(size, NOFL_SLAB_SIZE); + size_t nslabs = reserved / NOFL_SLAB_SIZE; + struct nofl_slab *slabs = nofl_allocate_slabs(nslabs); + if (!slabs) + return 0; + + space->current_mark = space->survivor_mark = NOFL_METADATA_BYTE_MARK_0; + space->extents = extents_allocate(10); + nofl_space_add_slabs(space, slabs, nslabs); + pthread_mutex_init(&space->lock, NULL); + space->evacuation_minimum_reserve = 0.02; + space->evacuation_reserve = space->evacuation_minimum_reserve; + space->promotion_threshold = promotion_threshold; + struct gc_lock lock = nofl_space_lock(space); + for (size_t slab = 0; slab < nslabs; slab++) { + for (size_t idx = 0; idx < NOFL_NONMETA_BLOCKS_PER_SLAB; idx++) { + uintptr_t addr = (uintptr_t)slabs[slab].blocks[idx].data; + struct nofl_block_ref block = nofl_block_for_addr(addr); + nofl_block_set_flag(block, NOFL_BLOCK_ZERO | NOFL_BLOCK_PAGED_OUT); + if (reserved > size) { + nofl_push_unavailable_block(space, block, &lock); + reserved -= NOFL_BLOCK_SIZE; + } else { + if (!nofl_push_evacuation_target_if_needed(space, block)) + nofl_push_empty_block(space, block, &lock); + } + } + } + gc_lock_release(&lock); + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START, + nofl_space_advance_page_out_queue, + space); + gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END, + nofl_space_page_out_blocks, + space); + return 1; +} + +#endif // NOFL_SPACE_H diff --git a/libguile/whippet/src/parallel-tracer.h b/libguile/whippet/src/parallel-tracer.h new file mode 100644 index 000000000..db8afae8a --- /dev/null +++ b/libguile/whippet/src/parallel-tracer.h @@ -0,0 +1,433 @@ +#ifndef PARALLEL_TRACER_H +#define PARALLEL_TRACER_H + +#include +#include +#include +#include + +#include "assert.h" +#include "debug.h" +#include "gc-inline.h" +#include "gc-tracepoint.h" +#include "local-worklist.h" +#include "root-worklist.h" +#include "shared-worklist.h" +#include "spin.h" +#include "tracer.h" + +#ifdef VERBOSE_LOGGING +#define LOG(...) fprintf (stderr, "LOG: " __VA_ARGS__) +#else +#define LOG(...) 
do { } while (0) +#endif + +enum trace_worker_state { + TRACE_WORKER_STOPPED, + TRACE_WORKER_IDLE, + TRACE_WORKER_TRACING, + TRACE_WORKER_STOPPING, + TRACE_WORKER_DEAD +}; + +struct gc_heap; +struct gc_trace_worker { + struct gc_heap *heap; + struct gc_tracer *tracer; + size_t id; + size_t steal_id; + pthread_t thread; + enum trace_worker_state state; + pthread_mutex_t lock; + struct shared_worklist shared; + struct local_worklist local; + struct gc_trace_worker_data *data; +}; + +static inline struct gc_trace_worker_data* +gc_trace_worker_data(struct gc_trace_worker *worker) { + return worker->data; +} + +#define TRACE_WORKERS_MAX_COUNT 8 + +struct gc_tracer { + struct gc_heap *heap; + atomic_size_t active_tracers; + size_t worker_count; + long epoch; + pthread_mutex_t lock; + pthread_cond_t cond; + int trace_roots_only; + struct root_worklist roots; + struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT]; +}; + +static int +trace_worker_init(struct gc_trace_worker *worker, struct gc_heap *heap, + struct gc_tracer *tracer, size_t id) { + worker->heap = heap; + worker->tracer = tracer; + worker->id = id; + worker->steal_id = 0; + worker->thread = 0; + worker->state = TRACE_WORKER_STOPPED; + pthread_mutex_init(&worker->lock, NULL); + worker->data = NULL; + local_worklist_init(&worker->local); + return shared_worklist_init(&worker->shared); +} + +static void trace_worker_trace(struct gc_trace_worker *worker); + +static void* +trace_worker_thread(void *data) { + struct gc_trace_worker *worker = data; + struct gc_tracer *tracer = worker->tracer; + long trace_epoch = 0; + + pthread_mutex_lock(&worker->lock); + while (1) { + long epoch = atomic_load_explicit(&tracer->epoch, memory_order_acquire); + if (trace_epoch != epoch) { + trace_epoch = epoch; + trace_worker_trace(worker); + } + pthread_cond_wait(&tracer->cond, &worker->lock); + } + return NULL; +} + +static int +trace_worker_spawn(struct gc_trace_worker *worker) { + if (pthread_create(&worker->thread, NULL, trace_worker_thread, worker)) { + perror("spawning tracer thread failed"); + return 0; + } + + return 1; +} + +static int +gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, + size_t parallelism) { + tracer->heap = heap; + atomic_init(&tracer->active_tracers, 0); + tracer->epoch = 0; + tracer->trace_roots_only = 0; + pthread_mutex_init(&tracer->lock, NULL); + pthread_cond_init(&tracer->cond, NULL); + root_worklist_init(&tracer->roots); + size_t desired_worker_count = parallelism; + ASSERT(desired_worker_count); + if (desired_worker_count > TRACE_WORKERS_MAX_COUNT) + desired_worker_count = TRACE_WORKERS_MAX_COUNT; + if (!trace_worker_init(&tracer->workers[0], heap, tracer, 0)) + return 0; + tracer->worker_count++; + for (size_t i = 1; i < desired_worker_count; i++) { + if (!trace_worker_init(&tracer->workers[i], heap, tracer, i)) + break; + pthread_mutex_lock(&tracer->workers[i].lock); + if (trace_worker_spawn(&tracer->workers[i])) + tracer->worker_count++; + else + break; + } + return 1; +} + +static void gc_tracer_prepare(struct gc_tracer *tracer) { + for (size_t i = 0; i < tracer->worker_count; i++) + tracer->workers[i].steal_id = (i + 1) % tracer->worker_count; +} +static void gc_tracer_release(struct gc_tracer *tracer) { + for (size_t i = 0; i < tracer->worker_count; i++) + shared_worklist_release(&tracer->workers[i].shared); +} + +static inline void +gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) { + root_worklist_push(&tracer->roots, root); +} + +static inline void +tracer_unpark_all_workers(struct 
gc_tracer *tracer) { + long old_epoch = + atomic_fetch_add_explicit(&tracer->epoch, 1, memory_order_acq_rel); + long epoch = old_epoch + 1; + DEBUG("starting trace; %zu workers; epoch=%ld\n", tracer->worker_count, + epoch); + GC_TRACEPOINT(trace_unpark_all); + pthread_cond_broadcast(&tracer->cond); +} + +static inline void +tracer_maybe_unpark_workers(struct gc_tracer *tracer) { + size_t active = + atomic_load_explicit(&tracer->active_tracers, memory_order_acquire); + if (active < tracer->worker_count) + tracer_unpark_all_workers(tracer); +} + +static inline void +tracer_share(struct gc_trace_worker *worker) { + LOG("tracer #%zu: sharing\n", worker->id); + GC_TRACEPOINT(trace_share); + size_t to_share = LOCAL_WORKLIST_SHARE_AMOUNT; + while (to_share) { + struct gc_ref *objv; + size_t count = local_worklist_pop_many(&worker->local, &objv, to_share); + shared_worklist_push_many(&worker->shared, objv, count); + to_share -= count; + } + tracer_maybe_unpark_workers(worker->tracer); +} + +static inline void +gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) { + ASSERT(gc_ref_is_heap_object(ref)); + if (local_worklist_full(&worker->local)) + tracer_share(worker); + local_worklist_push(&worker->local, ref); +} + +static struct gc_ref +tracer_steal_from_worker(struct gc_tracer *tracer, size_t id) { + ASSERT(id < tracer->worker_count); + return shared_worklist_steal(&tracer->workers[id].shared); +} + +static int +tracer_can_steal_from_worker(struct gc_tracer *tracer, size_t id) { + ASSERT(id < tracer->worker_count); + return shared_worklist_can_steal(&tracer->workers[id].shared); +} + +static struct gc_ref +trace_worker_steal_from_any(struct gc_trace_worker *worker, + struct gc_tracer *tracer) { + for (size_t i = 0; i < tracer->worker_count; i++) { + LOG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id); + struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id); + if (!gc_ref_is_null(obj)) { + LOG("tracer #%zu: stealing got %p\n", worker->id, + gc_ref_heap_object(obj)); + return obj; + } + worker->steal_id = (worker->steal_id + 1) % tracer->worker_count; + } + LOG("tracer #%zu: failed to steal\n", worker->id); + return gc_ref_null(); +} + +static int +trace_worker_can_steal_from_any(struct gc_trace_worker *worker, + struct gc_tracer *tracer) { + LOG("tracer #%zu: checking if any worker has tasks\n", worker->id); + for (size_t i = 0; i < tracer->worker_count; i++) { + int res = tracer_can_steal_from_worker(tracer, worker->steal_id); + if (res) { + LOG("tracer #%zu: worker #%zu has tasks!\n", worker->id, + worker->steal_id); + return 1; + } + worker->steal_id = (worker->steal_id + 1) % tracer->worker_count; + } + LOG("tracer #%zu: nothing to steal\n", worker->id); + return 0; +} + +static size_t +trace_worker_should_continue(struct gc_trace_worker *worker, size_t spin_count) { + // Helper workers should park themselves immediately if they have no work. + if (worker->id != 0) + return 0; + + struct gc_tracer *tracer = worker->tracer; + + if (atomic_load_explicit(&tracer->active_tracers, memory_order_acquire) != 1) { + LOG("checking for termination: tracers active, spinning #%zu\n", spin_count); + yield_for_spin(spin_count); + return 1; + } + + // All trace workers have exited except us, the main worker. We are + // probably done, but we need to synchronize to be sure that there is no + // work pending, for example if a worker had a spurious wakeup. Skip + // worker 0 (the main worker). 
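+  // A helper normally holds its own lock except while parked in
+  // pthread_cond_wait, so being able to lock every helper's lock below
+  // indicates that all helpers are parked.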
+ + GC_TRACEPOINT(trace_check_termination_begin); + size_t locked = 1; + while (locked < tracer->worker_count) { + if (pthread_mutex_trylock(&tracer->workers[locked].lock) == 0) + locked++; + else + break; + } + int done = (locked == tracer->worker_count) && + !trace_worker_can_steal_from_any(worker, tracer); + GC_TRACEPOINT(trace_check_termination_end); + + if (done) + return 0; + while (locked > 1) + pthread_mutex_unlock(&tracer->workers[--locked].lock); + + LOG("checking for termination: failed to lock, spinning #%zu\n", spin_count); + yield_for_spin(spin_count); + return 1; +} + +static struct gc_ref +trace_worker_steal(struct gc_trace_worker *worker) { + struct gc_tracer *tracer = worker->tracer; + + // It could be that the worker's local trace queue has simply + // overflowed. In that case avoid contention by trying to pop + // something from the worker's own queue. + { + LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id); + struct gc_ref obj = shared_worklist_try_pop(&worker->shared); + if (!gc_ref_is_null(obj)) + return obj; + } + + GC_TRACEPOINT(trace_steal); + LOG("tracer #%zu: trying to steal\n", worker->id); + struct gc_ref obj = trace_worker_steal_from_any(worker, tracer); + if (!gc_ref_is_null(obj)) + return obj; + + return gc_ref_null(); +} + +static void +trace_with_data(struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data) { + atomic_fetch_add_explicit(&tracer->active_tracers, 1, memory_order_acq_rel); + worker->data = data; + + LOG("tracer #%zu: running trace loop\n", worker->id); + + { + LOG("tracer #%zu: tracing roots\n", worker->id); + size_t n = 0; + do { + struct gc_root root = root_worklist_pop(&tracer->roots); + if (root.kind == GC_ROOT_KIND_NONE) + break; + trace_root(root, heap, worker); + n++; + } while (1); + + LOG("tracer #%zu: done tracing roots, %zu roots traced\n", worker->id, n); + } + + if (tracer->trace_roots_only) { + // Unlike the full trace where work is generated during the trace, a + // roots-only trace consumes work monotonically; any object enqueued as a + // result of marking roots isn't ours to deal with. However we do need to + // synchronize with remote workers to ensure they have completed their + // work items. 
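+    // Reacquiring each helper's lock blocks until that helper has parked in
+    // pthread_cond_wait, which provides the needed synchronization.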
+ if (worker->id == 0) { + for (size_t i = 1; i < tracer->worker_count; i++) + pthread_mutex_lock(&tracer->workers[i].lock); + } + } else { + LOG("tracer #%zu: tracing objects\n", worker->id); + GC_TRACEPOINT(trace_objects_begin); + size_t n = 0; + size_t spin_count = 0; + do { + while (1) { + struct gc_ref ref; + if (!local_worklist_empty(&worker->local)) { + ref = local_worklist_pop(&worker->local); + } else { + ref = trace_worker_steal(worker); + if (gc_ref_is_null(ref)) + break; + } + trace_one(ref, heap, worker); + n++; + } + } while (trace_worker_should_continue(worker, spin_count++)); + GC_TRACEPOINT(trace_objects_end); + + LOG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n); + } + + worker->data = NULL; + atomic_fetch_sub_explicit(&tracer->active_tracers, 1, memory_order_acq_rel); +} + +static void +trace_worker_trace(struct gc_trace_worker *worker) { + GC_TRACEPOINT(trace_worker_begin); + gc_trace_worker_call_with_data(trace_with_data, worker->tracer, + worker->heap, worker); + GC_TRACEPOINT(trace_worker_end); +} + +static inline int +gc_tracer_should_parallelize(struct gc_tracer *tracer) { + if (root_worklist_size(&tracer->roots) > 1) + return 1; + + if (tracer->trace_roots_only) + return 0; + + size_t nonempty_worklists = 0; + ssize_t parallel_threshold = + LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT; + for (size_t i = 0; i < tracer->worker_count; i++) { + ssize_t size = shared_worklist_size(&tracer->workers[i].shared); + if (!size) + continue; + nonempty_worklists++; + if (nonempty_worklists > 1) + return 1; + if (size >= parallel_threshold) + return 1; + } + return 0; +} + +static inline void +gc_tracer_trace(struct gc_tracer *tracer) { + LOG("starting trace; %zu workers\n", tracer->worker_count); + + for (int i = 1; i < tracer->worker_count; i++) + pthread_mutex_unlock(&tracer->workers[i].lock); + + if (gc_tracer_should_parallelize(tracer)) { + LOG("waking workers\n"); + tracer_unpark_all_workers(tracer); + } else { + LOG("starting in local-only mode\n"); + } + + trace_worker_trace(&tracer->workers[0]); + root_worklist_reset(&tracer->roots); + + LOG("trace finished\n"); +} + +static inline void +gc_tracer_trace_roots(struct gc_tracer *tracer) { + LOG("starting roots-only trace\n"); + + GC_TRACEPOINT(trace_roots_begin); + tracer->trace_roots_only = 1; + gc_tracer_trace(tracer); + tracer->trace_roots_only = 0; + GC_TRACEPOINT(trace_roots_end); + + GC_ASSERT_EQ(atomic_load(&tracer->active_tracers), 0); + LOG("roots-only trace finished\n"); +} + +#endif // PARALLEL_TRACER_H diff --git a/libguile/whippet/src/pcc.c b/libguile/whippet/src/pcc.c new file mode 100644 index 000000000..ca8be1c11 --- /dev/null +++ b/libguile/whippet/src/pcc.c @@ -0,0 +1,1340 @@ +#include +#include +#include +#include +#include + +#include "gc-api.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "background-thread.h" +#include "copy-space.h" +#include "debug.h" +#include "field-set.h" +#include "gc-align.h" +#include "gc-inline.h" +#include "gc-platform.h" +#include "gc-trace.h" +#include "gc-tracepoint.h" +#include "heap-sizer.h" +#include "large-object-space.h" +#if GC_PARALLEL +#include "parallel-tracer.h" +#else +#include "serial-tracer.h" +#endif +#include "spin.h" +#include "pcc-attrs.h" + +struct gc_heap { +#if GC_GENERATIONAL + struct copy_space new_space; + struct copy_space old_space; +#else + struct copy_space mono_space; +#endif + struct large_object_space large_object_space; + struct gc_extern_space *extern_space; +#if GC_GENERATIONAL + struct gc_field_set 
remembered_set; +#endif + size_t large_object_pages; + pthread_mutex_t lock; + pthread_cond_t collector_cond; + pthread_cond_t mutator_cond; + size_t size; + size_t total_allocated_bytes_at_last_gc; + int collecting; +#if GC_GENERATIONAL + int is_minor_collection; + size_t per_processor_nursery_size; + size_t nursery_size; +#endif + size_t processor_count; + size_t max_active_mutator_count; + int check_pending_ephemerons; +#if GC_GENERATIONAL + struct gc_pending_ephemerons *nursery_pending_ephemerons; +#endif + struct gc_pending_ephemerons *pending_ephemerons; + struct gc_finalizer_state *finalizer_state; + size_t mutator_count; + size_t paused_mutator_count; + size_t inactive_mutator_count; + struct gc_heap_roots *roots; + struct gc_mutator *mutators; + long count; + struct gc_tracer tracer; + double pending_ephemerons_size_factor; + double pending_ephemerons_size_slop; + struct gc_background_thread *background_thread; + struct gc_heap_sizer sizer; + struct gc_event_listener event_listener; + void *event_listener_data; +}; + +#define HEAP_EVENT(heap, event, ...) do { \ + (heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) +#define MUTATOR_EVENT(mut, event, ...) do { \ + (mut)->heap->event_listener.event((mut)->event_listener_data, \ + ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) + +struct gc_mutator { + struct copy_space_allocator allocator; +#if GC_GENERATIONAL + struct gc_field_set_writer logger; +#endif + struct gc_heap *heap; + struct gc_mutator_roots *roots; + void *event_listener_data; + struct gc_mutator *next; + struct gc_mutator *prev; +}; + +struct gc_trace_worker_data { +#if GC_GENERATIONAL + struct copy_space_allocator new_allocator; + struct copy_space_allocator old_allocator; + struct gc_field_set_writer logger; +#else + struct copy_space_allocator allocator; +#endif +}; + +static inline struct copy_space* heap_mono_space(struct gc_heap *heap) { +#if GC_GENERATIONAL + GC_CRASH(); +#else + return &heap->mono_space; +#endif +} + +static inline struct copy_space* heap_new_space(struct gc_heap *heap) { +#if GC_GENERATIONAL + return &heap->new_space; +#else + GC_CRASH(); +#endif +} + +static inline struct copy_space* heap_old_space(struct gc_heap *heap) { +#if GC_GENERATIONAL + return &heap->old_space; +#else + GC_CRASH(); +#endif +} + +static inline struct gc_field_set* heap_remembered_set(struct gc_heap *heap) { +#if GC_GENERATIONAL + return &heap->remembered_set; +#else + GC_CRASH(); +#endif +} + +static inline struct copy_space_allocator* +trace_worker_mono_space_allocator(struct gc_trace_worker_data *data) { +#if GC_GENERATIONAL + GC_CRASH(); +#else + return &data->allocator; +#endif +} + +static inline struct copy_space_allocator* +trace_worker_new_space_allocator(struct gc_trace_worker_data *data) { +#if GC_GENERATIONAL + return &data->new_allocator; +#else + GC_CRASH(); +#endif +} + +static inline struct copy_space_allocator* +trace_worker_old_space_allocator(struct gc_trace_worker_data *data) { +#if GC_GENERATIONAL + return &data->old_allocator; +#else + GC_CRASH(); +#endif +} + +static inline struct gc_field_set_writer* +trace_worker_field_logger(struct gc_trace_worker_data *data) { +#if GC_GENERATIONAL + return &data->logger; +#else + GC_CRASH(); +#endif +} + +static inline struct gc_field_set_writer* +mutator_field_logger(struct gc_mutator *mut) { +#if GC_GENERATIONAL + return &mut->logger; +#else + GC_CRASH(); +#endif +} + +static int is_minor_collection(struct 
gc_heap *heap) { +#if GC_GENERATIONAL + return heap->is_minor_collection; +#else + GC_CRASH(); +#endif +} + +static inline struct copy_space* heap_allocation_space(struct gc_heap *heap) { + return GC_GENERATIONAL ? heap_new_space(heap) : heap_mono_space(heap); +} + +static inline struct copy_space* heap_resizable_space(struct gc_heap *heap) { + return GC_GENERATIONAL ? heap_old_space(heap) : heap_mono_space(heap); +} + +static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) { + return &heap->large_object_space; +} + +static inline struct gc_extern_space* heap_extern_space(struct gc_heap *heap) { + return heap->extern_space; +} + +static inline struct gc_heap* mutator_heap(struct gc_mutator *mutator) { + return mutator->heap; +} + +struct gc_heap* gc_mutator_heap(struct gc_mutator *mutator) { + return mutator_heap(mutator); +} + +uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap) { + if (GC_GENERATIONAL) + return copy_space_low_aligned_address(heap_new_space(heap)); + GC_CRASH(); +} +uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap) { + if (GC_GENERATIONAL) + return copy_space_high_aligned_address(heap_new_space(heap)); + GC_CRASH(); +} + +static void +gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data), + struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker) { + struct gc_trace_worker_data data; + + if (GC_GENERATIONAL) { + copy_space_allocator_init(trace_worker_new_space_allocator(&data)); + copy_space_allocator_init(trace_worker_old_space_allocator(&data)); + gc_field_set_writer_init(trace_worker_field_logger(&data), + heap_remembered_set(heap)); + } else { + copy_space_allocator_init(trace_worker_mono_space_allocator(&data)); + } + + f(tracer, heap, worker, &data); + + if (GC_GENERATIONAL) { + copy_space_allocator_finish(trace_worker_new_space_allocator(&data), + heap_new_space(heap)); + copy_space_allocator_finish(trace_worker_old_space_allocator(&data), + heap_old_space(heap)); + gc_field_set_writer_release_buffer(trace_worker_field_logger(&data)); + } else { + copy_space_allocator_finish(trace_worker_mono_space_allocator(&data), + heap_mono_space(heap)); + } +} + +static int new_space_contains_addr(struct gc_heap *heap, uintptr_t addr) { + return copy_space_contains_address_aligned(heap_new_space(heap), addr); +} + +static int new_space_contains(struct gc_heap *heap, struct gc_ref ref) { + return new_space_contains_addr(heap, gc_ref_value(ref)); +} + +static int old_space_contains(struct gc_heap *heap, struct gc_ref ref) { + return copy_space_contains(heap_old_space(heap), ref); +} + +static int remember_edge_to_survivor_object(struct gc_heap *heap, + struct gc_edge edge) { + GC_ASSERT(!new_space_contains_addr(heap, gc_edge_address(edge))); + GC_ASSERT(new_space_contains(heap, gc_edge_ref(edge))); + if (copy_space_contains_edge(heap_old_space(heap), edge)) + return copy_space_remember_edge(heap_old_space(heap), edge); + struct gc_ref large_object = + large_object_space_object_containing_edge(heap_large_object_space(heap), + edge); + if (!gc_ref_is_null(large_object)) + return large_object_space_remember_edge(heap_large_object_space(heap), + large_object, edge); + return 0; +} + +static inline int edge_is_from_survivor(struct gc_heap *heap, + struct gc_edge edge) { + // Currently only the copy-space has survivors. 
(A survivor is a live object + // which stays in the nursery after collection). If lospace gains a survivor + // stage, we would need to augment this check. + GC_ASSERT(is_minor_collection(heap)); + return copy_space_contains_edge_aligned(heap_new_space(heap), edge); +} + +static inline int forward(struct copy_space *src_space, + struct copy_space *dst_space, + struct gc_edge edge, + struct gc_ref ref, + struct copy_space_allocator *dst_alloc) { + switch (copy_space_forward(src_space, dst_space, edge, ref, dst_alloc)) { + case COPY_SPACE_FORWARD_UPDATED: + return 0; + case COPY_SPACE_FORWARD_EVACUATED: + return 1; + case COPY_SPACE_FORWARD_FAILED: + // If space is really tight and reordering of objects during evacuation + // resulted in more end-of-block fragmentation and thus block use than + // before collection started, we can actually run out of memory while + // collecting. We should probably attempt to expand the heap here, at + // least by a single block; it's better than the alternatives. For now, + // abort. + fprintf(stderr, "Out of memory\n"); + GC_CRASH(); + break; + default: + GC_CRASH(); + } +} + +static inline int do_minor_trace(struct gc_heap *heap, struct gc_edge edge, + struct gc_ref ref, + struct gc_trace_worker_data *data) { + // Trace EDGE for a minor GC. We only need to trace edges to young objects. + // Young objects are either in the nursery copy space, or in the large object + // space. + + if (GC_LIKELY(new_space_contains(heap, ref))) { + struct copy_space *new_space = heap_new_space(heap); + struct copy_space *old_space = heap_old_space(heap); + // We are visiting an edge into newspace. Either the edge's target will be + // promoted to oldspace, or it will stay in newspace as a survivor. + // + // After the scavenge, we need to preserve the invariant that all old-to-new + // edges are part of the remembered set. So depending on where the edge + // comes from and where the object moves to, we may need to add or remove + // the edge from the remembered set. Concretely: + // + // | survivor dst | promoted dst + // ----------------+------------------+----------------- + // survivor src | nothing | nothing + // | | + // promoted src | log edge | nothing + // | | + // oldspace src | nothing | clear log + // | | + // root src | nothing | nothing + // + // However, clearing a logged field usually isn't possible, as it's not easy + // to go from field address to position in a field set, so instead we lazily + // remove old->old edges from the field set during the next minor GC. (Or, + // we will anyway; for now we ignore them.) So really we only need to log + // promoted-to-survivor edges. + // + // However however, it is hard to distinguish between edges from promoted + // objects and edges from old objects, so we mostly just rely on an + // idempotent "log if unlogged" operation instead. + if (!copy_space_should_promote(new_space, ref)) { + // Try to leave the object in newspace as a survivor. If the edge is from + // a promoted object, we will need to add it to the remembered set. + if (!edge_is_from_survivor(heap, edge) + && remember_edge_to_survivor_object(heap, edge)) { + // Log the edge even though in rare conditions the referent could end up + // being promoted by us (if we run out of newspace) or a remote + // evacuation thread (if they run out of newspace). 
+        gc_field_set_writer_add_edge(trace_worker_field_logger(data), edge);
+      }
+      switch (copy_space_forward(new_space, new_space, edge, ref,
+                                 trace_worker_new_space_allocator(data))) {
+      case COPY_SPACE_FORWARD_UPDATED:
+        return 0;
+      case COPY_SPACE_FORWARD_EVACUATED:
+        return 1;
+      case COPY_SPACE_FORWARD_FAILED:
+        // Ran out of newspace! Fall through to promote instead.
+        break;
+      default:
+        GC_CRASH();
+      }
+    }
+    // Promote the object.
+    return forward(new_space, old_space, edge, ref,
+                   trace_worker_old_space_allocator(data));
+  } else {
+    // Note that although the target of the edge might not be in lospace, this
+    // will do what we want and return 1 if and only if ref was a young
+    // object in lospace.
+    return large_object_space_mark(heap_large_object_space(heap), ref);
+  }
+}
+
+
+static inline int do_trace(struct gc_heap *heap, struct gc_edge edge,
+                           struct gc_ref ref,
+                           struct gc_trace_worker_data *data) {
+  if (GC_GENERATIONAL) {
+    if (GC_LIKELY(is_minor_collection(heap)))
+      return do_minor_trace(heap, edge, ref, data);
+
+    // Major trace: promote all copyspace objects to oldgen.
+    struct copy_space *new_space = heap_new_space(heap);
+    struct copy_space *old_space = heap_old_space(heap);
+    if (new_space_contains(heap, ref))
+      return forward(new_space, old_space, edge, ref,
+                     trace_worker_old_space_allocator(data));
+    if (old_space_contains(heap, ref))
+      return forward(old_space, old_space, edge, ref,
+                     trace_worker_old_space_allocator(data));
+  } else {
+    if (GC_LIKELY(copy_space_contains(heap_mono_space(heap), ref)))
+      return forward(heap_mono_space(heap), heap_mono_space(heap),
+                     edge, ref,
+                     trace_worker_mono_space_allocator(data));
+  }
+
+  // Fall through for objects in large or extern spaces.
+  if (large_object_space_contains_with_lock(heap_large_object_space(heap), ref))
+    return large_object_space_mark(heap_large_object_space(heap), ref);
+  else
+    return gc_extern_space_visit(heap_extern_space(heap), edge, ref);
+}
+
+static inline int trace_edge(struct gc_heap *heap, struct gc_edge edge,
+                             struct gc_trace_worker *worker) {
+  struct gc_ref ref = gc_edge_ref(edge);
+  if (gc_ref_is_null(ref) || gc_ref_is_immediate(ref))
+    return 0;
+  struct gc_trace_worker_data *data = gc_trace_worker_data(worker);
+  int is_new = do_trace(heap, edge, ref, data);
+
+  if (is_new &&
+      GC_UNLIKELY(atomic_load_explicit(&heap->check_pending_ephemerons,
+                                       memory_order_relaxed)))
+    gc_resolve_pending_ephemerons(ref, heap);
+
+  return is_new;
+}
+
+int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
+  struct gc_ref ref = gc_edge_ref(edge);
+  GC_ASSERT(!gc_ref_is_null(ref));
+  if (gc_ref_is_immediate(ref))
+    return 1;
+  GC_ASSERT(gc_ref_is_heap_object(ref));
+
+  if (GC_GENERATIONAL) {
+    if (new_space_contains(heap, ref))
+      return copy_space_forward_if_traced(heap_new_space(heap), edge, ref);
+    if (old_space_contains(heap, ref))
+      return is_minor_collection(heap) ||
+        copy_space_forward_if_traced(heap_old_space(heap), edge, ref);
+  } else {
+    if (copy_space_contains(heap_mono_space(heap), ref))
+      return copy_space_forward_if_traced(heap_mono_space(heap), edge, ref);
+  }
+
+  if (large_object_space_contains_with_lock(heap_large_object_space(heap), ref))
+    return large_object_space_is_marked(heap_large_object_space(heap), ref);
+  GC_CRASH();
+}
+
+static int mutators_are_stopping(struct gc_heap *heap) {
+  return atomic_load_explicit(&heap->collecting, memory_order_relaxed);
+}
+
+static inline void heap_lock(struct gc_heap *heap) {
+  pthread_mutex_lock(&heap->lock);
+}
+static inline
void heap_unlock(struct gc_heap *heap) { + pthread_mutex_unlock(&heap->lock); +} + +// with heap lock +static inline int all_mutators_stopped(struct gc_heap *heap) { + return heap->mutator_count == + heap->paused_mutator_count + heap->inactive_mutator_count; +} + +// with heap lock +static void maybe_increase_max_active_mutator_count(struct gc_heap *heap) { + size_t active_mutators = heap->mutator_count - heap->inactive_mutator_count; + if (active_mutators > heap->max_active_mutator_count) + heap->max_active_mutator_count = active_mutators; +} + +static void add_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + mut->heap = heap; + mut->event_listener_data = + heap->event_listener.mutator_added(heap->event_listener_data); + copy_space_allocator_init(&mut->allocator); + if (GC_GENERATIONAL) + gc_field_set_writer_init(mutator_field_logger(mut), + heap_remembered_set(heap)); + heap_lock(heap); + // We have no roots. If there is a GC currently in progress, we have + // nothing to add. Just wait until it's done. + while (mutators_are_stopping(heap)) + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + mut->next = mut->prev = NULL; + struct gc_mutator *tail = heap->mutators; + if (tail) { + mut->next = tail; + tail->prev = mut; + } + heap->mutators = mut; + heap->mutator_count++; + maybe_increase_max_active_mutator_count(heap); + heap_unlock(heap); +} + +static void remove_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + copy_space_allocator_finish(&mut->allocator, heap_allocation_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(mutator_field_logger(mut)); + MUTATOR_EVENT(mut, mutator_removed); + mut->heap = NULL; + heap_lock(heap); + heap->mutator_count--; + if (mut->next) + mut->next->prev = mut->prev; + if (mut->prev) + mut->prev->next = mut->next; + else + heap->mutators = mut->next; + // We have no roots. If there is a GC stop currently in progress, + // maybe tell the controller it can continue. + if (mutators_are_stopping(heap) && all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + heap_unlock(heap); +} + +void gc_mutator_set_roots(struct gc_mutator *mut, + struct gc_mutator_roots *roots) { + mut->roots = roots; +} +void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { + heap->roots = roots; +} +void gc_heap_set_extern_space(struct gc_heap *heap, + struct gc_extern_space *space) { + heap->extern_space = space; +} + +static inline void tracer_visit(struct gc_edge edge, struct gc_heap *heap, + void *trace_data) GC_ALWAYS_INLINE; +static inline void +tracer_visit(struct gc_edge edge, struct gc_heap *heap, void *trace_data) { + struct gc_trace_worker *worker = trace_data; + if (trace_edge(heap, edge, worker)) + gc_trace_worker_enqueue(worker, gc_edge_ref(edge)); +} + +static inline int +trace_remembered_edge(struct gc_edge edge, struct gc_heap *heap, + void *trace_data) { + GC_ASSERT(is_minor_collection(heap)); + tracer_visit(edge, heap, trace_data); + + // Return 1 if the edge should be kept in the remset, which is the + // case only for new objects that survive the minor GC, and only the + // nursery copy space has survivors. + if (new_space_contains(heap, gc_edge_ref(edge))) + return 1; // Keep edge in remset. + // Otherwise remove field-logging bit and return 0 to indicate that + // the remembered field set should remove this edge. 
+ if (copy_space_contains_edge(heap_old_space(heap), edge)) + copy_space_forget_edge(heap_old_space(heap), edge); + else + large_object_space_forget_edge(heap_large_object_space(heap), edge); + return 0; +} + +static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, + struct gc_trace_worker *worker) { +#ifdef DEBUG + if (GC_GENERATIONAL) { + if (new_space_contains(heap, ref)) + GC_ASSERT_EQ(copy_space_object_region(ref), + heap_new_space(heap)->active_region); + else if (old_space_contains(heap, ref)) + GC_ASSERT_EQ(copy_space_object_region(ref), + heap_old_space(heap)->active_region); + } else { + if (copy_space_contains(heap_mono_space(heap), ref)) + GC_ASSERT_EQ(copy_space_object_region(ref), + heap_mono_space(heap)->active_region); + } +#endif + + gc_trace_object(ref, tracer_visit, heap, worker, NULL); +} + +static inline void trace_root(struct gc_root root, struct gc_heap *heap, + struct gc_trace_worker *worker) { + switch (root.kind) { + case GC_ROOT_KIND_HEAP: + gc_trace_heap_roots(root.heap->roots, tracer_visit, heap, worker); + break; + case GC_ROOT_KIND_MUTATOR: + gc_trace_mutator_roots(root.mutator->roots, tracer_visit, heap, worker); + break; + case GC_ROOT_KIND_RESOLVED_EPHEMERONS: + gc_trace_resolved_ephemerons(root.resolved_ephemerons, tracer_visit, + heap, worker); + break; + case GC_ROOT_KIND_EDGE: + tracer_visit(root.edge, heap, worker); + break; + case GC_ROOT_KIND_EDGE_BUFFER: + gc_field_set_visit_edge_buffer(heap_remembered_set(heap), root.edge_buffer, + trace_remembered_edge, heap, worker); + break; + default: + GC_CRASH(); + } +} + +static void request_mutators_to_stop(struct gc_heap *heap) { + GC_ASSERT(!mutators_are_stopping(heap)); + atomic_store_explicit(&heap->collecting, 1, memory_order_relaxed); +} + +static void allow_mutators_to_continue(struct gc_heap *heap) { + GC_ASSERT(mutators_are_stopping(heap)); + GC_ASSERT(all_mutators_stopped(heap)); + heap->paused_mutator_count--; + atomic_store_explicit(&heap->collecting, 0, memory_order_relaxed); + GC_ASSERT(!mutators_are_stopping(heap)); + pthread_cond_broadcast(&heap->mutator_cond); +} + +static void heap_reset_large_object_pages(struct gc_heap *heap, size_t npages) { + size_t previous = heap->large_object_pages; + heap->large_object_pages = npages; + GC_ASSERT(npages <= previous); + size_t bytes = (previous - npages) << + heap_large_object_space(heap)->page_size_log2; + copy_space_reacquire_memory(heap_resizable_space(heap), bytes); +} + +static void wait_for_mutators_to_stop(struct gc_heap *heap) { + heap->paused_mutator_count++; + while (!all_mutators_stopped(heap)) + pthread_cond_wait(&heap->collector_cond, &heap->lock); +} + +static enum gc_collection_kind +pause_mutator_for_collection(struct gc_heap *heap, + struct gc_mutator *mut) GC_NEVER_INLINE; +static enum gc_collection_kind +pause_mutator_for_collection(struct gc_heap *heap, struct gc_mutator *mut) { + GC_ASSERT(mutators_are_stopping(heap)); + GC_ASSERT(!all_mutators_stopped(heap)); + MUTATOR_EVENT(mut, mutator_stopping); + MUTATOR_EVENT(mut, mutator_stopped); + heap->paused_mutator_count++; + if (all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + + enum gc_collection_kind collection_kind = GC_COLLECTION_MINOR; + do { + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + // is_minor_collection is reset before requesting mutators to stop, so this + // will pick up either whether the last collection was minor, or whether the + // next one will be minor. 
+ if (!GC_GENERATIONAL || !is_minor_collection(heap)) + collection_kind = GC_COLLECTION_COMPACTING; + } while (mutators_are_stopping(heap)); + heap->paused_mutator_count--; + + MUTATOR_EVENT(mut, mutator_restarted); + return collection_kind; +} + +static void resize_heap(struct gc_heap *heap, size_t new_size) { + if (new_size == heap->size) + return; + DEBUG("------ resizing heap\n"); + DEBUG("------ old heap size: %zu bytes\n", heap->size); + DEBUG("------ new heap size: %zu bytes\n", new_size); + if (new_size < heap->size) + copy_space_shrink(heap_resizable_space(heap), heap->size - new_size); + else + copy_space_expand(heap_resizable_space(heap), new_size - heap->size); + + heap->size = new_size; + HEAP_EVENT(heap, heap_resized, new_size); +} + +static size_t heap_nursery_size(struct gc_heap *heap) { +#if GC_GENERATIONAL + return heap->nursery_size; +#else + GC_CRASH(); +#endif +} + +static void heap_set_nursery_size(struct gc_heap *heap, size_t size) { +#if GC_GENERATIONAL + GC_ASSERT(size); + heap->nursery_size = size; +#else + GC_CRASH(); +#endif +} + +static size_t heap_nursery_size_for_mutator_count(struct gc_heap *heap, + size_t count) { +#if GC_GENERATIONAL + return heap->per_processor_nursery_size * count; +#else + GC_CRASH(); +#endif +} + +static void resize_nursery(struct gc_heap *heap, size_t size) { + size_t prev_size = heap_nursery_size(heap); + if (size < prev_size) + copy_space_shrink(heap_new_space(heap), prev_size - size); + else + copy_space_reacquire_memory(heap_new_space(heap), size - prev_size); + heap_set_nursery_size(heap, size); +} + +static void resize_nursery_for_active_mutator_count(struct gc_heap *heap, + size_t count) { + if (count > heap->processor_count) + count = heap->processor_count; + size_t prev_size = heap_nursery_size(heap); + size_t size = heap_nursery_size_for_mutator_count(heap, count); + // If there were more mutator processors this cycle than in the previous, + // increase the nursery size. Otherwise shrink, but with an exponential decay + // factor. 
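+  // (Averaging the previous and target sizes moves halfway toward the smaller
+  // target each cycle.)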
+  if (size < prev_size)
+    size = (prev_size + size) / 2;
+  resize_nursery(heap, size);
+}
+
+static void resize_for_active_mutator_count(struct gc_heap *heap) {
+  size_t mutators = heap->max_active_mutator_count;
+  GC_ASSERT(mutators);
+  heap->max_active_mutator_count = 1;
+  maybe_increase_max_active_mutator_count(heap);
+
+  if (GC_GENERATIONAL)
+    resize_nursery_for_active_mutator_count(heap, mutators);
+}
+
+static void visit_root_edge(struct gc_edge edge, struct gc_heap *heap,
+                            void *unused) {
+  gc_tracer_add_root(&heap->tracer, gc_root_edge(edge));
+}
+
+static void add_roots(struct gc_heap *heap, int is_minor_gc) {
+  for (struct gc_mutator *mut = heap->mutators; mut; mut = mut->next)
+    gc_tracer_add_root(&heap->tracer, gc_root_mutator(mut));
+  gc_tracer_add_root(&heap->tracer, gc_root_heap(heap));
+  gc_visit_finalizer_roots(heap->finalizer_state, visit_root_edge, heap, NULL);
+  if (is_minor_gc)
+    gc_field_set_add_roots(heap_remembered_set(heap), &heap->tracer);
+}
+
+static void
+clear_remembered_set(struct gc_heap *heap) {
+  gc_field_set_clear(heap_remembered_set(heap), NULL, NULL);
+  large_object_space_clear_remembered_edges(heap_large_object_space(heap));
+}
+
+static void resolve_ephemerons_lazily(struct gc_heap *heap) {
+  atomic_store_explicit(&heap->check_pending_ephemerons, 0,
+                        memory_order_release);
+}
+
+static void resolve_ephemerons_eagerly(struct gc_heap *heap) {
+  atomic_store_explicit(&heap->check_pending_ephemerons, 1,
+                        memory_order_release);
+  gc_scan_pending_ephemerons(gc_heap_pending_ephemerons(heap), heap, 0, 1);
+}
+
+static void trace_resolved_ephemerons(struct gc_heap *heap) {
+  for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
+       resolved;
+       resolved = gc_pop_resolved_ephemerons(heap)) {
+    gc_tracer_add_root(&heap->tracer, gc_root_resolved_ephemerons(resolved));
+    gc_tracer_trace(&heap->tracer);
+  }
+}
+
+static void resolve_finalizers(struct gc_heap *heap) {
+  for (size_t priority = 0;
+       priority < gc_finalizer_priority_count();
+       priority++) {
+    if (gc_resolve_finalizers(heap->finalizer_state, priority,
+                              visit_root_edge, heap, NULL)) {
+      gc_tracer_trace(&heap->tracer);
+      trace_resolved_ephemerons(heap);
+    }
+  }
+  gc_notify_finalizers(heap->finalizer_state, heap);
+}
+
+static void sweep_ephemerons(struct gc_heap *heap) {
+  return gc_sweep_pending_ephemerons(gc_heap_pending_ephemerons(heap), 0, 1);
+}
+
+static int
+heap_can_minor_gc(struct gc_heap *heap) {
+  if (!GC_GENERATIONAL) return 0;
+  // Invariant: the oldgen always has enough free space to accommodate promoted
+  // objects from the nursery. This is a precondition for minor GC of course,
+  // but it is also a post-condition: after potentially promoting all nursery
+  // objects, we still need an additional nursery's worth of space in oldgen to
+  // satisfy the invariant. We ensure the invariant by only doing minor GC if
+  // the copy space can allocate as many bytes as the nursery, which is already
+  // twice the allocatable size because of the copy reserve.
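+  // For example, with an 8 MB nursery we only take a minor GC while the old
+  // space can still allocate at least 8 MB.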
+ struct copy_space *new_space = heap_new_space(heap); + struct copy_space *old_space = heap_old_space(heap); + size_t nursery_size = heap_nursery_size(heap); + return copy_space_can_allocate(old_space, nursery_size) >= nursery_size; +} + +static enum gc_collection_kind +determine_collection_kind(struct gc_heap *heap, + enum gc_collection_kind requested) { + if (requested == GC_COLLECTION_MINOR && heap_can_minor_gc(heap)) + return GC_COLLECTION_MINOR; + return GC_COLLECTION_COMPACTING; +} + +static void +copy_spaces_start_gc(struct gc_heap *heap, int is_minor_gc) { + if (GC_GENERATIONAL) { + copy_space_flip(heap_new_space(heap)); + if (!is_minor_gc) + copy_space_flip(heap_old_space(heap)); + } else { + copy_space_flip(heap_mono_space(heap)); + } +} + +static void +copy_spaces_finish_gc(struct gc_heap *heap, int is_minor_gc) { + if (GC_GENERATIONAL) { + copy_space_finish_gc(heap_new_space(heap), is_minor_gc); + if (!is_minor_gc) + copy_space_finish_gc(heap_old_space(heap), 0); + } else { + GC_ASSERT(!is_minor_gc); + copy_space_finish_gc(heap_mono_space(heap), 0); + } +} + +static size_t +copy_spaces_allocated_bytes(struct gc_heap *heap) +{ + return GC_GENERATIONAL + ? (heap_new_space(heap)->allocated_bytes_at_last_gc + + heap_old_space(heap)->allocated_bytes_at_last_gc) + : heap_mono_space(heap)->allocated_bytes_at_last_gc; +} + +static enum gc_collection_kind +collect(struct gc_mutator *mut, + enum gc_collection_kind requested_kind) GC_NEVER_INLINE; +static enum gc_collection_kind +collect(struct gc_mutator *mut, enum gc_collection_kind requested_kind) { + struct gc_heap *heap = mutator_heap(mut); + struct large_object_space *lospace = heap_large_object_space(heap); + struct gc_extern_space *exspace = heap_extern_space(heap); + uint64_t start_ns = gc_platform_monotonic_nanoseconds(); + MUTATOR_EVENT(mut, mutator_cause_gc); + DEBUG("start collect #%ld:\n", heap->count); + HEAP_EVENT(heap, requesting_stop); + request_mutators_to_stop(heap); + HEAP_EVENT(heap, waiting_for_stop); + wait_for_mutators_to_stop(heap); + HEAP_EVENT(heap, mutators_stopped); + enum gc_collection_kind gc_kind = + determine_collection_kind(heap, requested_kind); + int is_minor_gc = +#if GC_GENERATIONAL + heap->is_minor_collection = +#endif + GC_GENERATIONAL ? 
gc_kind == GC_COLLECTION_MINOR : 0; + HEAP_EVENT(heap, prepare_gc, gc_kind); + uint64_t *counter_loc = &heap->total_allocated_bytes_at_last_gc; + copy_space_add_to_allocation_counter(heap_allocation_space(heap), + counter_loc); + large_object_space_add_to_allocation_counter(lospace, counter_loc); + copy_spaces_start_gc(heap, is_minor_gc); + large_object_space_start_gc(lospace, is_minor_gc); + gc_extern_space_start_gc(exspace, is_minor_gc); + resolve_ephemerons_lazily(heap); + gc_tracer_prepare(&heap->tracer); + add_roots(heap, is_minor_gc); + HEAP_EVENT(heap, roots_traced); + gc_tracer_trace(&heap->tracer); + HEAP_EVENT(heap, heap_traced); + resolve_ephemerons_eagerly(heap); + trace_resolved_ephemerons(heap); + HEAP_EVENT(heap, ephemerons_traced); + resolve_finalizers(heap); + HEAP_EVENT(heap, finalizers_traced); + sweep_ephemerons(heap); + gc_tracer_release(&heap->tracer); + copy_spaces_finish_gc(heap, is_minor_gc); + large_object_space_finish_gc(lospace, is_minor_gc); + gc_extern_space_finish_gc(exspace, is_minor_gc); + if (GC_GENERATIONAL && !is_minor_gc) + clear_remembered_set(heap); + heap->count++; + resize_for_active_mutator_count(heap); + heap_reset_large_object_pages(heap, lospace->live_pages_at_last_collection); + size_t live_size = (copy_spaces_allocated_bytes(heap) + + large_object_space_size_at_last_collection(lospace)); + uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns; + HEAP_EVENT(heap, live_data_size, live_size); + gc_heap_sizer_on_gc(heap->sizer, heap->size, live_size, pause_ns, + resize_heap); + { + struct copy_space *space = heap_resizable_space(heap); + if (!copy_space_page_out_blocks_until_memory_released(space) + && heap->sizer.policy == GC_HEAP_SIZE_FIXED) { + fprintf(stderr, "ran out of space, heap size %zu\n", heap->size); + GC_CRASH(); + } + } + HEAP_EVENT(heap, restarting_mutators); + allow_mutators_to_continue(heap); + return gc_kind; +} + +static void trigger_collection(struct gc_mutator *mut, + enum gc_collection_kind requested_kind) { + struct gc_heap *heap = mutator_heap(mut); + copy_space_allocator_finish(&mut->allocator, heap_allocation_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(mutator_field_logger(mut)); + heap_lock(heap); + int prev_kind = -1; + while (mutators_are_stopping(heap)) + prev_kind = pause_mutator_for_collection(heap, mut); + if (prev_kind < (int)requested_kind) + collect(mut, requested_kind); + heap_unlock(heap); +} + +void gc_collect(struct gc_mutator *mut, enum gc_collection_kind kind) { + trigger_collection(mut, kind); +} + +static void* allocate_large(struct gc_mutator *mut, size_t size) { + struct gc_heap *heap = mutator_heap(mut); + struct large_object_space *space = heap_large_object_space(heap); + + size_t npages = large_object_space_npages(space, size); + + copy_space_request_release_memory(heap_resizable_space(heap), + npages << space->page_size_log2); + while (!copy_space_page_out_blocks_until_memory_released(heap_resizable_space(heap))) + trigger_collection(mut, GC_COLLECTION_COMPACTING); + atomic_fetch_add(&heap->large_object_pages, npages); + + void *ret = large_object_space_alloc(space, npages, GC_TRACE_PRECISELY); + + if (!ret) { + perror("weird: we have the space but mmap didn't work"); + GC_CRASH(); + } + + return ret; +} + +static void get_more_empty_blocks_for_mutator(void *mut) { + trigger_collection(mut, GC_COLLECTION_MINOR); +} + +void* gc_allocate_slow(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + if (GC_UNLIKELY(kind != GC_ALLOCATION_TAGGED 
+ && kind != GC_ALLOCATION_TAGGED_POINTERLESS)) { + fprintf(stderr, "pcc collector cannot make allocations of kind %d\n", + (int)kind); + GC_CRASH(); + } + GC_ASSERT(size > 0); // allocating 0 bytes would be silly + + if (size > gc_allocator_large_threshold()) + return allocate_large(mut, size); + + struct gc_ref ret; + while (1) { + ret = copy_space_allocate(&mut->allocator, + heap_allocation_space(mutator_heap(mut)), + size); + if (gc_ref_is_null(ret)) + trigger_collection(mut, GC_COLLECTION_MINOR); + else + break; + } + + return gc_ref_heap_object(ret); +} + +void gc_pin_object(struct gc_mutator *mut, struct gc_ref ref) { + GC_CRASH(); +} + +int gc_object_is_old_generation_slow(struct gc_mutator *mut, + struct gc_ref obj) { + if (!GC_GENERATIONAL) + return 0; + + struct gc_heap *heap = mutator_heap(mut); + + if (copy_space_contains(heap_new_space(heap), obj)) + return 0; + if (copy_space_contains(heap_old_space(heap), obj)) + return 1; + + struct large_object_space *lospace = heap_large_object_space(heap); + if (large_object_space_contains(lospace, obj)) + return large_object_space_is_survivor(lospace, obj); + + return 0; +} + +void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) { + GC_ASSERT(!gc_ref_is_null(new_val)); + if (!GC_GENERATIONAL) return; + if (gc_object_is_old_generation_slow(mut, new_val)) + return; + struct gc_heap *heap = mutator_heap(mut); + if ((obj_size <= gc_allocator_large_threshold()) + ? copy_space_remember_edge(heap_old_space(heap), edge) + : large_object_space_remember_edge(heap_large_object_space(heap), + obj, edge)) + gc_field_set_writer_add_edge(mutator_field_logger(mut), edge); +} + +int* gc_safepoint_flag_loc(struct gc_mutator *mut) { + return &mutator_heap(mut)->collecting; +} + +void gc_safepoint_slow(struct gc_mutator *mut) { + struct gc_heap *heap = mutator_heap(mut); + copy_space_allocator_finish(&mut->allocator, heap_allocation_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(mutator_field_logger(mut)); + heap_lock(heap); + while (mutators_are_stopping(mutator_heap(mut))) + pause_mutator_for_collection(heap, mut); + heap_unlock(heap); +} + +struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) { + return gc_allocate(mut, gc_ephemeron_size(), GC_ALLOCATION_TAGGED); +} + +void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { + gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value); +} + +struct gc_pending_ephemerons *gc_heap_pending_ephemerons(struct gc_heap *heap) { +#if GC_GENERATIONAL + if (is_minor_collection(heap)) + return heap->nursery_pending_ephemerons; +#endif + return heap->pending_ephemerons; +} + +unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { + return heap->count; +} + +struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) { + return gc_allocate(mut, gc_finalizer_size(), GC_ALLOCATION_TAGGED); +} + +void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, + unsigned priority, struct gc_ref object, + struct gc_ref closure) { + gc_finalizer_init_internal(finalizer, object, closure); + gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state, + finalizer, priority); + // No write barrier. 
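+  // (The finalizer state is visited via gc_visit_finalizer_roots whenever
+  // roots are added, so this edge is traced as a root rather than via the
+  // remembered set.)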
+} + +struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) { + return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state); +} + +void gc_set_finalizer_callback(struct gc_heap *heap, + gc_finalizer_callback callback) { + gc_finalizer_state_set_callback(heap->finalizer_state, callback); +} + +static int +heap_do_prepare_pending_ephemerons(struct gc_heap *heap, + struct gc_pending_ephemerons **loc, + size_t size) { + size_t target = size * heap->pending_ephemerons_size_factor; + double slop = heap->pending_ephemerons_size_slop; + + return !!(*loc = gc_prepare_pending_ephemerons(*loc, target, slop)); +} + +static int heap_prepare_pending_ephemerons(struct gc_heap *heap) { + return heap_do_prepare_pending_ephemerons(heap, &heap->pending_ephemerons, + heap->size) +#if GC_GENERATIONAL + && heap_do_prepare_pending_ephemerons(heap, + &heap->nursery_pending_ephemerons, + heap->per_processor_nursery_size * 2) +#endif + ; +} + +struct gc_options { + struct gc_common_options common; +}; +int gc_option_from_string(const char *str) { + return gc_common_option_from_string(str); +} +struct gc_options* gc_allocate_options(void) { + struct gc_options *ret = malloc(sizeof(struct gc_options)); + gc_init_common_options(&ret->common); + return ret; +} +int gc_options_set_int(struct gc_options *options, int option, int value) { + return gc_common_options_set_int(&options->common, option, value); +} +int gc_options_set_size(struct gc_options *options, int option, + size_t value) { + return gc_common_options_set_size(&options->common, option, value); +} +int gc_options_set_double(struct gc_options *options, int option, + double value) { + return gc_common_options_set_double(&options->common, option, value); +} +int gc_options_parse_and_set(struct gc_options *options, int option, + const char *value) { + return gc_common_options_parse_and_set(&options->common, option, value); +} + +// with heap lock +static uint64_t allocation_counter(struct gc_heap *heap) { + uint64_t ret = heap->total_allocated_bytes_at_last_gc; + copy_space_add_to_allocation_counter(heap_allocation_space(heap), &ret); + large_object_space_add_to_allocation_counter(heap_large_object_space(heap), + &ret); + return ret; +} + +uint64_t gc_allocation_counter(struct gc_heap *heap) { + pthread_mutex_lock(&heap->lock); + uint64_t ret = allocation_counter(heap); + pthread_mutex_unlock(&heap->lock); + return ret; +} + +static uint64_t allocation_counter_from_thread(struct gc_heap *heap) { + if (pthread_mutex_trylock(&heap->lock)) return 0; + uint64_t ret = allocation_counter(heap); + pthread_mutex_unlock(&heap->lock); + return ret; +} + +static void set_heap_size_from_thread(struct gc_heap *heap, size_t size) { + if (pthread_mutex_trylock(&heap->lock)) return; + resize_heap(heap, size); + pthread_mutex_unlock(&heap->lock); +} + +static int heap_init(struct gc_heap *heap, const struct gc_options *options) { + // *heap is already initialized to 0. + + if (GC_GENERATIONAL) + gc_field_set_init(heap_remembered_set(heap)); + pthread_mutex_init(&heap->lock, NULL); + pthread_cond_init(&heap->mutator_cond, NULL); + pthread_cond_init(&heap->collector_cond, NULL); + heap->size = options->common.heap_size; + heap->processor_count = gc_platform_processor_count(); + // max_active_mutator_count never falls below 1 after this point. + heap->max_active_mutator_count = 1; + +#if GC_GENERATIONAL + // We should add an option to set this, but for now, 2 MB per processor. 
+ heap->per_processor_nursery_size = 2 * 1024 * 1024; +#endif + + if (!gc_tracer_init(&heap->tracer, heap, options->common.parallelism)) + GC_CRASH(); + + heap->pending_ephemerons_size_factor = 0.005; + heap->pending_ephemerons_size_slop = 0.5; + + if (!heap_prepare_pending_ephemerons(heap)) + GC_CRASH(); + + heap->finalizer_state = gc_make_finalizer_state(); + if (!heap->finalizer_state) + GC_CRASH(); + + heap->background_thread = gc_make_background_thread(); + heap->sizer = gc_make_heap_sizer(heap, &options->common, + allocation_counter_from_thread, + set_heap_size_from_thread, + heap->background_thread); + + return 1; +} + +int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, + struct gc_heap **heap, struct gc_mutator **mut, + struct gc_event_listener event_listener, + void *event_listener_data) { + GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_ALIGNMENT); + GC_ASSERT_EQ(gc_allocator_large_threshold(), GC_LARGE_OBJECT_THRESHOLD); + GC_ASSERT_EQ(0, offsetof(struct gc_mutator, allocator)); + GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), + offsetof(struct copy_space_allocator, hp)); + GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), + offsetof(struct copy_space_allocator, limit)); + if (GC_GENERATIONAL) { + GC_ASSERT_EQ(gc_write_barrier_field_table_alignment(), + COPY_SPACE_SLAB_SIZE); + GC_ASSERT_EQ(gc_write_barrier_field_table_offset(), + offsetof(struct copy_space_slab, blocks)); + } + + *heap = calloc(1, sizeof(struct gc_heap)); + if (!*heap) GC_CRASH(); + + if (!heap_init(*heap, options)) + GC_CRASH(); + + (*heap)->event_listener = event_listener; + (*heap)->event_listener_data = event_listener_data; + HEAP_EVENT(*heap, init, (*heap)->size); + + { + uint32_t flags = 0; + if (options->common.parallelism > 1) + flags |= COPY_SPACE_ATOMIC_FORWARDING; + if (GC_GENERATIONAL) { + size_t nursery_size = + heap_nursery_size_for_mutator_count(*heap, (*heap)->processor_count); + heap_set_nursery_size(*heap, nursery_size); + if (!copy_space_init(heap_new_space(*heap), nursery_size, + flags | COPY_SPACE_ALIGNED, + (*heap)->background_thread)) { + free(*heap); + *heap = NULL; + return 0; + } + // Initially dimension the nursery for one mutator. 
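+      // resize_for_active_mutator_count will grow it again once more mutators
+      // are seen to be active.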
+ resize_nursery(*heap, heap_nursery_size_for_mutator_count(*heap, 1)); + + if (!copy_space_init(heap_old_space(*heap), (*heap)->size, + flags | COPY_SPACE_HAS_FIELD_LOGGING_BITS, + (*heap)->background_thread)) { + free(*heap); + *heap = NULL; + return 0; + } + } else { + if (!copy_space_init(heap_mono_space(*heap), (*heap)->size, flags, + (*heap)->background_thread)) { + free(*heap); + *heap = NULL; + return 0; + } + } + } + + if (!large_object_space_init(heap_large_object_space(*heap), *heap, + (*heap)->background_thread)) + GC_CRASH(); + + *mut = calloc(1, sizeof(struct gc_mutator)); + if (!*mut) GC_CRASH(); + add_mutator(*heap, *mut); + + gc_background_thread_start((*heap)->background_thread); + + return 1; +} + +struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base, + struct gc_heap *heap) { + struct gc_mutator *ret = calloc(1, sizeof(struct gc_mutator)); + if (!ret) + GC_CRASH(); + add_mutator(heap, ret); + return ret; +} + +void gc_finish_for_thread(struct gc_mutator *mut) { + remove_mutator(mutator_heap(mut), mut); + free(mut); +} + +static void deactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + GC_ASSERT(mut->next == NULL); + copy_space_allocator_finish(&mut->allocator, heap_allocation_space(heap)); + if (GC_GENERATIONAL) + gc_field_set_writer_release_buffer(mutator_field_logger(mut)); + heap_lock(heap); + heap->inactive_mutator_count++; + if (all_mutators_stopped(heap)) + pthread_cond_signal(&heap->collector_cond); + heap_unlock(heap); +} + +static void reactivate_mutator(struct gc_heap *heap, struct gc_mutator *mut) { + heap_lock(heap); + while (mutators_are_stopping(heap)) + pthread_cond_wait(&heap->mutator_cond, &heap->lock); + heap->inactive_mutator_count--; + maybe_increase_max_active_mutator_count(heap); + heap_unlock(heap); +} + +void* gc_call_without_gc(struct gc_mutator *mut, + void* (*f)(void*), + void *data) { + struct gc_heap *heap = mutator_heap(mut); + deactivate_mutator(heap, mut); + void *ret = f(data); + reactivate_mutator(heap, mut); + return ret; +} diff --git a/libguile/whippet/src/root-worklist.h b/libguile/whippet/src/root-worklist.h new file mode 100644 index 000000000..45ede8595 --- /dev/null +++ b/libguile/whippet/src/root-worklist.h @@ -0,0 +1,76 @@ +#ifndef ROOT_WORKLIST_H +#define ROOT_WORKLIST_H + +#include +#include +#include + +#include "assert.h" +#include "debug.h" +#include "gc-inline.h" +#include "gc-ref.h" +#include "root.h" + +// A single-producer, multiple-consumer worklist that has two phases: +// one in which roots are added by the producer, then one in which roots +// are consumed from the worklist. Roots are never added once the +// consumer phase starts. +struct root_worklist { + size_t size; + size_t read; + size_t write; + struct gc_root *buf; +}; + +void +root_worklist_alloc(struct root_worklist *q) { + q->buf = realloc(q->buf, q->size * sizeof(struct gc_root)); + if (!q->buf) { + perror("Failed to grow root worklist"); + GC_CRASH(); + } +} + +static void +root_worklist_init(struct root_worklist *q) { + q->size = 16; + q->read = 0; + q->write = 0; + q->buf = NULL; + root_worklist_alloc(q); +} + +static inline void +root_worklist_push(struct root_worklist *q, struct gc_root root) { + if (UNLIKELY(q->write == q->size)) { + q->size *= 2; + root_worklist_alloc(q); + } + q->buf[q->write++] = root; +} + +// Not atomic. 
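+// Pushes happen only in the single-producer phase; pops advance `read` with an
+// atomic fetch-and-add, so this count is just a snapshot.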
+static inline size_t +root_worklist_size(struct root_worklist *q) { + return q->write - q->read; +} + +static inline struct gc_root +root_worklist_pop(struct root_worklist *q) { + size_t idx = atomic_fetch_add(&q->read, 1); + if (idx < q->write) + return q->buf[idx]; + return (struct gc_root){ GC_ROOT_KIND_NONE, }; +} + +static void +root_worklist_reset(struct root_worklist *q) { + q->read = q->write = 0; +} + +static void +root_worklist_destroy(struct root_worklist *q) { + free(q->buf); +} + +#endif // ROOT_WORKLIST_H diff --git a/libguile/whippet/src/root.h b/libguile/whippet/src/root.h new file mode 100644 index 000000000..4fc705e61 --- /dev/null +++ b/libguile/whippet/src/root.h @@ -0,0 +1,81 @@ +#ifndef ROOT_H +#define ROOT_H + +#include "gc-edge.h" +#include "extents.h" + +struct gc_ephemeron; +struct gc_heap; +struct gc_mutator; +struct gc_edge_buffer; + +enum gc_root_kind { + GC_ROOT_KIND_NONE, + GC_ROOT_KIND_HEAP, + GC_ROOT_KIND_MUTATOR, + GC_ROOT_KIND_CONSERVATIVE_EDGES, + GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES, + GC_ROOT_KIND_RESOLVED_EPHEMERONS, + GC_ROOT_KIND_EDGE, + GC_ROOT_KIND_EDGE_BUFFER, +}; + +struct gc_root { + enum gc_root_kind kind; + union { + struct gc_heap *heap; + struct gc_mutator *mutator; + struct gc_ephemeron *resolved_ephemerons; + struct extent_range range; + struct gc_edge edge; + struct gc_edge_buffer *edge_buffer; + }; +}; + +static inline struct gc_root +gc_root_heap(struct gc_heap* heap) { + struct gc_root ret = { GC_ROOT_KIND_HEAP }; + ret.heap = heap; + return ret; +} + +static inline struct gc_root +gc_root_mutator(struct gc_mutator* mutator) { + struct gc_root ret = { GC_ROOT_KIND_MUTATOR }; + ret.mutator = mutator; + return ret; +} + +static inline struct gc_root +gc_root_conservative_edges(uintptr_t lo_addr, uintptr_t hi_addr, + int possibly_interior) { + enum gc_root_kind kind = possibly_interior + ? 
GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES + : GC_ROOT_KIND_CONSERVATIVE_EDGES; + struct gc_root ret = { kind }; + ret.range = (struct extent_range) {lo_addr, hi_addr}; + return ret; +} + +static inline struct gc_root +gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) { + struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS }; + ret.resolved_ephemerons = resolved; + return ret; +} + +static inline struct gc_root +gc_root_edge(struct gc_edge edge) { + struct gc_root ret = { GC_ROOT_KIND_EDGE }; + ret.edge = edge; + return ret; +} + +static inline struct gc_root +gc_root_edge_buffer(struct gc_edge_buffer *buf) { + struct gc_root ret = { GC_ROOT_KIND_EDGE_BUFFER }; + ret.edge_buffer = buf; + return ret; +} + +#endif // ROOT_H diff --git a/libguile/whippet/src/semi.c b/libguile/whippet/src/semi.c new file mode 100644 index 000000000..6f902534d --- /dev/null +++ b/libguile/whippet/src/semi.c @@ -0,0 +1,738 @@ +#include +#include +#include +#include + +#include "gc-api.h" + +#define GC_IMPL 1 +#include "gc-internal.h" + +#include "gc-platform.h" +#include "gc-tracepoint.h" +#include "heap-sizer.h" +#include "semi-attrs.h" +#include "large-object-space.h" + +#if GC_CONSERVATIVE_ROOTS +#error semi is a precise collector +#endif + +struct gc_options { + struct gc_common_options common; +}; +struct region { + uintptr_t base; + size_t active_size; + size_t mapped_size; +}; +struct semi_space { + uintptr_t hp; + uintptr_t limit; + struct region from_space; + struct region to_space; + size_t page_size; + size_t stolen_pages; + size_t live_bytes_at_last_gc; +}; +struct gc_heap { + struct semi_space semi_space; + struct large_object_space large_object_space; + struct gc_pending_ephemerons *pending_ephemerons; + struct gc_finalizer_state *finalizer_state; + struct gc_extern_space *extern_space; + double pending_ephemerons_size_factor; + double pending_ephemerons_size_slop; + size_t size; + size_t total_allocated_bytes_at_last_gc; + long count; + int check_pending_ephemerons; + const struct gc_options *options; + struct gc_heap_roots *roots; + struct gc_heap_sizer sizer; + struct gc_event_listener event_listener; + void *event_listener_data; +}; +// One mutator per space, can just store the heap in the mutator. +struct gc_mutator { + struct gc_heap heap; + struct gc_mutator_roots *roots; + void *event_listener_data; +}; + +#define HEAP_EVENT(heap, event, ...) do { \ + (heap)->event_listener.event((heap)->event_listener_data, ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) +#define MUTATOR_EVENT(mut, event, ...) 
do { \ + (mut)->heap->event_listener.event((mut)->event_listener_data, \ + ##__VA_ARGS__); \ + GC_TRACEPOINT(event, ##__VA_ARGS__); \ + } while (0) + +static inline void clear_memory(uintptr_t addr, size_t size) { + memset((char*)addr, 0, size); +} + +static inline struct gc_heap* mutator_heap(struct gc_mutator *mut) { + return &mut->heap; +} +static inline struct semi_space* heap_semi_space(struct gc_heap *heap) { + return &heap->semi_space; +} +static inline struct large_object_space* heap_large_object_space(struct gc_heap *heap) { + return &heap->large_object_space; +} +static inline struct semi_space* mutator_semi_space(struct gc_mutator *mut) { + return heap_semi_space(mutator_heap(mut)); +} + +struct gc_heap* gc_mutator_heap(struct gc_mutator *mutator) { + return mutator_heap(mutator); +} +uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap) { + GC_CRASH(); +} +uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap) { + GC_CRASH(); +} + +static uintptr_t align_up(uintptr_t addr, size_t align) { + return (addr + align - 1) & ~(align-1); +} +static size_t min_size(size_t a, size_t b) { return a < b ? a : b; } +static size_t max_size(size_t a, size_t b) { return a < b ? b : a; } + +static void collect(struct gc_mutator *mut, size_t for_alloc) GC_NEVER_INLINE; +static void collect_for_alloc(struct gc_mutator *mut, + size_t bytes) GC_NEVER_INLINE; + +static void trace(struct gc_edge edge, struct gc_heap *heap, void *visit_data); + +static void region_trim_by(struct region *region, size_t newly_unavailable) { + size_t old_available = region->active_size; + GC_ASSERT(newly_unavailable <= region->active_size); + + region->active_size -= newly_unavailable; + gc_platform_discard_memory((void*)(region->base + region->active_size), + newly_unavailable); +} + +static void region_set_active_size(struct region *region, size_t size) { + GC_ASSERT(size <= region->mapped_size); + GC_ASSERT(size == align_up(size, gc_platform_page_size())); + if (size < region->active_size) + region_trim_by(region, region->active_size - size); + else + region->active_size = size; +} + +static int semi_space_steal_pages(struct semi_space *space, size_t npages) { + size_t old_stolen_pages = space->stolen_pages; + size_t old_region_stolen_pages = align_up(old_stolen_pages,2)/2; + size_t new_stolen_pages = old_stolen_pages + npages; + size_t new_region_stolen_pages = align_up(new_stolen_pages,2)/2; + size_t region_newly_stolen_pages = + new_region_stolen_pages - old_region_stolen_pages; + size_t region_newly_unavailable_bytes = + region_newly_stolen_pages * space->page_size; + + if (space->limit - space->hp < region_newly_unavailable_bytes) + return 0; + + space->stolen_pages += npages; + + if (region_newly_unavailable_bytes == 0) + return 1; + + space->limit -= region_newly_unavailable_bytes; + region_trim_by(&space->to_space, region_newly_unavailable_bytes); + region_trim_by(&space->from_space, region_newly_unavailable_bytes); + return 1; +} + +static void semi_space_finish_gc(struct semi_space *space, + size_t large_object_pages) { + space->live_bytes_at_last_gc = space->hp - space->to_space.base; + space->stolen_pages = large_object_pages; + space->limit = 0; // set in adjust_heap_size_and_limits +} + +static void +semi_space_add_to_allocation_counter(struct semi_space *space, + uint64_t *counter) { + size_t base = space->to_space.base + space->live_bytes_at_last_gc; + *counter += space->hp - base; +} + +static void flip(struct semi_space *space) { + struct region tmp; + GC_ASSERT(space->hp 
<= space->limit); + GC_ASSERT(space->limit - space->to_space.base <= space->to_space.active_size); + GC_ASSERT(space->to_space.active_size <= space->from_space.mapped_size); + memcpy(&tmp, &space->from_space, sizeof(tmp)); + memcpy(&space->from_space, &space->to_space, sizeof(tmp)); + memcpy(&space->to_space, &tmp, sizeof(tmp)); + + space->hp = space->to_space.base; + space->limit = space->hp + space->to_space.active_size; +} + +static struct gc_ref copy(struct gc_heap *heap, struct semi_space *space, + struct gc_ref ref) { + size_t size; + gc_trace_object(ref, NULL, NULL, NULL, &size); + struct gc_ref new_ref = gc_ref(space->hp); + memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(ref), size); + gc_object_forward_nonatomic(ref, new_ref); + space->hp += align_up(size, GC_ALIGNMENT); + + if (GC_UNLIKELY(heap->check_pending_ephemerons)) + gc_resolve_pending_ephemerons(ref, heap); + + return new_ref; +} + +static uintptr_t scan(struct gc_heap *heap, struct gc_ref grey) { + size_t size; + gc_trace_object(grey, trace, heap, NULL, &size); + return gc_ref_value(grey) + align_up(size, GC_ALIGNMENT); +} + +static struct gc_ref forward(struct gc_heap *heap, struct semi_space *space, + struct gc_ref obj) { + uintptr_t forwarded = gc_object_forwarded_nonatomic(obj); + return forwarded ? gc_ref(forwarded) : copy(heap, space, obj); +} + +static void visit_semi_space(struct gc_heap *heap, struct semi_space *space, + struct gc_edge edge, struct gc_ref ref) { + gc_edge_update(edge, forward(heap, space, ref)); +} + +static void visit_large_object_space(struct gc_heap *heap, + struct large_object_space *space, + struct gc_ref ref) { + if (large_object_space_mark(space, ref)) { + if (GC_UNLIKELY(heap->check_pending_ephemerons)) + gc_resolve_pending_ephemerons(ref, heap); + + gc_trace_object(ref, trace, heap, NULL, NULL); + } +} + +static int region_contains(struct region *region, uintptr_t addr) { + return addr - region->base < region->active_size; +} + +static int semi_space_contains(struct semi_space *space, struct gc_ref ref) { + // As each live object is traced exactly once, its edges have not been + // visited, so its refs are to fromspace and not tospace. 
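+  // (The assertion below checks that the ref is not already in tospace.)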
+ uintptr_t addr = gc_ref_value(ref); + GC_ASSERT(!region_contains(&space->to_space, addr)); + return region_contains(&space->from_space, addr); +} + +static void visit_external_object(struct gc_heap *heap, + struct gc_extern_space *space, + struct gc_edge edge, + struct gc_ref old_ref) { + if (gc_extern_space_visit(space, edge, old_ref)) { + if (GC_UNLIKELY(heap->check_pending_ephemerons)) + gc_resolve_pending_ephemerons(old_ref, heap); + + gc_trace_object(gc_edge_ref(edge), trace, heap, NULL, NULL); + } +} + +static void visit(struct gc_edge edge, struct gc_heap *heap) { + struct gc_ref ref = gc_edge_ref(edge); + if (gc_ref_is_null(ref) || gc_ref_is_immediate(ref)) + return; + if (semi_space_contains(heap_semi_space(heap), ref)) + visit_semi_space(heap, heap_semi_space(heap), edge, ref); + else if (large_object_space_contains_with_lock(heap_large_object_space(heap), + ref)) + visit_large_object_space(heap, heap_large_object_space(heap), ref); + else + visit_external_object(heap, heap->extern_space, edge, ref); +} + +struct gc_pending_ephemerons * +gc_heap_pending_ephemerons(struct gc_heap *heap) { + return heap->pending_ephemerons; +} + +int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) { + struct gc_ref ref = gc_edge_ref(edge); + GC_ASSERT(!gc_ref_is_null(ref)); + if (gc_ref_is_immediate(ref)) + return 1; + GC_ASSERT(gc_ref_is_heap_object(ref)); + if (semi_space_contains(heap_semi_space(heap), ref)) { + uintptr_t forwarded = gc_object_forwarded_nonatomic(ref); + if (!forwarded) + return 0; + gc_edge_update(edge, gc_ref(forwarded)); + return 1; + } else if (large_object_space_contains_with_lock(heap_large_object_space(heap), ref)) { + return large_object_space_is_marked(heap_large_object_space(heap), ref); + } + GC_CRASH(); +} + +static void trace(struct gc_edge edge, struct gc_heap *heap, void *visit_data) { + return visit(edge, heap); +} + +static int grow_region_if_needed(struct region *region, size_t new_size) { + if (new_size <= region->mapped_size) + return 1; + + void *mem = gc_platform_acquire_memory(new_size, 0); + DEBUG("new size %zx\n", new_size); + if (!mem) + return 0; + if (region->mapped_size) + gc_platform_release_memory((void*)region->base, region->mapped_size); + region->base = (uintptr_t)mem; + region->active_size = 0; + region->mapped_size = new_size; + return 1; +} + +static void truncate_region(struct region *region, size_t new_size) { + GC_ASSERT(new_size <= region->mapped_size); + + size_t bytes = region->mapped_size - new_size; + if (bytes) { + gc_platform_release_memory((void*)(region->base + new_size), bytes); + region->mapped_size = new_size; + if (region->active_size > new_size) + region->active_size = new_size; + } +} + +static void resize_heap(struct gc_heap *heap, size_t new_heap_size) { + struct semi_space *semi = heap_semi_space(heap); + new_heap_size = align_up(new_heap_size, semi->page_size * 2); + size_t new_region_size = new_heap_size / 2; + + // Note that there is an asymmetry in how heap size is adjusted: we + // grow in two cycles (first the fromspace, then the tospace after it + // becomes the fromspace in the next collection) but shrink in one (by + // returning pages to the OS). + + // If we are growing the heap now, grow the fromspace mapping. Also, + // always try to grow the fromspace if it is smaller than the tospace. + grow_region_if_needed(&semi->from_space, + max_size(new_region_size, semi->to_space.mapped_size)); + + // We may have grown fromspace. Find out what our actual new region + // size will be. 
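+  // (Whichever of the two mappings is smaller bounds the region size we can
+  // actually use this cycle.)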
+ new_region_size = min_size(new_region_size, + min_size(semi->to_space.mapped_size, + semi->from_space.mapped_size)); + size_t old_heap_size = heap->size; + heap->size = new_region_size * 2; + if (heap->size != old_heap_size) + HEAP_EVENT(heap, heap_resized, heap->size); +} + +static void reset_heap_limits(struct gc_heap *heap) { + struct semi_space *semi = heap_semi_space(heap); + size_t new_region_size = align_up(heap->size, semi->page_size * 2) / 2; + size_t stolen = align_up(semi->stolen_pages, 2) * semi->page_size; + GC_ASSERT(new_region_size > stolen/2); + size_t new_active_region_size = new_region_size - stolen/2; + + region_set_active_size(&semi->from_space, new_active_region_size); + region_set_active_size(&semi->to_space, new_active_region_size); + + size_t new_limit = semi->to_space.base + new_active_region_size; + GC_ASSERT(semi->hp <= new_limit); + semi->limit = new_limit; +} + +static uintptr_t trace_closure(struct gc_heap *heap, struct semi_space *semi, + uintptr_t grey) { + while(grey < semi->hp) + grey = scan(heap, gc_ref(grey)); + return grey; +} + +static uintptr_t resolve_ephemerons(struct gc_heap *heap, uintptr_t grey) { + for (struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap); + resolved; + resolved = gc_pop_resolved_ephemerons(heap)) { + gc_trace_resolved_ephemerons(resolved, trace, heap, NULL); + grey = trace_closure(heap, heap_semi_space(heap), grey); + } + return grey; +} + +static uintptr_t resolve_finalizers(struct gc_heap *heap, uintptr_t grey) { + for (size_t priority = 0; + priority < gc_finalizer_priority_count(); + priority++) { + if (gc_resolve_finalizers(heap->finalizer_state, priority, + trace, heap, NULL)) { + grey = trace_closure(heap, heap_semi_space(heap), grey); + grey = resolve_ephemerons(heap, grey); + } + } + gc_notify_finalizers(heap->finalizer_state, heap); + return grey; +} + +static void collect(struct gc_mutator *mut, size_t for_alloc) { + struct gc_heap *heap = mutator_heap(mut); + int is_minor = 0; + int is_compacting = 1; + uint64_t start_ns = gc_platform_monotonic_nanoseconds(); + + HEAP_EVENT(heap, requesting_stop); + HEAP_EVENT(heap, waiting_for_stop); + HEAP_EVENT(heap, mutators_stopped); + HEAP_EVENT(heap, prepare_gc, GC_COLLECTION_COMPACTING); + + struct semi_space *semi = heap_semi_space(heap); + struct large_object_space *large = heap_large_object_space(heap); + // fprintf(stderr, "start collect #%ld:\n", space->count); + uint64_t *counter_loc = &heap->total_allocated_bytes_at_last_gc; + semi_space_add_to_allocation_counter(semi, counter_loc); + large_object_space_add_to_allocation_counter(large, counter_loc); + large_object_space_start_gc(large, 0); + gc_extern_space_start_gc(heap->extern_space, 0); + flip(semi); + heap->count++; + heap->check_pending_ephemerons = 0; + uintptr_t grey = semi->hp; + if (heap->roots) + gc_trace_heap_roots(heap->roots, trace, heap, NULL); + if (mut->roots) + gc_trace_mutator_roots(mut->roots, trace, heap, NULL); + gc_visit_finalizer_roots(heap->finalizer_state, trace, heap, NULL); + HEAP_EVENT(heap, roots_traced); + // fprintf(stderr, "pushed %zd bytes in roots\n", space->hp - grey); + grey = trace_closure(heap, semi, grey); + HEAP_EVENT(heap, heap_traced); + gc_scan_pending_ephemerons(heap->pending_ephemerons, heap, 0, 1); + heap->check_pending_ephemerons = 1; + grey = resolve_ephemerons(heap, grey); + HEAP_EVENT(heap, ephemerons_traced); + grey = resolve_finalizers(heap, grey); + HEAP_EVENT(heap, finalizers_traced); + large_object_space_finish_gc(large, 0); + 
gc_extern_space_finish_gc(heap->extern_space, 0); + semi_space_finish_gc(semi, large->live_pages_at_last_collection); + gc_sweep_pending_ephemerons(heap->pending_ephemerons, 0, 1); + size_t live_size = semi->live_bytes_at_last_gc; + live_size += large_object_space_size_at_last_collection(large); + live_size += for_alloc; + uint64_t pause_ns = gc_platform_monotonic_nanoseconds() - start_ns; + HEAP_EVENT(heap, live_data_size, live_size); + DEBUG("gc %zu: live size %zu, heap size %zu\n", heap->count, live_size, + heap->size); + gc_heap_sizer_on_gc(heap->sizer, heap->size, live_size, pause_ns, + resize_heap); + reset_heap_limits(heap); + clear_memory(semi->hp, semi->limit - semi->hp); + + HEAP_EVENT(heap, restarting_mutators); + // fprintf(stderr, "%zd bytes copied\n", (space->size>>1)-(space->limit-space->hp)); +} + +static void collect_for_alloc(struct gc_mutator *mut, size_t bytes) { + collect(mut, bytes); + + struct semi_space *space = mutator_semi_space(mut); + if (bytes < space->limit - space->hp) + return; + + struct gc_heap *heap = mutator_heap(mut); + if (heap->options->common.heap_size_policy != GC_HEAP_SIZE_FIXED) { + // Each collection can potentially resize only the inactive + // fromspace, so if we really run out of space we will need to + // collect again in order to resize the other half. + collect(mut, bytes); + if (bytes < space->limit - space->hp) + return; + } + fprintf(stderr, "ran out of space, heap size %zu\n", heap->size); + GC_CRASH(); +} + +void gc_collect(struct gc_mutator *mut, + enum gc_collection_kind requested_kind) { + // Ignore requested kind, because we always compact. + collect(mut, 0); +} + +int gc_object_is_old_generation_slow(struct gc_mutator *mut, + struct gc_ref obj) { + return 0; +} + +void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj, + size_t obj_size, struct gc_edge edge, + struct gc_ref new_val) { +} + +int* gc_safepoint_flag_loc(struct gc_mutator *mut) { GC_CRASH(); } +void gc_safepoint_slow(struct gc_mutator *mut) { GC_CRASH(); } + +static void collect_for_large_alloc(struct gc_mutator *mut, size_t npages) { + collect_for_alloc(mut, npages * mutator_semi_space(mut)->page_size); +} + +static void* allocate_large(struct gc_mutator *mut, size_t size) { + struct gc_heap *heap = mutator_heap(mut); + struct large_object_space *space = heap_large_object_space(heap); + struct semi_space *semi_space = heap_semi_space(heap); + + size_t npages = large_object_space_npages(space, size); + while (!semi_space_steal_pages(semi_space, npages)) + collect_for_large_alloc(mut, npages); + + void *ret = large_object_space_alloc(space, npages, GC_TRACE_PRECISELY); + + if (!ret) { + perror("weird: we have the space but mmap didn't work"); + GC_CRASH(); + } + + return ret; +} + +void* gc_allocate_slow(struct gc_mutator *mut, size_t size, + enum gc_allocation_kind kind) { + if (GC_UNLIKELY(kind != GC_ALLOCATION_TAGGED + && kind != GC_ALLOCATION_TAGGED_POINTERLESS)) { + fprintf(stderr, "semispace collector cannot make allocations of kind %d\n", + (int)kind); + GC_CRASH(); + } + + if (size > gc_allocator_large_threshold()) + return allocate_large(mut, size); + + struct semi_space *space = mutator_semi_space(mut); + while (1) { + uintptr_t addr = space->hp; + uintptr_t new_hp = align_up (addr + size, GC_ALIGNMENT); + if (space->limit < new_hp) { + // The factor of 2 is for both regions. 
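+      // (The heap must hold these bytes in the to-space and reserve the same
+      // amount in the from-space.)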
+ collect_for_alloc(mut, size * 2); + continue; + } + space->hp = new_hp; + return (void *)addr; + } +} + +void gc_pin_object(struct gc_mutator *mut, struct gc_ref ref) { + GC_CRASH(); +} + +struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) { + return gc_allocate(mut, gc_ephemeron_size(), GC_ALLOCATION_TAGGED); +} + +void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron, + struct gc_ref key, struct gc_ref value) { + gc_ephemeron_init_internal(mutator_heap(mut), ephemeron, key, value); +} + +struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) { + return gc_allocate(mut, gc_finalizer_size(), GC_ALLOCATION_TAGGED); +} + +void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer, + unsigned priority, struct gc_ref object, + struct gc_ref closure) { + gc_finalizer_init_internal(finalizer, object, closure); + gc_finalizer_attach_internal(mutator_heap(mut)->finalizer_state, + finalizer, priority); + // No write barrier. +} + +struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) { + return gc_finalizer_state_pop(mutator_heap(mut)->finalizer_state); +} + +void gc_set_finalizer_callback(struct gc_heap *heap, + gc_finalizer_callback callback) { + gc_finalizer_state_set_callback(heap->finalizer_state, callback); +} + +static int region_init(struct region *region, size_t size) { + region->base = 0; + region->active_size = 0; + region->mapped_size = 0; + + if (!grow_region_if_needed(region, size)) { + fprintf(stderr, "failed to allocated %zu bytes\n", size); + return 0; + } + + region->active_size = size; + + return 1; +} + +static int semi_space_init(struct semi_space *space, struct gc_heap *heap) { + // Allocate even numbers of pages. + size_t page_size = gc_platform_page_size(); + size_t size = align_up(heap->size, page_size * 2); + + space->page_size = page_size; + space->stolen_pages = 0; + + if (!region_init(&space->from_space, size / 2)) + return 0; + if (!region_init(&space->to_space, size / 2)) + return 0; + + space->hp = space->to_space.base; + space->limit = space->hp + space->to_space.active_size; + + return 1; +} + +static int heap_prepare_pending_ephemerons(struct gc_heap *heap) { + struct gc_pending_ephemerons *cur = heap->pending_ephemerons; + size_t target = heap->size * heap->pending_ephemerons_size_factor; + double slop = heap->pending_ephemerons_size_slop; + + heap->pending_ephemerons = gc_prepare_pending_ephemerons(cur, target, slop); + + return !!heap->pending_ephemerons; +} + +unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) { + return heap->count; +} + +static uint64_t get_allocation_counter(struct gc_heap *heap) { + return heap->total_allocated_bytes_at_last_gc; +} + +uint64_t gc_allocation_counter(struct gc_heap *heap) { + return get_allocation_counter(heap); +} + +static void ignore_async_heap_size_adjustment(struct gc_heap *heap, + size_t size) { +} + +static int heap_init(struct gc_heap *heap, const struct gc_options *options) { + heap->extern_space = NULL; + heap->pending_ephemerons_size_factor = 0.01; + heap->pending_ephemerons_size_slop = 0.5; + heap->count = 0; + heap->options = options; + heap->size = options->common.heap_size; + heap->roots = NULL; + heap->finalizer_state = gc_make_finalizer_state(); + if (!heap->finalizer_state) + GC_CRASH(); + + heap->sizer = gc_make_heap_sizer(heap, &options->common, + get_allocation_counter, + ignore_async_heap_size_adjustment, + NULL); + + return heap_prepare_pending_ephemerons(heap); +} + +int gc_option_from_string(const char 
*str) { + return gc_common_option_from_string(str); +} +struct gc_options* gc_allocate_options(void) { + struct gc_options *ret = malloc(sizeof(struct gc_options)); + gc_init_common_options(&ret->common); + return ret; +} +int gc_options_set_int(struct gc_options *options, int option, int value) { + return gc_common_options_set_int(&options->common, option, value); +} +int gc_options_set_size(struct gc_options *options, int option, + size_t value) { + return gc_common_options_set_size(&options->common, option, value); +} +int gc_options_set_double(struct gc_options *options, int option, + double value) { + return gc_common_options_set_double(&options->common, option, value); +} +int gc_options_parse_and_set(struct gc_options *options, int option, + const char *value) { + return gc_common_options_parse_and_set(&options->common, option, value); +} + +int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base, + struct gc_heap **heap, struct gc_mutator **mut, + struct gc_event_listener event_listener, + void *event_listener_data) { + GC_ASSERT_EQ(gc_allocator_allocation_pointer_offset(), + offsetof(struct semi_space, hp)); + GC_ASSERT_EQ(gc_allocator_allocation_limit_offset(), + offsetof(struct semi_space, limit)); + + if (!options) options = gc_allocate_options(); + + if (options->common.parallelism != 1) + fprintf(stderr, "warning: parallelism unimplemented in semispace copying collector\n"); + + *mut = calloc(1, sizeof(struct gc_mutator)); + if (!*mut) GC_CRASH(); + *heap = mutator_heap(*mut); + + if (!heap_init(*heap, options)) + return 0; + + (*heap)->event_listener = event_listener; + (*heap)->event_listener_data = event_listener_data; + HEAP_EVENT(*heap, init, (*heap)->size); + + if (!semi_space_init(heap_semi_space(*heap), *heap)) + return 0; + struct gc_background_thread *thread = NULL; + if (!large_object_space_init(heap_large_object_space(*heap), *heap, thread)) + return 0; + + // Ignore stack base, as we are precise. + (*mut)->roots = NULL; + + (*mut)->event_listener_data = + event_listener.mutator_added(event_listener_data); + + return 1; +} + +void gc_mutator_set_roots(struct gc_mutator *mut, + struct gc_mutator_roots *roots) { + mut->roots = roots; +} +void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) { + heap->roots = roots; +} +void gc_heap_set_extern_space(struct gc_heap *heap, + struct gc_extern_space *space) { + heap->extern_space = space; +} + +struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *base, + struct gc_heap *heap) { + fprintf(stderr, + "Semispace copying collector not appropriate for multithreaded use.\n"); + GC_CRASH(); +} +void gc_finish_for_thread(struct gc_mutator *space) { +} + +void* gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*), + void *data) { + // Can't be threads, then there won't be collection. 
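+  // (This collector supports only a single mutator, so no other thread can
+  // trigger a collection while f runs.)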
+ return f(data); +} diff --git a/libguile/whippet/src/serial-tracer.h b/libguile/whippet/src/serial-tracer.h new file mode 100644 index 000000000..a3289e30c --- /dev/null +++ b/libguile/whippet/src/serial-tracer.h @@ -0,0 +1,88 @@ +#ifndef SERIAL_TRACER_H +#define SERIAL_TRACER_H + +#include +#include + +#include "assert.h" +#include "debug.h" +#include "simple-worklist.h" +#include "root-worklist.h" +#include "tracer.h" + +struct gc_tracer { + struct gc_heap *heap; + int trace_roots_only; + struct root_worklist roots; + struct simple_worklist worklist; +}; + +struct gc_trace_worker { + struct gc_tracer *tracer; + struct gc_trace_worker_data *data; +}; + +static inline struct gc_trace_worker_data* +gc_trace_worker_data(struct gc_trace_worker *worker) { + return worker->data; +} + +static int +gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, + size_t parallelism) { + tracer->heap = heap; + tracer->trace_roots_only = 0; + root_worklist_init(&tracer->roots); + return simple_worklist_init(&tracer->worklist); +} +static void gc_tracer_prepare(struct gc_tracer *tracer) {} +static void gc_tracer_release(struct gc_tracer *tracer) { + simple_worklist_release(&tracer->worklist); +} + +static inline void +gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) { + root_worklist_push(&tracer->roots, root); +} + +static inline void +gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) { + simple_worklist_push(&worker->tracer->worklist, ref); +} + +static inline void +tracer_trace_with_data(struct gc_tracer *tracer, struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data) { + worker->data = data; + do { + struct gc_root root = root_worklist_pop(&tracer->roots); + if (root.kind == GC_ROOT_KIND_NONE) + break; + trace_root(root, heap, worker); + } while (1); + root_worklist_reset(&tracer->roots); + if (!tracer->trace_roots_only) { + do { + struct gc_ref obj = simple_worklist_pop(&tracer->worklist); + if (gc_ref_is_null(obj)) + break; + trace_one(obj, heap, worker); + } while (1); + } +} +static inline void +gc_tracer_trace(struct gc_tracer *tracer) { + struct gc_trace_worker worker = { tracer }; + gc_trace_worker_call_with_data(tracer_trace_with_data, tracer, tracer->heap, + &worker); +} + +static inline void +gc_tracer_trace_roots(struct gc_tracer *tracer) { + tracer->trace_roots_only = 1; + gc_tracer_trace(tracer); + tracer->trace_roots_only = 0; +} + +#endif // SERIAL_TRACER_H diff --git a/libguile/whippet/src/shared-worklist.h b/libguile/whippet/src/shared-worklist.h new file mode 100644 index 000000000..979c87178 --- /dev/null +++ b/libguile/whippet/src/shared-worklist.h @@ -0,0 +1,260 @@ +#ifndef SHARED_WORKLIST_H +#define SHARED_WORKLIST_H + +#include + +#include "assert.h" +#include "debug.h" +#include "gc-align.h" +#include "gc-inline.h" +#include "gc-platform.h" +#include "spin.h" + +// The Chase-Lev work-stealing deque, as initially described in "Dynamic +// Circular Work-Stealing Deque" (Chase and Lev, SPAA'05) +// (https://www.dre.vanderbilt.edu/~schmidt/PDF/work-stealing-dequeue.pdf) +// and improved with C11 atomics in "Correct and Efficient Work-Stealing +// for Weak Memory Models" (LĂȘ et al, PPoPP'13) +// (http://www.di.ens.fr/%7Ezappa/readings/ppopp13.pdf). + +struct shared_worklist_buf { + unsigned log_size; + size_t size; + uintptr_t *data; +}; + +// Min size: 8 kB on 64-bit systems, 4 kB on 32-bit. 
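+// (That is 1 << 10 uintptr_t-sized entries; the maximum below is 1 << 28
+// entries.)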
+#define shared_worklist_buf_min_log_size ((unsigned) 10)
+// Max size: 2 GB on 64-bit systems, 1 GB on 32-bit.
+#define shared_worklist_buf_max_log_size ((unsigned) 28)
+
+static const size_t shared_worklist_release_byte_threshold = 256 * 1024;
+
+static int
+shared_worklist_buf_init(struct shared_worklist_buf *buf, unsigned log_size) {
+  ASSERT(log_size >= shared_worklist_buf_min_log_size);
+  ASSERT(log_size <= shared_worklist_buf_max_log_size);
+  size_t size = (1 << log_size) * sizeof(uintptr_t);
+  void *mem = gc_platform_acquire_memory(size, 0);
+  if (!mem) {
+    perror("Failed to grow work-stealing deque");
+    DEBUG("Failed to allocate %zu bytes", size);
+    return 0;
+  }
+  buf->log_size = log_size;
+  buf->size = 1 << log_size;
+  buf->data = mem;
+  return 1;
+}
+
+static inline size_t
+shared_worklist_buf_size(struct shared_worklist_buf *buf) {
+  return buf->size;
+}
+
+static inline size_t
+shared_worklist_buf_byte_size(struct shared_worklist_buf *buf) {
+  return shared_worklist_buf_size(buf) * sizeof(uintptr_t);
+}
+
+static void
+shared_worklist_buf_release(struct shared_worklist_buf *buf) {
+  size_t byte_size = shared_worklist_buf_byte_size(buf);
+  if (buf->data && byte_size >= shared_worklist_release_byte_threshold)
+    gc_platform_discard_memory(buf->data, byte_size);
+}
+
+static void
+shared_worklist_buf_destroy(struct shared_worklist_buf *buf) {
+  if (buf->data) {
+    gc_platform_release_memory(buf->data, shared_worklist_buf_byte_size(buf));
+    buf->data = NULL;
+    buf->log_size = 0;
+    buf->size = 0;
+  }
+}
+
+static inline struct gc_ref
+shared_worklist_buf_get(struct shared_worklist_buf *buf, size_t i) {
+  return gc_ref(atomic_load_explicit(&buf->data[i & (buf->size - 1)],
+                                     memory_order_relaxed));
+}
+
+static inline void
+shared_worklist_buf_put(struct shared_worklist_buf *buf, size_t i,
+                        struct gc_ref ref) {
+  return atomic_store_explicit(&buf->data[i & (buf->size - 1)],
+                               gc_ref_value(ref),
+                               memory_order_relaxed);
+}
+
+static inline int
+shared_worklist_buf_grow(struct shared_worklist_buf *from,
+                         struct shared_worklist_buf *to, size_t b, size_t t) {
+  if (from->log_size == shared_worklist_buf_max_log_size)
+    return 0;
+  if (!shared_worklist_buf_init (to, from->log_size + 1))
+    return 0;
+  // Copy the live entries [t, b) into the fresh, larger buffer.
+  for (size_t i=t; i<b; i++)
+    shared_worklist_buf_put(to, i, shared_worklist_buf_get(from, i));
+  return 1;
+}
+
+// The deque itself: `bufs[active]` is the buffer currently in use.
+struct shared_worklist {
+  atomic_size_t bottom;
+  atomic_size_t top;
+  atomic_int active;
+  struct shared_worklist_buf bufs[(shared_worklist_buf_max_log_size -
+                                   shared_worklist_buf_min_log_size) + 1];
+};
+
+#define LOAD_RELAXED(loc) atomic_load_explicit(loc, memory_order_relaxed)
+#define STORE_RELAXED(loc, o) atomic_store_explicit(loc, o, memory_order_relaxed)
+#define LOAD_ACQUIRE(loc) atomic_load_explicit(loc, memory_order_acquire)
+#define STORE_RELEASE(loc, o) atomic_store_explicit(loc, o, memory_order_release)
+#define LOAD_CONSUME(loc) atomic_load_explicit(loc, memory_order_consume)
+
+static int
+shared_worklist_init(struct shared_worklist *q) {
+  memset(q, 0, sizeof(*q));
+  int ret = shared_worklist_buf_init(&q->bufs[0],
+                                     shared_worklist_buf_min_log_size);
+  // Note, this fence isn't in the paper, I added it out of caution.
+  atomic_thread_fence(memory_order_release);
+  return ret;
+}
+
+static void
+shared_worklist_release(struct shared_worklist *q) {
+  for (int i = LOAD_RELAXED(&q->active); i >= 0; i--)
+    shared_worklist_buf_release(&q->bufs[i]);
+}
+
+static void
+shared_worklist_destroy(struct shared_worklist *q) {
+  for (int i = LOAD_RELAXED(&q->active); i >= 0; i--)
+    shared_worklist_buf_destroy(&q->bufs[i]);
+}
+
+static int
+shared_worklist_grow(struct shared_worklist *q, int cur, size_t b, size_t t) {
+  if (!shared_worklist_buf_grow(&q->bufs[cur], &q->bufs[cur + 1], b, t)) {
+    fprintf(stderr, "failed to grow deque!!\n");
+    GC_CRASH();
+  }
+
+  cur++;
+  STORE_RELAXED(&q->active, cur);
+  return cur;
+}
+
+static void
+shared_worklist_push(struct shared_worklist *q, struct gc_ref x) {
+  size_t b = LOAD_RELAXED(&q->bottom);
+  size_t t = LOAD_ACQUIRE(&q->top);
+  int active = LOAD_RELAXED(&q->active);
+
+  ssize_t size = b - t;
+  if (size > shared_worklist_buf_size(&q->bufs[active]) - 1)
+    active = shared_worklist_grow(q, active, b, t); /* Full queue; grow.
*/ + + shared_worklist_buf_put(&q->bufs[active], b, x); + atomic_thread_fence(memory_order_release); + STORE_RELAXED(&q->bottom, b + 1); +} + +static void +shared_worklist_push_many(struct shared_worklist *q, struct gc_ref *objv, + size_t count) { + size_t b = LOAD_RELAXED(&q->bottom); + size_t t = LOAD_ACQUIRE(&q->top); + int active = LOAD_RELAXED(&q->active); + + ssize_t size = b - t; + while (size > shared_worklist_buf_size(&q->bufs[active]) - count) + active = shared_worklist_grow(q, active, b, t); /* Full queue; grow. */ + + for (size_t i = 0; i < count; i++) + shared_worklist_buf_put(&q->bufs[active], b + i, objv[i]); + atomic_thread_fence(memory_order_release); + STORE_RELAXED(&q->bottom, b + count); +} + +static struct gc_ref +shared_worklist_try_pop(struct shared_worklist *q) { + size_t b = LOAD_RELAXED(&q->bottom); + int active = LOAD_RELAXED(&q->active); + STORE_RELAXED(&q->bottom, b - 1); + atomic_thread_fence(memory_order_seq_cst); + size_t t = LOAD_RELAXED(&q->top); + struct gc_ref x; + ssize_t size = b - t; + if (size > 0) { // Non-empty queue. + x = shared_worklist_buf_get(&q->bufs[active], b - 1); + if (size == 1) { // Single last element in queue. + if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1, + memory_order_seq_cst, + memory_order_relaxed)) + // Failed race. + x = gc_ref_null(); + STORE_RELAXED(&q->bottom, b); + } + } else { // Empty queue. + x = gc_ref_null(); + STORE_RELAXED(&q->bottom, b); + } + return x; +} + +static struct gc_ref +shared_worklist_steal(struct shared_worklist *q) { + while (1) { + size_t t = LOAD_ACQUIRE(&q->top); + atomic_thread_fence(memory_order_seq_cst); + size_t b = LOAD_ACQUIRE(&q->bottom); + ssize_t size = b - t; + if (size <= 0) + return gc_ref_null(); + int active = LOAD_CONSUME(&q->active); + struct gc_ref ref = shared_worklist_buf_get(&q->bufs[active], t); + if (!atomic_compare_exchange_strong_explicit(&q->top, &t, t + 1, + memory_order_seq_cst, + memory_order_relaxed)) + // Failed race. 
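+      // (The owner or another thief claimed this element; retry with a fresh
+      // top.)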
+ continue; + return ref; + } +} + +static ssize_t +shared_worklist_size(struct shared_worklist *q) { + size_t t = LOAD_ACQUIRE(&q->top); + atomic_thread_fence(memory_order_seq_cst); + size_t b = LOAD_ACQUIRE(&q->bottom); + ssize_t size = b - t; + return size; +} + +static int +shared_worklist_can_steal(struct shared_worklist *q) { + return shared_worklist_size(q) > 0; +} + +#undef LOAD_RELAXED +#undef STORE_RELAXED +#undef LOAD_ACQUIRE +#undef STORE_RELEASE +#undef LOAD_CONSUME + +#endif // SHARED_WORKLIST_H diff --git a/libguile/whippet/src/simple-worklist.h b/libguile/whippet/src/simple-worklist.h new file mode 100644 index 000000000..61f92a31d --- /dev/null +++ b/libguile/whippet/src/simple-worklist.h @@ -0,0 +1,121 @@ +#ifndef SIMPLE_WORKLIST_H +#define SIMPLE_WORKLIST_H + +#include "assert.h" +#include "debug.h" +#include "gc-inline.h" +#include "gc-ref.h" +#include "gc-platform.h" + +struct simple_worklist { + size_t size; + size_t read; + size_t write; + struct gc_ref *buf; +}; + +static const size_t simple_worklist_max_size = + (1ULL << (sizeof(struct gc_ref) * 8 - 1)) / sizeof(struct gc_ref); +static const size_t simple_worklist_release_byte_threshold = 1 * 1024 * 1024; + +static struct gc_ref * +simple_worklist_alloc(size_t size) { + void *mem = gc_platform_acquire_memory(size * sizeof(struct gc_ref), 0); + if (!mem) { + perror("Failed to grow trace queue"); + DEBUG("Failed to allocate %zu bytes", size); + return NULL; + } + return mem; +} + +static int +simple_worklist_init(struct simple_worklist *q) { + q->size = gc_platform_page_size() / sizeof(struct gc_ref); + q->read = 0; + q->write = 0; + q->buf = simple_worklist_alloc(q->size); + return !!q->buf; +} + +static inline struct gc_ref +simple_worklist_get(struct simple_worklist *q, size_t idx) { + return q->buf[idx & (q->size - 1)]; +} + +static inline void +simple_worklist_put(struct simple_worklist *q, size_t idx, struct gc_ref x) { + q->buf[idx & (q->size - 1)] = x; +} + +static int simple_worklist_grow(struct simple_worklist *q) GC_NEVER_INLINE; + +static int +simple_worklist_grow(struct simple_worklist *q) { + size_t old_size = q->size; + struct gc_ref *old_buf = q->buf; + if (old_size >= simple_worklist_max_size) { + DEBUG("trace queue already at max size of %zu bytes", old_size); + return 0; + } + + size_t new_size = old_size * 2; + struct gc_ref *new_buf = simple_worklist_alloc(new_size); + if (!new_buf) + return 0; + + size_t old_mask = old_size - 1; + size_t new_mask = new_size - 1; + + for (size_t i = q->read; i < q->write; i++) + new_buf[i & new_mask] = old_buf[i & old_mask]; + + munmap(old_buf, old_size * sizeof(struct gc_ref)); + + q->size = new_size; + q->buf = new_buf; + return 1; +} + +static inline void +simple_worklist_push(struct simple_worklist *q, struct gc_ref p) { + if (UNLIKELY(q->write - q->read == q->size)) { + if (!simple_worklist_grow(q)) + GC_CRASH(); + } + simple_worklist_put(q, q->write++, p); +} + +static inline void +simple_worklist_push_many(struct simple_worklist *q, struct gc_ref *pv, + size_t count) { + while (q->size - (q->write - q->read) < count) { + if (!simple_worklist_grow(q)) + GC_CRASH(); + } + for (size_t i = 0; i < count; i++) + simple_worklist_put(q, q->write++, pv[i]); +} + +static inline struct gc_ref +simple_worklist_pop(struct simple_worklist *q) { + if (UNLIKELY(q->read == q->write)) + return gc_ref_null(); + return simple_worklist_get(q, q->read++); +} + +static void +simple_worklist_release(struct simple_worklist *q) { + size_t byte_size = q->size * sizeof(struct 
gc_ref); + if (byte_size >= simple_worklist_release_byte_threshold) + madvise(q->buf, byte_size, MADV_DONTNEED); + q->read = q->write = 0; +} + +static void +simple_worklist_destroy(struct simple_worklist *q) { + size_t byte_size = q->size * sizeof(struct gc_ref); + munmap(q->buf, byte_size); +} + +#endif // SIMPLE_WORKLIST_H diff --git a/libguile/whippet/src/spin.h b/libguile/whippet/src/spin.h new file mode 100644 index 000000000..d650c3216 --- /dev/null +++ b/libguile/whippet/src/spin.h @@ -0,0 +1,18 @@ +#ifndef SPIN_H +#define SPIN_H + +#include <sched.h> +#include <unistd.h> + +static inline void yield_for_spin(size_t spin_count) { + if (spin_count < 10) + __builtin_ia32_pause(); + else if (spin_count < 20) + sched_yield(); + else if (spin_count < 40) + usleep(0); + else + usleep(1); +} + +#endif // SPIN_H diff --git a/libguile/whippet/src/splay-tree.h b/libguile/whippet/src/splay-tree.h new file mode 100644 index 000000000..f4e41af18 --- /dev/null +++ b/libguile/whippet/src/splay-tree.h @@ -0,0 +1,258 @@ +// A splay tree, originally derived from Octane's `splay.js', whose +// copyright is as follows: +// +// Copyright 2009 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// The splay tree has been modified to allow nodes to store spans of +// keys, for example so that we can look up an object given any address +// pointing into that object. + +#ifndef SPLAY_TREE_PREFIX +#error define SPLAY_TREE_PREFIX before including splay-tree.h +#endif + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "gc-assert.h" + +#define SPLAY___(p, n) p ## n +#define SPLAY__(p, n) SPLAY___(p, n) +#define SPLAY_(n) SPLAY__(SPLAY_TREE_PREFIX, n) + +// Data types used by the splay tree. +#define SPLAY_KEY_SPAN SPLAY_(key_span) +#define SPLAY_KEY SPLAY_(key) +#define SPLAY_VALUE SPLAY_(value) + +// Functions used by the splay tree. +// key_span, key -> -1|0|1 +#define SPLAY_COMPARE SPLAY_(compare) +// key_span -> key +#define SPLAY_SPAN_START SPLAY_(span_start) + +// Data types defined by the splay tree.
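All names are prefixed via SPLAY_TREE_PREFIX, so multiple instantiations with different key, span and value types can coexist in one program.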
+#define SPLAY_TREE SPLAY_(tree) +#define SPLAY_NODE SPLAY_(node) + +// Functions defined by the splay tree. +#define SPLAY_NODE_NEW SPLAY_(node_new) +#define SPLAY_INIT SPLAY_(tree_init) +#define SPLAY_SPLAY SPLAY_(tree_splay) +#define SPLAY_PREVIOUS SPLAY_(tree_previous) +#define SPLAY_LOOKUP SPLAY_(tree_lookup) +#define SPLAY_CONTAINS SPLAY_(tree_contains) +#define SPLAY_INSERT SPLAY_(tree_insert) +#define SPLAY_REMOVE SPLAY_(tree_remove) + +struct SPLAY_NODE { + SPLAY_KEY_SPAN key; + SPLAY_VALUE value; + struct SPLAY_NODE *left; + struct SPLAY_NODE *right; +}; + +struct SPLAY_TREE { + struct SPLAY_NODE *root; +}; + +static inline struct SPLAY_NODE* +SPLAY_NODE_NEW(SPLAY_KEY_SPAN key, SPLAY_VALUE value) { + struct SPLAY_NODE *ret = malloc(sizeof(*ret)); + if (!ret) GC_CRASH(); + ret->key = key; + ret->value = value; + ret->left = ret->right = NULL; + return ret; +} + +static inline void +SPLAY_INIT(struct SPLAY_TREE *tree) { + tree->root = NULL; +} + +static struct SPLAY_NODE* +SPLAY_SPLAY(struct SPLAY_TREE *tree, SPLAY_KEY key) { + struct SPLAY_NODE *current = tree->root; + if (!current) + return NULL; + // The use of the dummy node is a bit counter-intuitive: The right + // child of the dummy node will hold the L tree of the algorithm. The + // left child of the dummy node will hold the R tree of the algorithm. + // Using a dummy node, left and right will always be nodes and we + // avoid special cases. + struct SPLAY_NODE dummy; + memset(&dummy, 0, sizeof(dummy)); + struct SPLAY_NODE *left = &dummy; + struct SPLAY_NODE *right = &dummy; + +loop: + switch (SPLAY_COMPARE(key, current->key)) { + case -1: + if (!current->left) + break; + if (SPLAY_COMPARE(key, current->left->key) < 0LL) { + // Rotate right. + struct SPLAY_NODE *tmp = current->left; + current->left = tmp->right; + tmp->right = current; + current = tmp; + if (!current->left) + break; + } + // Link right. + right->left = current; + right = current; + current = current->left; + goto loop; + + case 0: + break; + + case 1: + if (!current->right) + break; + if (SPLAY_COMPARE(key, current->right->key) > 0LL) { + // Rotate left. + struct SPLAY_NODE *tmp = current->right; + current->right = tmp->left; + tmp->left = current; + current = tmp; + if (!current->right) + break; + } + // Link left. 
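The current node becomes the rightmost node of the growing L tree hanging off dummy.right, and the search continues in its right subtree.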
+ left->right = current; + left = current; + current = current->right; + goto loop; + + default: + GC_CRASH(); + } + + left->right = current->left; + right->left = current->right; + current->left = dummy.right; + current->right = dummy.left; + tree->root = current; + return current; +} + +static inline struct SPLAY_NODE* +SPLAY_PREVIOUS(struct SPLAY_NODE *node) { + node = node->left; + if (!node) return NULL; + while (node->right) + node = node->right; + return node; +} + +static inline struct SPLAY_NODE* +SPLAY_LOOKUP(struct SPLAY_TREE *tree, SPLAY_KEY key) { + struct SPLAY_NODE *node = SPLAY_SPLAY(tree, key); + if (node && SPLAY_COMPARE(key, node->key) == 0) + return node; + return NULL; +} + +static inline int +SPLAY_CONTAINS(struct SPLAY_TREE *tree, SPLAY_KEY key) { + return !!SPLAY_LOOKUP(tree, key); +} + +static inline struct SPLAY_NODE* +SPLAY_INSERT(struct SPLAY_TREE* tree, SPLAY_KEY_SPAN key, SPLAY_VALUE value) { + if (!tree->root) { + tree->root = SPLAY_NODE_NEW(key, value); + return tree->root; + } + SPLAY_KEY scalar = SPLAY_SPAN_START(key); + struct SPLAY_NODE *node = SPLAY_SPLAY(tree, scalar); + switch (SPLAY_COMPARE(scalar, node->key)) { + case -1: + node = SPLAY_NODE_NEW(key, value); + node->right = tree->root; + node->left = tree->root->left; + tree->root->left = NULL; + tree->root = node; + break; + case 0: + GC_ASSERT(memcmp(&key, &node->key, sizeof(SPLAY_KEY_SPAN)) == 0); + node->value = value; + break; + case 1: + node = SPLAY_NODE_NEW(key, value); + node->left = tree->root; + node->right = tree->root->right; + tree->root->right = NULL; + tree->root = node; + break; + default: + GC_CRASH(); + } + return node; +} + +static inline SPLAY_VALUE +SPLAY_REMOVE(struct SPLAY_TREE *tree, SPLAY_KEY key) { + GC_ASSERT(tree->root); + struct SPLAY_NODE *removed = SPLAY_SPLAY(tree, key); + GC_ASSERT(removed); + SPLAY_VALUE value = removed->value; + if (!removed->left) { + tree->root = removed->right; + } else { + struct SPLAY_NODE *right = removed->right; + tree->root = removed->left; + // Splay to make sure that the new root has an empty right child. + SPLAY_SPLAY(tree, key); + tree->root->right = right; + } + free(removed); + return value; +} + +#undef SPLAY_TREE_PREFIX +#undef SPLAY_KEY_SPAN +#undef SPLAY_KEY +#undef SPLAY_VALUE +#undef SPLAY_COMPARE +#undef SPLAY_SPAN_START +#undef SPLAY_SPANS_EQUAL +#undef SPLAY_TREE +#undef SPLAY_NODE +#undef SPLAY_NODE_NEW +#undef SPLAY_INIT +#undef SPLAY_SPLAY +#undef SPLAY_PREVIOUS +#undef SPLAY_LOOKUP +#undef SPLAY_CONTAINS +#undef SPLAY_INSERT +#undef SPLAY_REMOVE diff --git a/libguile/whippet/src/swar.h b/libguile/whippet/src/swar.h new file mode 100644 index 000000000..d8598c8b5 --- /dev/null +++ b/libguile/whippet/src/swar.h @@ -0,0 +1,154 @@ +#ifndef SWAR_H +#define SWAR_H + +#include + +static inline size_t +count_zero_bytes(uint64_t bytes) { + return bytes ? 
(__builtin_ctzll(bytes) / 8) : sizeof(bytes); +} + +static uint64_t +broadcast_byte(uint8_t byte) { + uint64_t result = byte; + return result * 0x0101010101010101ULL; +} + +static inline uint64_t +load_eight_aligned_bytes(uint8_t *ptr) { + GC_ASSERT(((uintptr_t)ptr & 7) == 0); + uint8_t * __attribute__((aligned(8))) aligned_ptr = ptr; + uint64_t word; + memcpy(&word, aligned_ptr, 8); +#ifdef WORDS_BIGENDIAN + word = __builtin_bswap64(word); +#endif + return word; +} + +static inline uint64_t +match_bytes_against_bits(uint64_t bytes, uint8_t mask) { + return bytes & broadcast_byte(mask); +} + +static inline size_t +scan_for_byte_with_bits(uint8_t *ptr, size_t limit, uint8_t mask) { + size_t n = 0; + size_t unaligned = ((uintptr_t) ptr) & 7; + if (unaligned) { + uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8); + uint64_t match = match_bytes_against_bits(bytes, mask); + if (match) + return count_zero_bytes(match); + n += 8 - unaligned; + } + + for(; n < limit; n += 8) { + uint64_t bytes = load_eight_aligned_bytes(ptr + n); + uint64_t match = match_bytes_against_bits(bytes, mask); + if (match) + return n + count_zero_bytes(match); + } + + return limit; +} + +static inline uint64_t +match_bytes_against_tag(uint64_t bytes, uint8_t mask, uint8_t tag) { + // Precondition: tag within mask. + GC_ASSERT_EQ(tag & mask, tag); + // Precondition: high bit of mask byte is empty, so that we can add without + // overflow. + GC_ASSERT_EQ(mask & 0x7f, mask); + // Precondition: mask is low bits of byte. + GC_ASSERT(mask); + GC_ASSERT_EQ(mask & (mask + 1), 0); + + uint64_t vmask = broadcast_byte(mask); + uint64_t vtest = broadcast_byte(mask + 1); + uint64_t vtag = broadcast_byte(tag); + + bytes &= vmask; + uint64_t m = (bytes ^ vtag) + vmask; + return (m & vtest) ^ vtest; +} + +static inline size_t +scan_for_byte_with_tag(uint8_t *ptr, size_t limit, uint8_t mask, uint8_t tag) { + // The way we handle unaligned reads by padding high bytes with zeroes assumes + // that all-zeroes is not a matching byte. + GC_ASSERT(tag); + + size_t n = 0; + size_t unaligned = ((uintptr_t) ptr) & 7; + if (unaligned) { + uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8); + uint64_t match = match_bytes_against_tag(bytes, mask, tag); + if (match) + return count_zero_bytes(match); + n += 8 - unaligned; + } + + for(; n < limit; n += 8) { + uint64_t bytes = load_eight_aligned_bytes(ptr + n); + uint64_t match = match_bytes_against_tag(bytes, mask, tag); + if (match) + return n + count_zero_bytes(match); + } + + return limit; +} + +static inline uint64_t +match_bytes_against_2_tags(uint64_t bytes, uint8_t mask, uint8_t tag1, + uint8_t tag2) +{ + // Precondition: tags are covered by within mask. + GC_ASSERT_EQ(tag1 & mask, tag1); + GC_ASSERT_EQ(tag2 & mask, tag2); + // Precondition: high bit of mask byte is empty, so that we can add without + // overflow. + GC_ASSERT_EQ(mask & 0x7f, mask); + // Precondition: mask is low bits of byte. 
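Together these imply that mask + 1 is a single bit, which the match below broadcasts and uses as its per-byte test bit.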
+ GC_ASSERT(mask); + GC_ASSERT_EQ(mask & (mask + 1), 0); + + uint64_t vmask = broadcast_byte(mask); + uint64_t vtest = broadcast_byte(mask + 1); + uint64_t vtag1 = broadcast_byte(tag1); + uint64_t vtag2 = broadcast_byte(tag2); + + bytes &= vmask; + uint64_t m1 = (bytes ^ vtag1) + vmask; + uint64_t m2 = (bytes ^ vtag2) + vmask; + return ((m1 & m2) & vtest) ^ vtest; +} + +static inline size_t +scan_for_byte_with_tags(uint8_t *ptr, size_t limit, uint8_t mask, + uint8_t tag1, uint8_t tag2) { + // The way we handle unaligned reads by padding high bytes with zeroes assumes + // that all-zeroes is not a matching byte. + GC_ASSERT(tag1 && tag2); + + size_t n = 0; + size_t unaligned = ((uintptr_t) ptr) & 7; + if (unaligned) { + uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8); + uint64_t match = match_bytes_against_2_tags(bytes, mask, tag1, tag2); + if (match) + return count_zero_bytes(match); + n += 8 - unaligned; + } + + for(; n < limit; n += 8) { + uint64_t bytes = load_eight_aligned_bytes(ptr + n); + uint64_t match = match_bytes_against_2_tags(bytes, mask, tag1, tag2); + if (match) + return n + count_zero_bytes(match); + } + + return limit; +} + +#endif // SWAR_H diff --git a/libguile/whippet/src/tracer.h b/libguile/whippet/src/tracer.h new file mode 100644 index 000000000..c563a7018 --- /dev/null +++ b/libguile/whippet/src/tracer.h @@ -0,0 +1,65 @@ +#ifndef TRACER_H +#define TRACER_H + +#include "gc-ref.h" +#include "gc-edge.h" +#include "root.h" + +struct gc_heap; + +// Data types to be implemented by tracer. +struct gc_tracer; +struct gc_trace_worker; +// Data types to be implemented by collector. +struct gc_trace_worker_data; + +//////////////////////////////////////////////////////////////////////// +/// To be implemented by collector. +//////////////////////////////////////////////////////////////////////// + +// Visit all fields in an object. +static inline void trace_one(struct gc_ref ref, struct gc_heap *heap, + struct gc_trace_worker *worker) GC_ALWAYS_INLINE; +static inline void trace_root(struct gc_root root, struct gc_heap *heap, + struct gc_trace_worker *worker) GC_ALWAYS_INLINE; + +static void +gc_trace_worker_call_with_data(void (*f)(struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker, + struct gc_trace_worker_data *data), + struct gc_tracer *tracer, + struct gc_heap *heap, + struct gc_trace_worker *worker); + +//////////////////////////////////////////////////////////////////////// +/// To be implemented by tracer. +//////////////////////////////////////////////////////////////////////// + +// Initialize the tracer when the heap is created. +static int gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap, + size_t parallelism); + +// Initialize the tracer for a new GC cycle. +static void gc_tracer_prepare(struct gc_tracer *tracer); + +// Release any resources allocated during the trace. +static void gc_tracer_release(struct gc_tracer *tracer); + +// Add root objects to the trace. Call before tracer_trace. +static inline void gc_tracer_add_root(struct gc_tracer *tracer, + struct gc_root root); + +// Given that an object has been shaded grey, enqueue for tracing. +static inline void gc_trace_worker_enqueue(struct gc_trace_worker *worker, + struct gc_ref ref) GC_ALWAYS_INLINE; +static inline struct gc_trace_worker_data* +gc_trace_worker_data(struct gc_trace_worker *worker) GC_ALWAYS_INLINE; + +// Just trace roots. 
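That is, visit only the roots added via gc_tracer_add_root, leaving the transitive heap trace to gc_tracer_trace.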
+static inline void gc_tracer_trace_roots(struct gc_tracer *tracer); + +// Run the full trace, including roots. +static inline void gc_tracer_trace(struct gc_tracer *tracer); + +#endif // TRACER_H diff --git a/libguile/whippet/test/test-address-map.c b/libguile/whippet/test/test-address-map.c new file mode 100644 index 000000000..abe11c4b0 --- /dev/null +++ b/libguile/whippet/test/test-address-map.c @@ -0,0 +1,109 @@ +#include + +#include "address-map.h" + +#define COUNT (1000 * 1000) + +static void add_to_other(uintptr_t addr, uintptr_t val, void *data) { + struct address_map *other = data; + if (addr >= COUNT) + fprintf(stdout, "unexpected address: %zu\n", addr); + if (address_map_contains(other, addr)) + fprintf(stdout, "missing: %zu\n", addr); + address_map_add(other, addr, val); +} + +int main(int argc, char *arv[]) { + struct address_map set; + address_map_init(&set); + for (size_t i = 0; i < COUNT; i++) + address_map_add(&set, i, -i); + fprintf(stdout, "after initial add, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + for (size_t i = 0; i < COUNT; i++) { + if (!address_map_contains(&set, i)) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + if (address_map_lookup(&set, i, -1) != -i) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + } + for (size_t i = COUNT; i < COUNT * 2; i++) { + if (address_map_contains(&set, i)) { + fprintf(stdout, "unexpectedly present: %zu\n", i); + return 1; + } + } + address_map_clear(&set); + fprintf(stdout, "after clear, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + for (size_t i = 0; i < COUNT; i++) + address_map_add(&set, i, 0); + // Now update. + fprintf(stdout, "after re-add, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + for (size_t i = 0; i < COUNT; i++) + address_map_add(&set, i, i + 1); + fprintf(stdout, "after idempotent re-add, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + for (size_t i = 0; i < COUNT; i++) { + if (!address_map_contains(&set, i)) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + if (address_map_lookup(&set, i, -1) != i + 1) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + } + for (size_t i = 0; i < COUNT; i++) + address_map_remove(&set, i); + fprintf(stdout, "after one-by-one removal, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + for (size_t i = COUNT; i < 2 * COUNT; i++) { + if (address_map_contains(&set, i)) { + fprintf(stdout, "unexpectedly present: %zu\n", i); + return 1; + } + } + for (size_t i = 0; i < COUNT; i++) + address_map_add(&set, i, i + 2); + struct address_map set2; + address_map_init(&set2); + address_map_for_each(&set, add_to_other, &set2); + fprintf(stdout, "after for-each set, %zu/%zu\n", set2.hash_map.n_items, + set2.hash_map.size); + for (size_t i = 0; i < COUNT; i++) { + if (address_map_lookup(&set2, i, -1) != i + 2) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + } + address_map_destroy(&set2); + + size_t burnin = 1000 * 1000 * 1000 / COUNT; + fprintf(stdout, "beginning clear then add %zu items, %zu times\n", + (size_t)COUNT, burnin); + for (size_t j = 0; j < burnin; j++) { + address_map_clear(&set); + for (size_t i = 0; i < COUNT; i++) + address_map_add(&set, i, i + 3); + } + fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + fprintf(stdout, "beginning lookup %zu items, %zu times\n", + (size_t)COUNT, burnin); + for (size_t j = 0; j < burnin; j++) { + for (size_t i = 0; i < COUNT; i++) { + if (address_map_lookup(&set, i, -1) != i + 3) { + fprintf(stdout, 
"missing: %zu\n", i); + return 1; + } + } + } + fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_map.n_items, + set.hash_map.size); + address_map_destroy(&set); +} diff --git a/libguile/whippet/test/test-address-set.c b/libguile/whippet/test/test-address-set.c new file mode 100644 index 000000000..ecd14b674 --- /dev/null +++ b/libguile/whippet/test/test-address-set.c @@ -0,0 +1,98 @@ +#include + +#include "address-set.h" + +#define COUNT (1000 * 1000) + +static void remove_from_other(uintptr_t addr, void *data) { + struct address_set *other = data; + if (addr >= COUNT) + fprintf(stdout, "unexpected address: %zu\n", addr); + if (!address_set_contains(other, addr)) + fprintf(stdout, "missing: %zu\n", addr); + address_set_remove(other, addr); +} + +int main(int argc, char *arv[]) { + struct address_set set; + address_set_init(&set); + for (size_t i = 0; i < COUNT; i++) + address_set_add(&set, i); + fprintf(stdout, "after initial add, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + for (size_t i = 0; i < COUNT; i++) { + if (!address_set_contains(&set, i)) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + } + for (size_t i = COUNT; i < COUNT * 2; i++) { + if (address_set_contains(&set, i)) { + fprintf(stdout, "unexpectedly present: %zu\n", i); + return 1; + } + } + address_set_clear(&set); + fprintf(stdout, "after clear, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + for (size_t i = 0; i < COUNT; i++) + address_set_add(&set, i); + // Do it twice. + fprintf(stdout, "after re-add, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + for (size_t i = 0; i < COUNT; i++) + address_set_add(&set, i); + fprintf(stdout, "after idempotent re-add, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + for (size_t i = 0; i < COUNT; i++) { + if (!address_set_contains(&set, i)) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + } + for (size_t i = 0; i < COUNT; i++) + address_set_remove(&set, i); + fprintf(stdout, "after one-by-one removal, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + for (size_t i = COUNT; i < 2 * COUNT; i++) { + if (address_set_contains(&set, i)) { + fprintf(stdout, "unexpectedly present: %zu\n", i); + return 1; + } + } + for (size_t i = 0; i < COUNT; i++) + address_set_add(&set, i); + struct address_set set2; + address_set_init(&set2); + address_set_union(&set2, &set); + fprintf(stdout, "populated set2, %zu/%zu\n", set2.hash_set.n_items, + set2.hash_set.size); + address_set_for_each(&set, remove_from_other, &set2); + fprintf(stdout, "after for-each removal, %zu/%zu\n", set2.hash_set.n_items, + set2.hash_set.size); + address_set_destroy(&set2); + + size_t burnin = 1000 * 1000 * 1000 / COUNT; + fprintf(stdout, "beginning clear then add %zu items, %zu times\n", + (size_t)COUNT, burnin); + for (size_t j = 0; j < burnin; j++) { + address_set_clear(&set); + for (size_t i = 0; i < COUNT; i++) + address_set_add(&set, i); + } + fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + fprintf(stdout, "beginning lookup %zu items, %zu times\n", + (size_t)COUNT, burnin); + for (size_t j = 0; j < burnin; j++) { + for (size_t i = 0; i < COUNT; i++) { + if (!address_set_contains(&set, i)) { + fprintf(stdout, "missing: %zu\n", i); + return 1; + } + } + } + fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_set.n_items, + set.hash_set.size); + address_set_destroy(&set); +} diff --git a/libguile/whippet/test/test-splay-tree.c b/libguile/whippet/test/test-splay-tree.c new file mode 100644 index 
000000000..7f6e916c6 --- /dev/null +++ b/libguile/whippet/test/test-splay-tree.c @@ -0,0 +1,116 @@ +#include +#include +#include +#include +#include + +struct object { + uintptr_t addr; + size_t size; +}; + +struct data { + size_t idx; +}; + +#define SPLAY_TREE_PREFIX object_ +typedef struct object object_key_span; +typedef uintptr_t object_key; +typedef struct data object_value; +static inline int +object_compare(uintptr_t addr, struct object obj) { + if (addr < obj.addr) return -1; + if (addr - obj.addr < obj.size) return 0; + return 1; +} +static inline uintptr_t +object_span_start(struct object obj) { + return obj.addr; +} +#include "splay-tree.h" + +// A power-law distribution. Each integer was selected by starting at +// 0, taking a random number in [0,1), and then accepting the integer if +// the random number was less than 0.15, or trying again with the next +// integer otherwise. Useful for modelling allocation sizes or number +// of garbage objects to allocate between live allocations. +static const uint8_t power_law_distribution[256] = { + 1, 15, 3, 12, 2, 8, 4, 0, 18, 7, 9, 8, 15, 2, 36, 5, + 1, 9, 6, 11, 9, 19, 2, 0, 0, 3, 9, 6, 3, 2, 1, 1, + 6, 1, 8, 4, 2, 0, 5, 3, 7, 0, 0, 3, 0, 4, 1, 7, + 1, 8, 2, 2, 2, 14, 0, 7, 8, 0, 2, 1, 4, 12, 7, 5, + 0, 3, 4, 13, 10, 2, 3, 7, 0, 8, 0, 23, 0, 16, 1, 1, + 6, 28, 1, 18, 0, 3, 6, 5, 8, 6, 14, 5, 2, 5, 0, 11, + 0, 18, 4, 16, 1, 4, 3, 13, 3, 23, 7, 4, 10, 5, 3, 13, + 0, 14, 5, 5, 2, 5, 0, 16, 2, 0, 1, 1, 0, 0, 4, 2, + 7, 7, 0, 5, 7, 2, 1, 24, 27, 3, 7, 1, 0, 8, 1, 4, + 0, 3, 0, 7, 7, 3, 9, 2, 9, 2, 5, 10, 1, 1, 12, 6, + 2, 9, 5, 0, 4, 6, 0, 7, 2, 1, 5, 4, 1, 0, 1, 15, + 4, 0, 15, 4, 0, 0, 32, 18, 2, 2, 1, 7, 8, 3, 11, 1, + 2, 7, 11, 1, 9, 1, 2, 6, 11, 17, 1, 2, 5, 1, 14, 3, + 6, 1, 1, 15, 3, 1, 0, 6, 10, 8, 1, 3, 2, 7, 0, 1, + 0, 11, 3, 3, 5, 8, 2, 0, 0, 7, 12, 2, 5, 20, 3, 7, + 4, 4, 5, 22, 1, 5, 2, 7, 15, 2, 4, 6, 11, 8, 12, 1 +}; + +static size_t power_law(size_t *counter) { + return power_law_distribution[(*counter)++ & 0xff]; +} + +static uintptr_t allocate(size_t size) { + void *ret = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ret == MAP_FAILED) { + perror("mmap failed"); + exit(1); + } + return (uintptr_t)ret; +} + +static const size_t GB = 1024 * 1024 * 1024; + +// Page size is at least 4 kB, so we will have at most 256 * 1024 allocations. +static uintptr_t all_objects[256 * 1024 + 1]; +static size_t object_count; + +#define ASSERT(x) do { if (!(x)) abort(); } while (0) + +int main(int argc, char *arv[]) { + struct object_tree tree; + + object_tree_init(&tree); + + size_t counter = 0; + size_t page_size = getpagesize(); + + // Use mmap as a source of nonoverlapping spans. Allocate 1 GB of address space. 
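PROT_NONE mappings merely reserve address ranges; no memory is committed and the test never touches the mapped pages.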
+ size_t allocated = 0; + while (allocated < 1 * GB) { + size_t size = power_law(&counter) * page_size; + if (!size) + continue; + uintptr_t addr = allocate(size); + object_tree_insert(&tree, + (struct object){addr, size}, + (struct data){object_count}); + all_objects[object_count++] = addr; + ASSERT(object_count < sizeof(all_objects) / sizeof(all_objects[0])); + allocated += size; + } + + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_contains(&tree, all_objects[i])); + + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_lookup(&tree, all_objects[i])->value.idx == i); + + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_lookup(&tree, all_objects[i] + 42)->value.idx == i); + + for (size_t i = 0; i < object_count; i++) + object_tree_remove(&tree, all_objects[i]); + + for (size_t i = 0; i < object_count; i++) + ASSERT(!object_tree_contains(&tree, all_objects[i])); + for (size_t i = 0; i < object_count; i++) + ASSERT(object_tree_lookup(&tree, all_objects[i]) == NULL); +} diff --git a/libguile/whippet/whippet.m4 b/libguile/whippet/whippet.m4 new file mode 100644 index 000000000..9cd5c3449 --- /dev/null +++ b/libguile/whippet/whippet.m4 @@ -0,0 +1,181 @@ +AC_DEFUN([WHIPPET_ENABLE_LTO], + [AC_REQUIRE([AC_PROG_CC]) + AC_MSG_CHECKING([whether the compiler supports -flto]) + old_CFLAGS="$CFLAGS" + LTO_CFLAGS="-flto" + CFLAGS="$CFLAGS $LTO_CFLAGS" + AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo;], [])],, [LTO_CFLAGS=]) + CFLAGS="$old_CFLAGS" + if test -n "$LTO_CFLAGS"; then + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + + AC_ARG_ENABLE(lto, + [AS_HELP_STRING([--enable-lto] + [enable link-time optimization])], + [], + [if test -z "$LTO_CFLAGS"; then enable_lto=no; else enable_lto=yes; fi]) + case "$enable_lto" in + yes | y) + if test -z "$LTO_CFLAGS"; then + AC_MSG_ERROR([--enable-lto=$enable_lto unsupported for $CC]) + fi + CFLAGS="$CFLAGS $LTO_CFLAGS" + AC_MSG_CHECKING([for lto-specific prefix for ar, nm, objcopy, ranlib]) + if test "$GCC" = yes; then + TOOLCHAIN_PREFIX=gcc + else + # Assuming LLVM if not GCC. Probably won't hurt. 
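The prefixed wrappers (gcc-ar, llvm-ar and friends) know how to index archives that contain LTO objects.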
+ TOOLCHAIN_PREFIX=llvm + fi + AC_MSG_RESULT([$TOOLCHAIN_PREFIX]) + AC_CHECK_TOOLS([AR], [$TOOLCHAIN_PREFIX-ar ar]) + AC_CHECK_TOOLS([NM], [$TOOLCHAIN_PREFIX-nm nm]) + AC_CHECK_TOOLS([OBJCOPY], [$TOOLCHAIN_PREFIX-objcopy objcopy]) + AC_CHECK_TOOLS([RANLIB], [$TOOLCHAIN_PREFIX-ranlib ranlib]) + ;; + no | n) + ;; + *) + AC_MSG_ERROR([unexpected --enable-lto=$enable_lto]) + ;; + esac]) + +AC_DEFUN([WHIPPET_PKG_PLATFORM], + [# Detect the target system + AC_MSG_CHECKING([which platform support library the garbage collector should use]) + case "$host_os" in + *linux-gnu*) + AC_MSG_RESULT(gnu-linux) + whippet_platform=gnu-linux + ;; + *) + AC_MSG_ERROR([unsupported host OS: $host_os]) + ;; + esac + AM_CONDITIONAL(WHIPPET_PLATFORM_GNU_LINUX, [test "$whippet_platform" = gnu-linux])]) + +AC_DEFUN([WHIPPET_PKG_TRACING], + [WHIPPET_TRACING_DEFAULT="m4_default([$1], [auto])" + AC_ARG_WITH(gc-lttng, + AS_HELP_STRING([--with-gc-lttng], + [Compile GC library with LTTng tracing support (default: $WHIPPET_TRACING_DEFAULT)]), + [whippet_with_lttng=$withval], + [whippet_with_lttng=auto]) + PKG_CHECK_MODULES(WHIPPET_LTTNG, lttng-ust, + [whippet_have_lttng=yes], [whippet_have_lttng=no]) + AC_MSG_CHECKING(whether to compile GC library with LTTng tracing support) + if test "$whippet_with_lttng" = auto; then + if test "$whippet_have_lttng" = no; then + whippet_use_lttng=no + else + whippet_use_lttng=yes + fi + else + whippet_use_lttng=$whippet_with_lttng + fi + AC_MSG_RESULT($whippet_use_lttng) + + if test "$whippet_use_lttng" != no && test "$whippet_have_lttng" = no; then + AC_MSG_ERROR([LTTng support explicitly required, but lttng not found]) + fi + AM_CONDITIONAL(WHIPPET_USE_LTTNG, [test "$whippet_use_lttng" != no]) + AC_SUBST(WHIPPET_LTTNG_CFLAGS) + AC_SUBST(WHIPPET_LTTNG_LIBS)]) + +AC_DEFUN([WHIPPET_PKG_COLLECTOR], + [PKG_CHECK_MODULES(WHIPPET_BDW, bdw-gc, + [whippet_have_bdw=yes], [whippet_have_bdw=no]) + AC_SUBST(WHIPPET_BDW_CFLAGS) + AC_SUBST(WHIPPET_BDW_LIBS) + + WHIPPET_COLLECTOR_DEFAULT="m4_default([$1], [pcc])" + AC_ARG_WITH(gc, + AS_HELP_STRING([--with-gc], + [Select garbage collector implementation (see --with-gc=help)]), + [whippet_collector=$withval], + [whippet_collector=$WHIPPET_COLLECTOR_DEFAULT]) + + WHIPPET_ALL_COLLECTORS=$(echo <