Mirror of https://git.savannah.gnu.org/git/guile.git, synced 2025-04-29 19:30:36 +02:00

Merged Whippet into libguile/whippet

This commit is contained in: commit db181e67ff

112 changed files with 18115 additions and 0 deletions

libguile/whippet/.gitignore (new file, 16 lines, vendored)

/*.o
/*.bdw
/*.semi
/*.mmc
/*.generational-mmc
/*.parallel-mmc
/*.parallel-generational-mmc
/*.stack-conservative-mmc
/*.stack-conservative-generational-mmc
/*.stack-conservative-parallel-mmc
/*.stack-conservative-parallel-generational-mmc
/*.heap-conservative-mmc
/*.heap-conservative-generational-mmc
/*.heap-conservative-parallel-mmc
/*.heap-conservative-parallel-generational-mmc
/.deps/

libguile/whippet/Makefile (new file, 141 lines)

TESTS = quads mt-gcbench ephemerons finalizers
COLLECTORS = \
  bdw \
  semi \
  \
  pcc \
  generational-pcc \
  \
  mmc \
  stack-conservative-mmc \
  heap-conservative-mmc \
  \
  parallel-mmc \
  stack-conservative-parallel-mmc \
  heap-conservative-parallel-mmc \
  \
  generational-mmc \
  stack-conservative-generational-mmc \
  heap-conservative-generational-mmc \
  \
  parallel-generational-mmc \
  stack-conservative-parallel-generational-mmc \
  heap-conservative-parallel-generational-mmc

DEFAULT_BUILD := opt

BUILD_CFLAGS_opt = -O2 -g -DNDEBUG
BUILD_CFLAGS_optdebug = -Og -g -DGC_DEBUG=1
BUILD_CFLAGS_debug = -O0 -g -DGC_DEBUG=1

BUILD_CFLAGS = $(BUILD_CFLAGS_$(or $(BUILD),$(DEFAULT_BUILD)))

USE_LTTNG_0 :=
USE_LTTNG_1 := 1
USE_LTTNG := $(shell pkg-config --exists lttng-ust && echo 1 || echo 0)
LTTNG_CPPFLAGS := $(if $(USE_LTTNG_$(USE_LTTNG)), $(shell pkg-config --cflags lttng-ust),)
LTTNG_LIBS := $(if $(USE_LTTNG_$(USE_LTTNG)), $(shell pkg-config --libs lttng-ust),)
TRACEPOINT_CPPFLAGS = $(if $(USE_LTTNG_$(USE_LTTNG)),$(LTTNG_CPPFLAGS) -DGC_TRACEPOINT_LTTNG=1,)
TRACEPOINT_LIBS = $(LTTNG_LIBS)

CC = gcc
CFLAGS = -Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(BUILD_CFLAGS)
CPPFLAGS = -Iapi $(TRACEPOINT_CPPFLAGS)
LDFLAGS = -lpthread -flto=auto $(TRACEPOINT_LIBS)
DEPFLAGS = -MMD -MP -MF $(@:obj/%.o=.deps/%.d)
COMPILE = $(CC) $(CFLAGS) $(CPPFLAGS) $(DEPFLAGS) -o $@
LINK = $(CC) $(LDFLAGS) -o $@
PLATFORM = gnu-linux

ALL_TESTS = $(foreach COLLECTOR,$(COLLECTORS),$(addsuffix .$(COLLECTOR),$(TESTS)))

all: $(ALL_TESTS:%=bin/%)
.deps obj bin: ; mkdir -p $@

include $(wildcard .deps/*)

obj/gc-platform.o: src/gc-platform-$(PLATFORM).c | .deps obj
	$(COMPILE) -c $<
obj/gc-stack.o: src/gc-stack.c | .deps obj
	$(COMPILE) -c $<
obj/gc-options.o: src/gc-options.c | .deps obj
	$(COMPILE) -c $<
obj/gc-tracepoint.o: src/gc-tracepoint.c | .deps obj
	$(COMPILE) -c $<
obj/%.gc-ephemeron.o: src/gc-ephemeron.c | .deps obj
	$(COMPILE) -include benchmarks/$*-embedder.h -c $<
obj/%.gc-finalizer.o: src/gc-finalizer.c | .deps obj
	$(COMPILE) -include benchmarks/$*-embedder.h -c $<

GC_STEM_bdw = bdw
GC_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
GC_IMPL_CFLAGS_bdw = `pkg-config --cflags bdw-gc`
GC_LIBS_bdw = `pkg-config --libs bdw-gc`

GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_LIBS_semi = -lm

GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
GC_LIBS_pcc = -lm

GC_STEM_generational_pcc = $(GC_STEM_pcc)
GC_CFLAGS_generational_pcc = $(GC_CFLAGS_pcc) -DGC_GENERATIONAL=1
GC_LIBS_generational_pcc = $(GC_LIBS_pcc)

define mmc_variant
GC_STEM_$(1) = mmc
GC_CFLAGS_$(1) = $(2)
GC_LIBS_$(1) = -lm
endef

define generational_mmc_variants
$(call mmc_variant,$(1)mmc,$(2))
$(call mmc_variant,$(1)generational_mmc,$(2) -DGC_GENERATIONAL=1)
endef

define parallel_mmc_variants
$(call generational_mmc_variants,$(1),$(2))
$(call generational_mmc_variants,$(1)parallel_,$(2) -DGC_PARALLEL=1)
endef

define trace_mmc_variants
$(call parallel_mmc_variants,,-DGC_PRECISE_ROOTS=1)
$(call parallel_mmc_variants,stack_conservative_,-DGC_CONSERVATIVE_ROOTS=1)
$(call parallel_mmc_variants,heap_conservative_,-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1)
endef

$(eval $(call trace_mmc_variants))

# $(1) is the benchmark, $(2) is the collector configuration
make_gc_var = $$($(1)$(subst -,_,$(2)))
gc_impl = $(call make_gc_var,GC_STEM_,$(1)).c
gc_attrs = $(call make_gc_var,GC_STEM_,$(1))-attrs.h
gc_cflags = $(call make_gc_var,GC_CFLAGS_,$(1))
gc_impl_cflags = $(call make_gc_var,GC_IMPL_CFLAGS_,$(1))
gc_libs = $(call make_gc_var,GC_LIBS_,$(1))
define benchmark_template
obj/$(1).$(2).gc.o: src/$(call gc_impl,$(2)) | .deps obj
	$$(COMPILE) $(call gc_cflags,$(2)) $(call gc_impl_cflags,$(2)) -include benchmarks/$(1)-embedder.h -c $$<
obj/$(1).$(2).o: benchmarks/$(1).c | .deps obj
	$$(COMPILE) $(call gc_cflags,$(2)) -include api/$(call gc_attrs,$(2)) -c $$<
bin/$(1).$(2): obj/$(1).$(2).gc.o obj/$(1).$(2).o obj/gc-stack.o obj/gc-options.o obj/gc-platform.o obj/gc-tracepoint.o obj/$(1).gc-ephemeron.o obj/$(1).gc-finalizer.o | bin
	$$(LINK) $$^ $(call gc_libs,$(2))
endef

$(foreach BENCHMARK,$(TESTS),\
  $(foreach COLLECTOR,$(COLLECTORS),\
    $(eval $(call benchmark_template,$(BENCHMARK),$(COLLECTOR)))))

.PRECIOUS: $(ALL_TESTS) $(OBJS)

clean:
	rm -f $(ALL_TESTS)
	rm -rf .deps obj bin

# Clear some of the default rules.
.SUFFIXES:
.SECONDARY:
%.c:;
Makefile:;

libguile/whippet/README.md (new file, 91 lines)

# Whippet Garbage Collector

This repository is for development of Whippet, a new garbage collector
implementation, eventually for use in [Guile
Scheme](https://gnu.org/s/guile).

Whippet is an embed-only C library, designed to be copied into a
program's source tree.  It exposes an abstract C API for managed memory
allocation, and provides a number of implementations of that API.

## Documentation

See the [documentation](./doc/README.md).

## Features

- Per-object pinning (with `mmc` collectors)
- Finalization (supporting resuscitation)
- Ephemerons (except on `bdw`, which has a polyfill)
- Conservative roots (optionally with `mmc` or always with `bdw`)
- Precise roots (optionally with `mmc` or always with `semi` / `pcc`)
- Precise embedder-parameterized heap tracing (except with `bdw`)
- Conservative heap tracing (optionally with `mmc`, always with `bdw`)
- Parallel tracing (except `semi`)
- Parallel mutators (except `semi`)
- Inline allocation / write barrier fast paths (supporting JIT)
- One unified API with no-overhead abstraction: switch collectors when
  you like
- Three policies for sizing heaps: fixed, proportional to live size, and
  [MemBalancer](http://marisa.moe/balancer.html)

## Source repository structure

* [api/](./api/): The user-facing API.  Also, the "embedder API"; see
  the [manual](./doc/manual.md) for more.
* [doc/](./doc/): Documentation, such as it is.
* [src/](./src/): The actual GC implementation, containing a number of
  collector implementations.  The embedder chooses which collector to
  use at compile-time.  See the [documentation](./doc/collectors.md)
  for more on the different collectors (`semi`, `bdw`, `pcc`, and the
  different flavors of `mmc`).
* [benchmarks/](./benchmarks/): Benchmarks.  A work in progress.
* [test/](./test/): A dusty attic of minimal testing.

## Status and roadmap

As of January 2025, Whippet is good to go!  Of course there will surely
be new features to build as Whippet gets integrated into language
run-times, but the basics are there.

The next phase on the roadmap is support for tracing, and some
performance noodling.

Once that is done, the big task is integrating Whippet into the [Guile
Scheme](https://gnu.org/s/guile) language run-time, replacing BDW-GC.
Fingers crossed!

## About the name

It sounds better than WIP (work-in-progress) garbage collector, doesn't
it?  Also apparently a whippet is a kind of dog that is fast for its
size.  It would be nice if the Whippet collectors turn out to have this
property.

## License

```
Copyright (c) 2022-2024 Andy Wingo

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
```

Note that some benchmarks have other licenses; see
[`benchmarks/README.md`](./benchmarks/README.md) for more.

libguile/whippet/api/bdw-attrs.h (new file, 91 lines)

#ifndef BDW_ATTRS_H
#define BDW_ATTRS_H

#include "gc-attrs.h"
#include "gc-assert.h"

static inline enum gc_allocator_kind gc_allocator_kind(void) {
  return GC_ALLOCATOR_INLINE_FREELIST;
}
static inline size_t gc_allocator_small_granule_size(void) {
  return 2 * sizeof(void *);
}
static inline size_t gc_allocator_large_threshold(void) {
  return 256;
}

static inline size_t gc_allocator_allocation_pointer_offset(void) {
  GC_CRASH();
}
static inline size_t gc_allocator_allocation_limit_offset(void) {
  GC_CRASH();
}

static inline size_t gc_allocator_freelist_offset(size_t size,
                                                  enum gc_allocation_kind kind) {
  GC_ASSERT(size);
  size_t base;
  switch (kind) {
  case GC_ALLOCATION_TAGGED:
  case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
    base = 0;
    break;
  case GC_ALLOCATION_UNTAGGED_POINTERLESS:
  case GC_ALLOCATION_TAGGED_POINTERLESS:
    base = (sizeof(void*) * gc_allocator_large_threshold() /
            gc_allocator_small_granule_size());
    break;
  }
  size_t bucket = (size - 1) / gc_allocator_small_granule_size();
  return base + sizeof(void*) * bucket;
}

static inline size_t gc_allocator_alloc_table_alignment(void) {
  return 0;
}
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind) {
  GC_CRASH();
}
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
  GC_CRASH();
}

static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t) {
  return GC_OLD_GENERATION_CHECK_NONE;
}
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
  GC_CRASH();
}
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
  GC_CRASH();
}

static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t) {
  return GC_WRITE_BARRIER_NONE;
}
static inline size_t gc_write_barrier_field_table_alignment(void) {
  GC_CRASH();
}
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
  GC_CRASH();
}
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
  GC_CRASH();
}
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
  GC_CRASH();
}

static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
  return GC_SAFEPOINT_MECHANISM_SIGNAL;
}

static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
  return GC_COOPERATIVE_SAFEPOINT_NONE;
}

static inline int gc_can_pin_objects(void) {
  return 1;
}

#endif // BDW_ATTRS_H

libguile/whippet/api/gc-allocation-kind.h (new file, 19 lines)

#ifndef GC_ALLOCATION_KIND_H
#define GC_ALLOCATION_KIND_H

enum gc_allocation_kind {
  // An object whose type can be inspected at run-time based on its contents,
  // and whose fields can be traced via the gc_trace_object procedure.
  GC_ALLOCATION_TAGGED,
  // Like GC_ALLOCATION_TAGGED, but not containing any fields that reference
  // GC-managed objects.  The GC may choose to handle these specially.
  GC_ALLOCATION_TAGGED_POINTERLESS,
  // A raw allocation whose type cannot be inspected at trace-time, and whose
  // fields should be traced conservatively.
  GC_ALLOCATION_UNTAGGED_CONSERVATIVE,
  // A raw allocation whose type cannot be inspected at trace-time, but
  // containing no fields that reference GC-managed objects.
  GC_ALLOCATION_UNTAGGED_POINTERLESS
};

#endif // GC_ALLOCATION_KIND_H

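The allocation kinds above tell the collector whether and how to trace an allocation. As a minimal sketch of how an embedder might pick a kind when calling the `gc_allocate` entry point declared in `api/gc-api.h` below; `struct pair` and the two helper functions are illustrative names, not part of this commit:

```
// Hypothetical embedder code, for illustration only.
#include "gc-api.h"

struct pair { uintptr_t tag; void *car; void *cdr; };

static struct pair* allocate_pair(struct gc_mutator *mut) {
  // The object has GC-visible fields and a tag the embedder can inspect,
  // so allocate it as a tagged object that gc_trace_object will visit.
  return gc_allocate(mut, sizeof(struct pair), GC_ALLOCATION_TAGGED);
}

static void* allocate_byte_buffer(struct gc_mutator *mut, size_t len) {
  // A raw byte buffer contains no references, so the pointerless kind
  // lets the collector skip tracing its contents.
  return gc_allocate(mut, len, GC_ALLOCATION_UNTAGGED_POINTERLESS);
}
```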
libguile/whippet/api/gc-api.h (new file, 301 lines)

#ifndef GC_API_H_
#define GC_API_H_

#include "gc-config.h"
#include "gc-allocation-kind.h"
#include "gc-assert.h"
#include "gc-attrs.h"
#include "gc-collection-kind.h"
#include "gc-edge.h"
#include "gc-event-listener.h"
#include "gc-inline.h"
#include "gc-options.h"
#include "gc-ref.h"
#include "gc-visibility.h"

#include <stdatomic.h>
#include <stdint.h>
#include <string.h>

struct gc_heap;
struct gc_mutator;

struct gc_stack_addr;
GC_API_ void* gc_call_with_stack_addr(void* (*f)(struct gc_stack_addr *,
                                                 void *),
                                      void *data) GC_NEVER_INLINE;

GC_API_ int gc_init(const struct gc_options *options,
                    struct gc_stack_addr *base, struct gc_heap **heap,
                    struct gc_mutator **mutator,
                    struct gc_event_listener event_listener,
                    void *event_listener_data);

GC_API_ uint64_t gc_allocation_counter(struct gc_heap *heap);

GC_API_ struct gc_heap* gc_mutator_heap(struct gc_mutator *mut);

GC_API_ uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap);
GC_API_ uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap);

struct gc_mutator_roots;
GC_API_ void gc_mutator_set_roots(struct gc_mutator *mut,
                                  struct gc_mutator_roots *roots);

struct gc_heap_roots;
GC_API_ void gc_heap_set_roots(struct gc_heap *heap,
                               struct gc_heap_roots *roots);

struct gc_extern_space;
GC_API_ void gc_heap_set_extern_space(struct gc_heap *heap,
                                      struct gc_extern_space *space);

GC_API_ struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *base,
                                              struct gc_heap *heap);
GC_API_ void gc_finish_for_thread(struct gc_mutator *mut);
GC_API_ void* gc_call_without_gc(struct gc_mutator *mut, void* (*f)(void*),
                                 void *data) GC_NEVER_INLINE;

GC_API_ void gc_collect(struct gc_mutator *mut,
                        enum gc_collection_kind requested_kind);

static inline void gc_update_alloc_table(struct gc_ref obj, size_t size,
                                         enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void gc_update_alloc_table(struct gc_ref obj, size_t size,
                                         enum gc_allocation_kind kind) {
  size_t alignment = gc_allocator_alloc_table_alignment();
  if (!alignment) return;

  uintptr_t addr = gc_ref_value(obj);
  uintptr_t base = addr & ~(alignment - 1);
  size_t granule_size = gc_allocator_small_granule_size();
  uintptr_t granule = (addr & (alignment - 1)) / granule_size;
  uint8_t *alloc = (uint8_t*)(base + granule);

  uint8_t begin_pattern = gc_allocator_alloc_table_begin_pattern(kind);
  uint8_t end_pattern = gc_allocator_alloc_table_end_pattern();
  if (end_pattern) {
    size_t granules = size / granule_size;
    if (granules == 1) {
      alloc[0] = begin_pattern | end_pattern;
    } else {
      alloc[0] = begin_pattern;
      if (granules > 2)
        memset(alloc + 1, 0, granules - 2);
      alloc[granules - 1] = end_pattern;
    }
  } else {
    alloc[0] = begin_pattern;
  }
}

GC_API_ void* gc_allocate_slow(struct gc_mutator *mut, size_t bytes,
                               enum gc_allocation_kind kind) GC_NEVER_INLINE;

static inline void*
gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut, size_t size,
                                    enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_small_fast_bump_pointer(struct gc_mutator *mut,
                                                        size_t size,
                                                        enum gc_allocation_kind kind) {
  GC_ASSERT(size <= gc_allocator_large_threshold());

  size_t granule_size = gc_allocator_small_granule_size();
  size_t hp_offset = gc_allocator_allocation_pointer_offset();
  size_t limit_offset = gc_allocator_allocation_limit_offset();

  uintptr_t base_addr = (uintptr_t)mut;
  uintptr_t *hp_loc = (uintptr_t*)(base_addr + hp_offset);
  uintptr_t *limit_loc = (uintptr_t*)(base_addr + limit_offset);

  size = (size + granule_size - 1) & ~(granule_size - 1);
  uintptr_t hp = *hp_loc;
  uintptr_t limit = *limit_loc;
  uintptr_t new_hp = hp + size;

  if (GC_UNLIKELY (new_hp > limit))
    return NULL;

  *hp_loc = new_hp;

  gc_update_alloc_table(gc_ref(hp), size, kind);

  return (void*)hp;
}

static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut,
                                                    size_t size,
                                                    enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_small_fast_freelist(struct gc_mutator *mut,
                                                    size_t size,
                                                    enum gc_allocation_kind kind) {
  GC_ASSERT(size <= gc_allocator_large_threshold());

  size_t freelist_offset = gc_allocator_freelist_offset(size, kind);
  uintptr_t base_addr = (uintptr_t)mut;
  void **freelist_loc = (void**)(base_addr + freelist_offset);

  void *head = *freelist_loc;
  if (GC_UNLIKELY(!head))
    return NULL;

  *freelist_loc = *(void**)head;

  gc_update_alloc_table(gc_ref_from_heap_object(head), size, kind);

  return head;
}

static inline void* gc_allocate_small_fast(struct gc_mutator *mut, size_t size,
                                           enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_small_fast(struct gc_mutator *mut, size_t size,
                                           enum gc_allocation_kind kind) {
  GC_ASSERT(size != 0);
  GC_ASSERT(size <= gc_allocator_large_threshold());

  switch (gc_allocator_kind()) {
  case GC_ALLOCATOR_INLINE_BUMP_POINTER:
    return gc_allocate_small_fast_bump_pointer(mut, size, kind);
  case GC_ALLOCATOR_INLINE_FREELIST:
    return gc_allocate_small_fast_freelist(mut, size, kind);
  case GC_ALLOCATOR_INLINE_NONE:
    return NULL;
  default:
    GC_CRASH();
  }
}

static inline void* gc_allocate_fast(struct gc_mutator *mut, size_t size,
                                     enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate_fast(struct gc_mutator *mut, size_t size,
                                     enum gc_allocation_kind kind) {
  GC_ASSERT(size != 0);
  if (size > gc_allocator_large_threshold())
    return NULL;

  return gc_allocate_small_fast(mut, size, kind);
}

static inline void* gc_allocate(struct gc_mutator *mut, size_t size,
                                enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline void* gc_allocate(struct gc_mutator *mut, size_t size,
                                enum gc_allocation_kind kind) {
  void *ret = gc_allocate_fast(mut, size, kind);
  if (GC_LIKELY(ret != NULL))
    return ret;

  return gc_allocate_slow(mut, size, kind);
}

GC_API_ int gc_object_is_old_generation_slow(struct gc_mutator *mut,
                                             struct gc_ref obj) GC_NEVER_INLINE;

static inline int gc_object_is_old_generation(struct gc_mutator *mut,
                                              struct gc_ref obj,
                                              size_t obj_size) GC_ALWAYS_INLINE;
static inline int gc_object_is_old_generation(struct gc_mutator *mut,
                                              struct gc_ref obj,
                                              size_t obj_size) {
  switch (gc_old_generation_check_kind(obj_size)) {
  case GC_OLD_GENERATION_CHECK_ALLOC_TABLE: {
    size_t alignment = gc_allocator_alloc_table_alignment();
    GC_ASSERT(alignment);
    uintptr_t addr = gc_ref_value(obj);
    uintptr_t base = addr & ~(alignment - 1);
    size_t granule_size = gc_allocator_small_granule_size();
    uintptr_t granule = (addr & (alignment - 1)) / granule_size;
    uint8_t *byte_loc = (uint8_t*)(base + granule);
    uint8_t byte = atomic_load_explicit(byte_loc, memory_order_relaxed);
    uint8_t mask = gc_old_generation_check_alloc_table_tag_mask();
    uint8_t young = gc_old_generation_check_alloc_table_young_tag();
    return (byte & mask) != young;
  }
  case GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY: {
    struct gc_heap *heap = gc_mutator_heap(mut);
    // Note that these addresses are fixed and that the embedder might
    // want to store them somewhere or inline them into the output of
    // JIT-generated code.  They may also be power-of-two aligned.
    uintptr_t low_addr = gc_small_object_nursery_low_address(heap);
    uintptr_t high_addr = gc_small_object_nursery_high_address(heap);
    uintptr_t size = high_addr - low_addr;
    uintptr_t addr = gc_ref_value(obj);
    return addr - low_addr >= size;
  }
  case GC_OLD_GENERATION_CHECK_SLOW:
    return gc_object_is_old_generation_slow(mut, obj);
  default:
    GC_CRASH();
  }
}

GC_API_ void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj,
                                   size_t obj_size, struct gc_edge edge,
                                   struct gc_ref new_val) GC_NEVER_INLINE;

static inline int gc_write_barrier_fast(struct gc_mutator *mut, struct gc_ref obj,
                                        size_t obj_size, struct gc_edge edge,
                                        struct gc_ref new_val) GC_ALWAYS_INLINE;
static inline int gc_write_barrier_fast(struct gc_mutator *mut, struct gc_ref obj,
                                        size_t obj_size, struct gc_edge edge,
                                        struct gc_ref new_val) {
  switch (gc_write_barrier_kind(obj_size)) {
  case GC_WRITE_BARRIER_NONE:
    return 0;
  case GC_WRITE_BARRIER_FIELD: {
    if (!gc_object_is_old_generation(mut, obj, obj_size))
      return 0;

    size_t field_table_alignment = gc_write_barrier_field_table_alignment();
    size_t fields_per_byte = gc_write_barrier_field_fields_per_byte();
    uint8_t first_bit_pattern = gc_write_barrier_field_first_bit_pattern();
    ssize_t table_offset = gc_write_barrier_field_table_offset();

    uintptr_t addr = gc_edge_address(edge);
    uintptr_t base = addr & ~(field_table_alignment - 1);
    uintptr_t field = (addr & (field_table_alignment - 1)) / sizeof(uintptr_t);
    uintptr_t log_byte = field / fields_per_byte;
    uint8_t log_bit = first_bit_pattern << (field % fields_per_byte);
    uint8_t *byte_loc = (uint8_t*)(base + table_offset + log_byte);
    uint8_t byte = atomic_load_explicit(byte_loc, memory_order_relaxed);
    return !(byte & log_bit);
  }
  case GC_WRITE_BARRIER_SLOW:
    return 1;
  default:
    GC_CRASH();
  }
}

static inline void gc_write_barrier(struct gc_mutator *mut, struct gc_ref obj,
                                    size_t obj_size, struct gc_edge edge,
                                    struct gc_ref new_val) GC_ALWAYS_INLINE;
static inline void gc_write_barrier(struct gc_mutator *mut, struct gc_ref obj,
                                    size_t obj_size, struct gc_edge edge,
                                    struct gc_ref new_val) {
  if (GC_UNLIKELY(gc_write_barrier_fast(mut, obj, obj_size, edge, new_val)))
    gc_write_barrier_slow(mut, obj, obj_size, edge, new_val);
}

GC_API_ void gc_pin_object(struct gc_mutator *mut, struct gc_ref obj);

GC_API_ void gc_safepoint_slow(struct gc_mutator *mut) GC_NEVER_INLINE;
GC_API_ int* gc_safepoint_flag_loc(struct gc_mutator *mut);
static inline int gc_should_stop_for_safepoint(struct gc_mutator *mut) {
  switch (gc_cooperative_safepoint_kind()) {
  case GC_COOPERATIVE_SAFEPOINT_NONE:
    return 0;
  case GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG:
  case GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG: {
    return atomic_load_explicit(gc_safepoint_flag_loc(mut),
                                memory_order_relaxed);
  }
  default:
    GC_CRASH();
  }
}
static inline void gc_safepoint(struct gc_mutator *mut) {
  if (GC_UNLIKELY(gc_should_stop_for_safepoint(mut)))
    gc_safepoint_slow(mut);
}

#endif // GC_API_H_

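The fast paths above are meant to be inlined into the embedder's own mutator code: the embedder calls `gc_write_barrier` around reference stores and polls `gc_safepoint` at regular points. A hedged sketch of what that looks like from the embedder's side; `struct pair` and `set_car` are illustrative names, not part of this commit:

```
// Hypothetical embedder code, for illustration only.
#include "gc-api.h"

struct pair { uintptr_t tag; struct gc_ref car; struct gc_ref cdr; };

static void set_car(struct gc_mutator *mut, struct pair *p,
                    struct gc_ref new_car) {
  // Inform the collector of the reference store so that a generational
  // collector can remember the edge; on non-generational configurations
  // gc_write_barrier_kind() is NONE and this compiles away.
  gc_write_barrier(mut, gc_ref_from_heap_object(p), sizeof(*p),
                   gc_edge(&p->car), new_car);
  p->car = new_car;
}

static void mutator_loop_step(struct gc_mutator *mut) {
  // Cooperative collectors rely on the mutator polling for safepoints;
  // with the signal-based mechanism this is a no-op.
  gc_safepoint(mut);
}
```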
libguile/whippet/api/gc-assert.h (new file, 21 lines)

#ifndef GC_ASSERT_H
#define GC_ASSERT_H

#include "gc-config.h"

#define GC_UNLIKELY(e) __builtin_expect(e, 0)
#define GC_LIKELY(e) __builtin_expect(e, 1)

#define GC_CRASH() __builtin_trap()

#if GC_DEBUG
#define GC_ASSERT(x) do { if (GC_UNLIKELY(!(x))) GC_CRASH(); } while (0)
#define GC_UNREACHABLE() GC_CRASH()
#else
#define GC_ASSERT(x) do { } while (0)
#define GC_UNREACHABLE() __builtin_unreachable()
#endif

#define GC_ASSERT_EQ(a, b) GC_ASSERT((a) == (b))

#endif // GC_ASSERT_H

libguile/whippet/api/gc-attrs.h (new file, 69 lines)

#ifndef GC_ATTRS_H
#define GC_ATTRS_H

#include "gc-inline.h"
#include "gc-allocation-kind.h"

#include <stddef.h>
#include <stdint.h>

enum gc_allocator_kind {
  GC_ALLOCATOR_INLINE_BUMP_POINTER,
  GC_ALLOCATOR_INLINE_FREELIST,
  GC_ALLOCATOR_INLINE_NONE
};

static inline enum gc_allocator_kind gc_allocator_kind(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_large_threshold(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_small_granule_size(void) GC_ALWAYS_INLINE;

static inline size_t gc_allocator_allocation_pointer_offset(void) GC_ALWAYS_INLINE;
static inline size_t gc_allocator_allocation_limit_offset(void) GC_ALWAYS_INLINE;

static inline size_t gc_allocator_freelist_offset(size_t size,
                                                  enum gc_allocation_kind kind) GC_ALWAYS_INLINE;

static inline size_t gc_allocator_alloc_table_alignment(void) GC_ALWAYS_INLINE;
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) GC_ALWAYS_INLINE;
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) GC_ALWAYS_INLINE;

enum gc_old_generation_check_kind {
  GC_OLD_GENERATION_CHECK_NONE,
  GC_OLD_GENERATION_CHECK_ALLOC_TABLE,
  GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY,
  GC_OLD_GENERATION_CHECK_SLOW
};

static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) GC_ALWAYS_INLINE;

static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) GC_ALWAYS_INLINE;
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) GC_ALWAYS_INLINE;

enum gc_write_barrier_kind {
  GC_WRITE_BARRIER_NONE,
  GC_WRITE_BARRIER_FIELD,
  GC_WRITE_BARRIER_SLOW
};

static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) GC_ALWAYS_INLINE;
static inline size_t gc_write_barrier_field_table_alignment(void) GC_ALWAYS_INLINE;
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) GC_ALWAYS_INLINE;
static inline size_t gc_write_barrier_field_fields_per_byte(void) GC_ALWAYS_INLINE;
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) GC_ALWAYS_INLINE;

enum gc_safepoint_mechanism {
  GC_SAFEPOINT_MECHANISM_COOPERATIVE,
  GC_SAFEPOINT_MECHANISM_SIGNAL,
};
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) GC_ALWAYS_INLINE;

enum gc_cooperative_safepoint_kind {
  GC_COOPERATIVE_SAFEPOINT_NONE,
  GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG,
  GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG,
};
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) GC_ALWAYS_INLINE;

static inline int gc_can_pin_objects(void) GC_ALWAYS_INLINE;

#endif // GC_ATTRS_H

libguile/whippet/api/gc-basic-stats.h (new file, 177 lines)

#ifndef GC_BASIC_STATS_H
#define GC_BASIC_STATS_H

#include "gc-event-listener.h"
#include "gc-histogram.h"

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <time.h>

GC_DEFINE_HISTOGRAM(gc_latency, 25, 4);

struct gc_basic_stats {
  uint64_t major_collection_count;
  uint64_t minor_collection_count;
  uint64_t last_time_usec;
  uint64_t last_cpu_time_usec;
  uint64_t elapsed_mutator_usec;
  uint64_t elapsed_collector_usec;
  uint64_t cpu_mutator_usec;
  uint64_t cpu_collector_usec;
  size_t heap_size;
  size_t max_heap_size;
  size_t max_live_data_size;
  struct gc_latency pause_times;
};

static inline uint64_t gc_basic_stats_now(void) {
  struct timeval tv;
  if (gettimeofday(&tv, NULL) != 0) GC_CRASH();
  uint64_t ret = tv.tv_sec;
  ret *= 1000 * 1000;
  ret += tv.tv_usec;
  return ret;
}

static inline uint64_t gc_basic_stats_cpu_time(void) {
  struct timespec ts;
  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &ts);
  uint64_t ret = ts.tv_sec;
  ret *= 1000 * 1000;
  ret += ts.tv_nsec / 1000;
  return ret;
}

static inline void gc_basic_stats_init(void *data, size_t heap_size) {
  struct gc_basic_stats *stats = data;
  memset(stats, 0, sizeof(*stats));
  stats->last_time_usec = gc_basic_stats_now();
  stats->last_cpu_time_usec = gc_basic_stats_cpu_time();
  stats->heap_size = stats->max_heap_size = heap_size;
}

static inline void gc_basic_stats_requesting_stop(void *data) {
  struct gc_basic_stats *stats = data;
  uint64_t now = gc_basic_stats_now();
  uint64_t cpu_time = gc_basic_stats_cpu_time();
  stats->elapsed_mutator_usec += now - stats->last_time_usec;
  stats->cpu_mutator_usec += cpu_time - stats->last_cpu_time_usec;
  stats->last_time_usec = now;
  stats->last_cpu_time_usec = cpu_time;
}
static inline void gc_basic_stats_waiting_for_stop(void *data) {}
static inline void gc_basic_stats_mutators_stopped(void *data) {}

static inline void gc_basic_stats_prepare_gc(void *data,
                                             enum gc_collection_kind kind) {
  struct gc_basic_stats *stats = data;
  if (kind == GC_COLLECTION_MINOR)
    stats->minor_collection_count++;
  else
    stats->major_collection_count++;
}

static inline void gc_basic_stats_roots_traced(void *data) {}
static inline void gc_basic_stats_heap_traced(void *data) {}
static inline void gc_basic_stats_ephemerons_traced(void *data) {}
static inline void gc_basic_stats_finalizers_traced(void *data) {}

static inline void gc_basic_stats_restarting_mutators(void *data) {
  struct gc_basic_stats *stats = data;
  uint64_t now = gc_basic_stats_now();
  uint64_t cpu_time = gc_basic_stats_cpu_time();
  uint64_t pause_time = now - stats->last_time_usec;
  uint64_t pause_cpu_time = cpu_time - stats->last_cpu_time_usec;
  stats->elapsed_collector_usec += pause_time;
  stats->cpu_collector_usec += pause_cpu_time;
  gc_latency_record(&stats->pause_times, pause_time);
  stats->last_time_usec = now;
  stats->last_cpu_time_usec = cpu_time;
}

static inline void* gc_basic_stats_mutator_added(void *data) {
  return NULL;
}
static inline void gc_basic_stats_mutator_cause_gc(void *mutator_data) {}
static inline void gc_basic_stats_mutator_stopping(void *mutator_data) {}
static inline void gc_basic_stats_mutator_stopped(void *mutator_data) {}
static inline void gc_basic_stats_mutator_restarted(void *mutator_data) {}
static inline void gc_basic_stats_mutator_removed(void *mutator_data) {}

static inline void gc_basic_stats_heap_resized(void *data, size_t size) {
  struct gc_basic_stats *stats = data;
  stats->heap_size = size;
  if (size > stats->max_heap_size)
    stats->max_heap_size = size;
}

static inline void gc_basic_stats_live_data_size(void *data, size_t size) {
  struct gc_basic_stats *stats = data;
  if (size > stats->max_live_data_size)
    stats->max_live_data_size = size;
}

#define GC_BASIC_STATS \
  ((struct gc_event_listener) { \
    gc_basic_stats_init, \
    gc_basic_stats_requesting_stop, \
    gc_basic_stats_waiting_for_stop, \
    gc_basic_stats_mutators_stopped, \
    gc_basic_stats_prepare_gc, \
    gc_basic_stats_roots_traced, \
    gc_basic_stats_heap_traced, \
    gc_basic_stats_ephemerons_traced, \
    gc_basic_stats_finalizers_traced, \
    gc_basic_stats_restarting_mutators, \
    gc_basic_stats_mutator_added, \
    gc_basic_stats_mutator_cause_gc, \
    gc_basic_stats_mutator_stopping, \
    gc_basic_stats_mutator_stopped, \
    gc_basic_stats_mutator_restarted, \
    gc_basic_stats_mutator_removed, \
    gc_basic_stats_heap_resized, \
    gc_basic_stats_live_data_size, \
  })

static inline void gc_basic_stats_finish(struct gc_basic_stats *stats) {
  uint64_t now = gc_basic_stats_now();
  uint64_t cpu_time = gc_basic_stats_cpu_time();
  stats->elapsed_mutator_usec += now - stats->last_time_usec;
  stats->cpu_mutator_usec += cpu_time - stats->last_cpu_time_usec;
  stats->last_time_usec = now;
  stats->last_cpu_time_usec = cpu_time;
}

static inline void gc_basic_stats_print(struct gc_basic_stats *stats, FILE *f) {
  fprintf(f, "Completed %" PRIu64 " major collections (%" PRIu64 " minor).\n",
          stats->major_collection_count, stats->minor_collection_count);
  uint64_t stopped = stats->elapsed_collector_usec;
  uint64_t elapsed = stats->elapsed_mutator_usec + stopped;
  uint64_t cpu_stopped = stats->cpu_collector_usec;
  uint64_t cpu_total = stats->cpu_mutator_usec + cpu_stopped;
  uint64_t ms = 1000; // per usec
  fprintf(f, "%" PRIu64 ".%.3" PRIu64 " ms total time "
          "(%" PRIu64 ".%.3" PRIu64 " stopped); "
          "%" PRIu64 ".%.3" PRIu64 " ms CPU time "
          "(%" PRIu64 ".%.3" PRIu64 " stopped).\n",
          elapsed / ms, elapsed % ms, stopped / ms, stopped % ms,
          cpu_total / ms, cpu_total % ms, cpu_stopped / ms, cpu_stopped % ms);
  uint64_t pause_median = gc_latency_median(&stats->pause_times);
  uint64_t pause_p95 = gc_latency_percentile(&stats->pause_times, 0.95);
  uint64_t pause_max = gc_latency_max(&stats->pause_times);
  fprintf(f, "%" PRIu64 ".%.3" PRIu64 " ms median pause time, "
          "%" PRIu64 ".%.3" PRIu64 " p95, "
          "%" PRIu64 ".%.3" PRIu64 " max.\n",
          pause_median / ms, pause_median % ms, pause_p95 / ms, pause_p95 % ms,
          pause_max / ms, pause_max % ms);
  double MB = 1e6;
  fprintf(f, "Heap size is %.3f MB (max %.3f MB); peak live data %.3f MB.\n",
          stats->heap_size / MB, stats->max_heap_size / MB,
          stats->max_live_data_size / MB);
}

#endif // GC_BASIC_STATS_H_

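This basic-stats listener is wired into a heap by passing the `GC_BASIC_STATS` initializer and a `struct gc_basic_stats` pointer to `gc_init` (declared in `api/gc-api.h` above). A hedged sketch of that wiring; `make_options()` is a hypothetical stand-in for however the embedder builds a `struct gc_options` via `api/gc-options.h`, which is not part of this excerpt:

```
// Hypothetical embedder code, for illustration only.
#include <stdlib.h>
#include "gc-api.h"
#include "gc-basic-stats.h"

extern struct gc_options* make_options(void);  // assumption, not in this commit

static struct gc_heap *heap;
static struct gc_mutator *mut;
static struct gc_basic_stats stats;

static void* init_with_stats(struct gc_stack_addr *stack_base, void *unused) {
  // The listener struct is passed by value; `&stats` is its data pointer.
  if (!gc_init(make_options(), stack_base, &heap, &mut, GC_BASIC_STATS, &stats))
    abort();
  return NULL;
}

int main(void) {
  gc_call_with_stack_addr(init_with_stats, NULL);
  // ... run the program's workload here ...
  gc_basic_stats_finish(&stats);
  gc_basic_stats_print(&stats, stdout);
  return 0;
}
```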
libguile/whippet/api/gc-collection-kind.h (new file, 11 lines)

#ifndef GC_COLLECTION_KIND_H
#define GC_COLLECTION_KIND_H

enum gc_collection_kind {
  GC_COLLECTION_ANY,
  GC_COLLECTION_MINOR,
  GC_COLLECTION_MAJOR,
  GC_COLLECTION_COMPACTING,
};

#endif // GC_COLLECTION_KIND_H

libguile/whippet/api/gc-config.h (new file, 40 lines)

#ifndef GC_CONFIG_H
#define GC_CONFIG_H

#ifndef GC_DEBUG
#define GC_DEBUG 0
#endif

#ifndef GC_HAS_IMMEDIATES
#define GC_HAS_IMMEDIATES 1
#endif

#ifndef GC_PARALLEL
#define GC_PARALLEL 0
#endif

#ifndef GC_GENERATIONAL
#define GC_GENERATIONAL 0
#endif

// Though you normally wouldn't configure things this way, it's possible
// to have both precise and conservative roots.  However we have to
// either have precise or conservative tracing; not a mix.

#ifndef GC_PRECISE_ROOTS
#define GC_PRECISE_ROOTS 0
#endif

#ifndef GC_CONSERVATIVE_ROOTS
#define GC_CONSERVATIVE_ROOTS 0
#endif

#ifndef GC_CONSERVATIVE_TRACE
#define GC_CONSERVATIVE_TRACE 0
#endif

#ifndef GC_CONCURRENT_TRACE
#define GC_CONCURRENT_TRACE 0
#endif

#endif // GC_CONFIG_H

libguile/whippet/api/gc-conservative-ref.h (new file, 17 lines)

#ifndef GC_CONSERVATIVE_REF_H
#define GC_CONSERVATIVE_REF_H

#include <stdint.h>

struct gc_conservative_ref {
  uintptr_t value;
};

static inline struct gc_conservative_ref gc_conservative_ref(uintptr_t value) {
  return (struct gc_conservative_ref){value};
}
static inline uintptr_t gc_conservative_ref_value(struct gc_conservative_ref ref) {
  return ref.value;
}

#endif // GC_CONSERVATIVE_REF_H

libguile/whippet/api/gc-edge.h (new file, 26 lines)

#ifndef GC_EDGE_H
#define GC_EDGE_H

#include "gc-ref.h"

struct gc_edge {
  struct gc_ref *dst;
};

static inline struct gc_edge gc_edge(void* addr) {
  return (struct gc_edge){addr};
}
static inline struct gc_ref gc_edge_ref(struct gc_edge edge) {
  return *edge.dst;
}
static inline struct gc_ref* gc_edge_loc(struct gc_edge edge) {
  return edge.dst;
}
static inline uintptr_t gc_edge_address(struct gc_edge edge) {
  return (uintptr_t)gc_edge_loc(edge);
}
static inline void gc_edge_update(struct gc_edge edge, struct gc_ref ref) {
  *edge.dst = ref;
}

#endif // GC_EDGE_H

libguile/whippet/api/gc-embedder-api.h (new file, 67 lines)

#ifndef GC_EMBEDDER_API_H
#define GC_EMBEDDER_API_H

#include <stddef.h>

#include "gc-config.h"
#include "gc-edge.h"
#include "gc-inline.h"
#include "gc-forwarding.h"

#ifndef GC_EMBEDDER_API
#define GC_EMBEDDER_API static
#endif

struct gc_mutator_roots;
struct gc_heap_roots;
struct gc_atomic_forward;
struct gc_heap;
struct gc_extern_space;

GC_EMBEDDER_API inline int gc_is_valid_conservative_ref_displacement(uintptr_t displacement);
GC_EMBEDDER_API inline size_t gc_finalizer_priority_count(void);

GC_EMBEDDER_API inline int gc_extern_space_visit(struct gc_extern_space *space,
                                                 struct gc_edge edge,
                                                 struct gc_ref ref) GC_ALWAYS_INLINE;
GC_EMBEDDER_API inline void gc_extern_space_start_gc(struct gc_extern_space *space,
                                                     int is_minor_gc);
GC_EMBEDDER_API inline void gc_extern_space_finish_gc(struct gc_extern_space *space,
                                                      int is_minor_gc);

GC_EMBEDDER_API inline void gc_trace_object(struct gc_ref ref,
                                            void (*visit)(struct gc_edge edge,
                                                          struct gc_heap *heap,
                                                          void *visit_data),
                                            struct gc_heap *heap,
                                            void *trace_data,
                                            size_t *size) GC_ALWAYS_INLINE;

GC_EMBEDDER_API inline void gc_trace_mutator_roots(struct gc_mutator_roots *roots,
                                                   void (*trace_edge)(struct gc_edge edge,
                                                                      struct gc_heap *heap,
                                                                      void *trace_data),
                                                   struct gc_heap *heap,
                                                   void *trace_data);
GC_EMBEDDER_API inline void gc_trace_heap_roots(struct gc_heap_roots *roots,
                                                void (*trace_edge)(struct gc_edge edge,
                                                                   struct gc_heap *heap,
                                                                   void *trace_data),
                                                struct gc_heap *heap,
                                                void *trace_data);

GC_EMBEDDER_API inline uintptr_t gc_object_forwarded_nonatomic(struct gc_ref ref);
GC_EMBEDDER_API inline void gc_object_forward_nonatomic(struct gc_ref ref,
                                                        struct gc_ref new_ref);

GC_EMBEDDER_API inline struct gc_atomic_forward gc_atomic_forward_begin(struct gc_ref ref);
GC_EMBEDDER_API inline void gc_atomic_forward_acquire(struct gc_atomic_forward *);
GC_EMBEDDER_API inline int gc_atomic_forward_retry_busy(struct gc_atomic_forward *);
GC_EMBEDDER_API inline void gc_atomic_forward_abort(struct gc_atomic_forward *);
GC_EMBEDDER_API inline size_t gc_atomic_forward_object_size(struct gc_atomic_forward *);
GC_EMBEDDER_API inline void gc_atomic_forward_commit(struct gc_atomic_forward *,
                                                     struct gc_ref new_ref);
GC_EMBEDDER_API inline uintptr_t gc_atomic_forward_address(struct gc_atomic_forward *);


#endif // GC_EMBEDDER_API_H

libguile/whippet/api/gc-ephemeron.h (new file, 42 lines)

#ifndef GC_EPHEMERON_H_
#define GC_EPHEMERON_H_

#include "gc-edge.h"
#include "gc-ref.h"
#include "gc-visibility.h"

// Ephemerons establish an association between a "key" object and a
// "value" object.  If the ephemeron and the key are live, then the
// value is live, and can be retrieved from the ephemeron.  Ephemerons
// can be chained together, which allows them to function as links in a
// buckets-and-chains hash table.
//
// This file defines the user-facing API for ephemerons.

struct gc_heap;
struct gc_mutator;
struct gc_ephemeron;

GC_API_ size_t gc_ephemeron_size(void);
GC_API_ struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut);
GC_API_ void gc_ephemeron_init(struct gc_mutator *mut,
                               struct gc_ephemeron *ephemeron,
                               struct gc_ref key, struct gc_ref value);

GC_API_ struct gc_ref gc_ephemeron_key(struct gc_ephemeron *ephemeron);
GC_API_ struct gc_ref gc_ephemeron_value(struct gc_ephemeron *ephemeron);

GC_API_ struct gc_ephemeron* gc_ephemeron_chain_head(struct gc_ephemeron **loc);
GC_API_ void gc_ephemeron_chain_push(struct gc_ephemeron **loc,
                                     struct gc_ephemeron *ephemeron);
GC_API_ struct gc_ephemeron* gc_ephemeron_chain_next(struct gc_ephemeron *ephemeron);
GC_API_ void gc_ephemeron_mark_dead(struct gc_ephemeron *ephemeron);

GC_API_ void gc_trace_ephemeron(struct gc_ephemeron *ephemeron,
                                void (*visit)(struct gc_edge edge,
                                              struct gc_heap *heap,
                                              void *visit_data),
                                struct gc_heap *heap,
                                void *trace_data);

#endif // GC_EPHEMERON_H_

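A hedged usage sketch of the API above: the embedder allocates an ephemeron, initializes it with a key and a value, and later reads the value back through the ephemeron. The helper names are illustrative, and the assumption that `gc_ephemeron_value` yields a null reference once the key has been collected is not stated in this header.

```
// Hypothetical embedder code, for illustration only.
#include "gc-ephemeron.h"

// Associate `value` with `key` without keeping `key` alive.
static struct gc_ephemeron* make_weak_entry(struct gc_mutator *mut,
                                            struct gc_ref key,
                                            struct gc_ref value) {
  struct gc_ephemeron *e = gc_allocate_ephemeron(mut);
  gc_ephemeron_init(mut, e, key, value);
  return e;
}

static struct gc_ref lookup_weak_entry(struct gc_ephemeron *e) {
  // While key and ephemeron are live, this returns the associated value;
  // after the key dies the association is broken (assumed null ref).
  return gc_ephemeron_value(e);
}
```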
libguile/whippet/api/gc-event-listener-chain.h (new file, 145 lines)

#ifndef GC_EVENT_LISTENER_CHAIN_H
#define GC_EVENT_LISTENER_CHAIN_H

#include "gc-event-listener.h"

#include <stdlib.h>

struct gc_event_listener_chain {
  struct gc_event_listener head; void *head_data;
  struct gc_event_listener tail; void *tail_data;
};

struct gc_event_listener_chain_mutator {
  struct gc_event_listener_chain *chain;
  void *head_mutator_data;
  void *tail_mutator_data;
};

static inline void gc_event_listener_chain_init(void *data, size_t heap_size) {
  struct gc_event_listener_chain *chain = data;
  chain->head.init(chain->head_data, heap_size);
  chain->tail.init(chain->tail_data, heap_size);
}

static inline void gc_event_listener_chain_requesting_stop(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.requesting_stop(chain->head_data);
  chain->tail.requesting_stop(chain->tail_data);
}
static inline void gc_event_listener_chain_waiting_for_stop(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.waiting_for_stop(chain->head_data);
  chain->tail.waiting_for_stop(chain->tail_data);
}
static inline void gc_event_listener_chain_mutators_stopped(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.mutators_stopped(chain->head_data);
  chain->tail.mutators_stopped(chain->tail_data);
}
static inline void
gc_event_listener_chain_prepare_gc(void *data, enum gc_collection_kind kind) {
  struct gc_event_listener_chain *chain = data;
  chain->head.prepare_gc(chain->head_data, kind);
  chain->tail.prepare_gc(chain->tail_data, kind);
}
static inline void gc_event_listener_chain_roots_traced(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.roots_traced(chain->head_data);
  chain->tail.roots_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_heap_traced(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.heap_traced(chain->head_data);
  chain->tail.heap_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_ephemerons_traced(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.ephemerons_traced(chain->head_data);
  chain->tail.ephemerons_traced(chain->tail_data);
}
static inline void gc_event_listener_chain_finalizers_traced(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.finalizers_traced(chain->head_data);
  chain->tail.finalizers_traced(chain->tail_data);
}

static inline void gc_event_listener_chain_restarting_mutators(void *data) {
  struct gc_event_listener_chain *chain = data;
  chain->head.restarting_mutators(chain->head_data);
  chain->tail.restarting_mutators(chain->tail_data);
}

static inline void* gc_event_listener_chain_mutator_added(void *data) {
  struct gc_event_listener_chain *chain = data;
  struct gc_event_listener_chain_mutator *mutator = malloc(sizeof(*mutator));
  if (!mutator) abort();
  mutator->chain = chain;
  mutator->head_mutator_data = chain->head.mutator_added(chain->head_data);
  mutator->tail_mutator_data = chain->tail.mutator_added(chain->tail_data);
  return mutator;
}

static inline void gc_event_listener_chain_mutator_cause_gc(void *mutator_data) {
  struct gc_event_listener_chain_mutator *mutator = mutator_data;
  mutator->chain->head.mutator_cause_gc(mutator->head_mutator_data);
  mutator->chain->tail.mutator_cause_gc(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_stopping(void *mutator_data) {
  struct gc_event_listener_chain_mutator *mutator = mutator_data;
  mutator->chain->head.mutator_stopping(mutator->head_mutator_data);
  mutator->chain->tail.mutator_stopping(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_stopped(void *mutator_data) {
  struct gc_event_listener_chain_mutator *mutator = mutator_data;
  mutator->chain->head.mutator_stopped(mutator->head_mutator_data);
  mutator->chain->tail.mutator_stopped(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_restarted(void *mutator_data) {
  struct gc_event_listener_chain_mutator *mutator = mutator_data;
  mutator->chain->head.mutator_restarted(mutator->head_mutator_data);
  mutator->chain->tail.mutator_restarted(mutator->tail_mutator_data);
}
static inline void gc_event_listener_chain_mutator_removed(void *mutator_data) {
  struct gc_event_listener_chain_mutator *mutator = mutator_data;
  mutator->chain->head.mutator_removed(mutator->head_mutator_data);
  mutator->chain->tail.mutator_removed(mutator->tail_mutator_data);
  free(mutator);
}

static inline void gc_event_listener_chain_heap_resized(void *data, size_t size) {
  struct gc_event_listener_chain *chain = data;
  chain->head.heap_resized(chain->head_data, size);
  chain->tail.heap_resized(chain->tail_data, size);
}

static inline void gc_event_listener_chain_live_data_size(void *data, size_t size) {
  struct gc_event_listener_chain *chain = data;
  chain->head.live_data_size(chain->head_data, size);
  chain->tail.live_data_size(chain->tail_data, size);
}

#define GC_EVENT_LISTENER_CHAIN \
  ((struct gc_event_listener) { \
    gc_event_listener_chain_init, \
    gc_event_listener_chain_requesting_stop, \
    gc_event_listener_chain_waiting_for_stop, \
    gc_event_listener_chain_mutators_stopped, \
    gc_event_listener_chain_prepare_gc, \
    gc_event_listener_chain_roots_traced, \
    gc_event_listener_chain_heap_traced, \
    gc_event_listener_chain_ephemerons_traced, \
    gc_event_listener_chain_finalizers_traced, \
    gc_event_listener_chain_restarting_mutators, \
    gc_event_listener_chain_mutator_added, \
    gc_event_listener_chain_mutator_cause_gc, \
    gc_event_listener_chain_mutator_stopping, \
    gc_event_listener_chain_mutator_stopped, \
    gc_event_listener_chain_mutator_restarted, \
    gc_event_listener_chain_mutator_removed, \
    gc_event_listener_chain_heap_resized, \
    gc_event_listener_chain_live_data_size, \
  })

#define GC_EVENT_LISTENER_CHAIN_DATA(head, head_data, tail, tail_data) \
  ((struct gc_event_listener_chain_data){head, head_data, tail, tail_data})

#endif // GC_EVENT_LISTENER_CHAIN_H

libguile/whippet/api/gc-event-listener.h (new file, 29 lines)

#ifndef GC_EVENT_LISTENER_H
#define GC_EVENT_LISTENER_H

#include "gc-collection-kind.h"

struct gc_event_listener {
  void (*init)(void *data, size_t heap_size);
  void (*requesting_stop)(void *data);
  void (*waiting_for_stop)(void *data);
  void (*mutators_stopped)(void *data);
  void (*prepare_gc)(void *data, enum gc_collection_kind kind);
  void (*roots_traced)(void *data);
  void (*heap_traced)(void *data);
  void (*ephemerons_traced)(void *data);
  void (*finalizers_traced)(void *data);
  void (*restarting_mutators)(void *data);

  void* (*mutator_added)(void *data);
  void (*mutator_cause_gc)(void *mutator_data);
  void (*mutator_stopping)(void *mutator_data);
  void (*mutator_stopped)(void *mutator_data);
  void (*mutator_restarted)(void *mutator_data);
  void (*mutator_removed)(void *mutator_data);

  void (*heap_resized)(void *data, size_t size);
  void (*live_data_size)(void *data, size_t size);
};

#endif // GC_EVENT_LISTENER_H

81
libguile/whippet/api/gc-finalizer.h
Normal file
81
libguile/whippet/api/gc-finalizer.h
Normal file
|
@ -0,0 +1,81 @@
|
|||
#ifndef GC_FINALIZER_H_
#define GC_FINALIZER_H_

#include "gc-edge.h"
#include "gc-ref.h"
#include "gc-visibility.h"

// A finalizer allows the embedder to be notified when an object becomes
// unreachable.
//
// A finalizer has a priority. When the heap is created, the embedder
// should declare how many priorities there are. Lower-numbered
// priorities take precedence; if an object has a priority-0 finalizer
// outstanding, that will prevent any finalizer at level 1 (or 2, ...)
// from firing until no priority-0 finalizer remains.
//
// Call gc_finalizer_attach to attach a finalizer to an object.
//
// A finalizer also references an associated GC-managed closure object.
// A finalizer's reference to the closure object is strong: if a
// finalizer's closure references its finalizable object,
// directly or indirectly, the finalizer will never fire.
//
// When an object with a finalizer becomes unreachable, it is added to a
// queue. The embedder can call gc_pop_finalizable to get the next
// finalizable object and its associated closure. At that point the
// embedder can do anything with the object, including keeping it alive.
// Ephemeron associations will still be present while the finalizable
// object is live. Note however that any objects referenced by the
// finalizable object may themselves be already finalized; finalizers
// are enqueued for objects when they become unreachable, which can
// concern whole subgraphs of objects at once.
//
// The usual way for an embedder to know when the queue of finalizable
// objects is non-empty is to call gc_set_finalizer_callback to
// provide a function that will be invoked when there are pending
// finalizers.
//
// Arranging to call gc_pop_finalizable and doing something with the
// finalizable object and closure is the responsibility of the embedder.
// The embedder's finalization action can end up invoking arbitrary
// code, so unless the embedder imposes some kind of restriction on what
// finalizers can do, generally speaking finalizers should be run in a
// dedicated thread instead of recursively from within whatever mutator
// thread caused GC. Setting up such a thread is the responsibility of
// the mutator. gc_pop_finalizable is thread-safe, allowing multiple
// finalization threads if that is appropriate.
//
// gc_allocate_finalizer returns a finalizer, which is a fresh
// GC-managed heap object. The mutator should then directly attach it
// to an object using gc_finalizer_attach. When the finalizer is fired,
// it becomes available to the mutator via gc_pop_finalizable.

struct gc_heap;
struct gc_mutator;
struct gc_finalizer;

GC_API_ size_t gc_finalizer_size(void);
GC_API_ struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut);
GC_API_ void gc_finalizer_attach(struct gc_mutator *mut,
                                 struct gc_finalizer *finalizer,
                                 unsigned priority,
                                 struct gc_ref object, struct gc_ref closure);

GC_API_ struct gc_ref gc_finalizer_object(struct gc_finalizer *finalizer);
GC_API_ struct gc_ref gc_finalizer_closure(struct gc_finalizer *finalizer);

GC_API_ struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut);

typedef void (*gc_finalizer_callback)(struct gc_heap *heap, size_t count);
GC_API_ void gc_set_finalizer_callback(struct gc_heap *heap,
                                       gc_finalizer_callback callback);

GC_API_ void gc_trace_finalizer(struct gc_finalizer *finalizer,
                                void (*visit)(struct gc_edge edge,
                                              struct gc_heap *heap,
                                              void *visit_data),
                                struct gc_heap *heap,
                                void *trace_data);

#endif // GC_FINALIZER_H_
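To make the workflow described above concrete, here is a minimal sketch of the embedder side. It only uses the API declared in this header; `handle_finalizable` and the wake-up mechanism in the callback are hypothetical placeholders, and the finalizers.c benchmark later in this commit is a complete in-tree user.

/* Sketch only: draining fired finalizers from a mutator thread. */
static void handle_finalizable(struct gc_ref object, struct gc_ref closure);

static void drain_finalizers(struct gc_mutator *mut) {
  for (struct gc_finalizer *f = gc_pop_finalizable(mut);
       f;
       f = gc_pop_finalizable(mut)) {
    /* The object is reachable again for the duration of this call; the
       embedder may even decide to keep it alive. */
    handle_finalizable(gc_finalizer_object(f), gc_finalizer_closure(f));
  }
}

/* The callback fires when finalizers become pending; because arbitrary
   code may run from finalizers, it typically only signals a dedicated
   finalization thread, which then calls drain_finalizers with its own
   mutator. */
static void on_finalizers_pending(struct gc_heap *heap, size_t count) {
  /* e.g. notify a condition variable (omitted). */
}

/* Registered once at startup:
     gc_set_finalizer_callback(heap, on_finalizers_pending);  */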
20
libguile/whippet/api/gc-forwarding.h
Normal file
@@ -0,0 +1,20 @@
#ifndef GC_FORWARDING_H
#define GC_FORWARDING_H

#include <stdint.h>
#include "gc-ref.h"

enum gc_forwarding_state {
  GC_FORWARDING_STATE_FORWARDED,
  GC_FORWARDING_STATE_BUSY,
  GC_FORWARDING_STATE_ACQUIRED,
  GC_FORWARDING_STATE_NOT_FORWARDED
};

struct gc_atomic_forward {
  struct gc_ref ref;
  uintptr_t data;
  enum gc_forwarding_state state;
};

#endif // GC_FORWARDING_H
82
libguile/whippet/api/gc-histogram.h
Normal file
@@ -0,0 +1,82 @@
#ifndef GC_HISTOGRAM_H
|
||||
#define GC_HISTOGRAM_H
|
||||
|
||||
#include "gc-assert.h"
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static inline size_t gc_histogram_bucket(uint64_t max_value_bits,
|
||||
uint64_t precision,
|
||||
uint64_t val) {
|
||||
uint64_t major = val < (1ULL << precision)
|
||||
? 0ULL
|
||||
: 64ULL - __builtin_clzl(val) - precision;
|
||||
uint64_t minor = val < (1 << precision)
|
||||
? val
|
||||
: (val >> (major - 1ULL)) & ((1ULL << precision) - 1ULL);
|
||||
uint64_t idx = (major << precision) | minor;
|
||||
if (idx >= (max_value_bits << precision))
|
||||
idx = max_value_bits << precision;
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline uint64_t gc_histogram_bucket_min_val(uint64_t precision,
|
||||
size_t idx) {
|
||||
uint64_t major = idx >> precision;
|
||||
uint64_t minor = idx & ((1ULL << precision) - 1ULL);
|
||||
uint64_t min_val = major
|
||||
? ((1ULL << precision) | minor) << (major - 1ULL)
|
||||
: minor;
|
||||
return min_val;
|
||||
}
|
||||
|
||||
#define GC_DEFINE_HISTOGRAM(name, max_value_bits, precision) \
|
||||
struct name { uint32_t buckets[((max_value_bits) << (precision)) + 1]; }; \
|
||||
static inline size_t name##_size(void) { \
|
||||
return ((max_value_bits) << (precision)) + 1; \
|
||||
} \
|
||||
static inline uint64_t name##_bucket_min_val(size_t idx) { \
|
||||
GC_ASSERT(idx < name##_size()); \
|
||||
return gc_histogram_bucket_min_val((precision), idx); \
|
||||
} \
|
||||
static inline struct name make_##name(void) { \
|
||||
return (struct name) { { 0, }}; \
|
||||
} \
|
||||
static inline void name##_record(struct name *h, uint64_t val) { \
|
||||
h->buckets[gc_histogram_bucket((max_value_bits), (precision), val)]++; \
|
||||
} \
|
||||
static inline uint64_t name##_ref(struct name *h, size_t idx) { \
|
||||
GC_ASSERT(idx < name##_size()); \
|
||||
return h->buckets[idx]; \
|
||||
} \
|
||||
static inline uint64_t name##_min(struct name *h) { \
|
||||
for (size_t bucket = 0; bucket < name##_size(); bucket++) \
|
||||
if (h->buckets[bucket]) return name##_bucket_min_val(bucket); \
|
||||
return -1; \
|
||||
} \
|
||||
static inline uint64_t name##_max(struct name *h) { \
|
||||
if (h->buckets[name##_size()-1]) return -1LL; \
|
||||
for (ssize_t bucket = name##_size() - 1; bucket >= 0; bucket--) \
|
||||
if (h->buckets[bucket]) return name##_bucket_min_val(bucket+1); \
|
||||
return 0; \
|
||||
} \
|
||||
static inline uint64_t name##_count(struct name *h) { \
|
||||
uint64_t sum = 0; \
|
||||
for (size_t bucket = 0; bucket < name##_size(); bucket++) \
|
||||
sum += h->buckets[bucket]; \
|
||||
return sum; \
|
||||
} \
|
||||
static inline uint64_t name##_percentile(struct name *h, double p) { \
|
||||
uint64_t n = name##_count(h) * p; \
|
||||
uint64_t sum = 0; \
|
||||
for (size_t bucket = 0; bucket + 1 < name##_size(); bucket++) { \
|
||||
sum += h->buckets[bucket]; \
|
||||
if (sum >= n) return name##_bucket_min_val(bucket+1); \
|
||||
} \
|
||||
return -1ULL; \
|
||||
} \
|
||||
static inline uint64_t name##_median(struct name *h) { \
|
||||
return name##_percentile(h, 0.5); \
|
||||
}
|
||||
|
||||
#endif // GC_HISTOGRAM_H
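As an illustration of how these macros are meant to be used, the sketch below defines a histogram of pause times. The `pause_hist` name and the chosen parameters (values up to 2^36 nanoseconds, 2^5 sub-buckets per power of two) are arbitrary examples, not anything prescribed by this header.

/* Sketch only: a pause-time histogram built with GC_DEFINE_HISTOGRAM. */
#include <inttypes.h>
#include <stdio.h>
#include "gc-histogram.h"

GC_DEFINE_HISTOGRAM(pause_hist, 36, 5)

static struct pause_hist pauses;   /* zero-initialized, like make_pause_hist() */

static void record_pause(uint64_t nanoseconds) {
  pause_hist_record(&pauses, nanoseconds);
}

static void report_pauses(void) {
  printf("pauses: %" PRIu64 ", median %" PRIu64 " ns, p95 %" PRIu64 " ns\n",
         pause_hist_count(&pauses),
         pause_hist_median(&pauses),
         pause_hist_percentile(&pauses, 0.95));
}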
7
libguile/whippet/api/gc-inline.h
Normal file
@@ -0,0 +1,7 @@
#ifndef GC_INLINE_H_
#define GC_INLINE_H_

#define GC_ALWAYS_INLINE __attribute__((always_inline))
#define GC_NEVER_INLINE __attribute__((noinline))

#endif // GC_INLINE_H_
100
libguile/whippet/api/gc-lttng.h
Normal file
@@ -0,0 +1,100 @@
#define LTTNG_UST_TRACEPOINT_PROVIDER whippet
|
||||
|
||||
#undef LTTNG_UST_TRACEPOINT_INCLUDE
|
||||
#define LTTNG_UST_TRACEPOINT_INCLUDE "gc-lttng.h"
|
||||
|
||||
#if !defined(_TP_H) || defined(LTTNG_UST_TRACEPOINT_HEADER_MULTI_READ)
|
||||
#define _TP_H
|
||||
|
||||
#include <lttng/tracepoint.h>
|
||||
|
||||
LTTNG_UST_TRACEPOINT_ENUM(
|
||||
whippet, gc_kind,
|
||||
LTTNG_UST_TP_ENUM_VALUES
|
||||
(lttng_ust_field_enum_value("MINOR", 1)
|
||||
lttng_ust_field_enum_value("MAJOR", 2)
|
||||
lttng_ust_field_enum_value("COMPACTING", 3)))
|
||||
|
||||
LTTNG_UST_TRACEPOINT_EVENT_CLASS(
|
||||
whippet, tracepoint,
|
||||
LTTNG_UST_TP_ARGS(),
|
||||
LTTNG_UST_TP_FIELDS())
|
||||
|
||||
LTTNG_UST_TRACEPOINT_EVENT_CLASS(
|
||||
whippet, size_tracepoint,
|
||||
LTTNG_UST_TP_ARGS(size_t, size),
|
||||
LTTNG_UST_TP_FIELDS(lttng_ust_field_integer(size_t, size, size)))
|
||||
|
||||
|
||||
/* The tracepoint instances */
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, size_tracepoint, whippet, init,
|
||||
LTTNG_UST_TP_ARGS(size_t, size))
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, size_tracepoint, whippet, heap_resized,
|
||||
LTTNG_UST_TP_ARGS(size_t, size))
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, size_tracepoint, whippet, live_data_size,
|
||||
LTTNG_UST_TP_ARGS(size_t, size))
|
||||
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, requesting_stop, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, waiting_for_stop, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutators_stopped, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT(
|
||||
whippet, prepare_gc,
|
||||
LTTNG_UST_TP_ARGS(int, gc_kind),
|
||||
LTTNG_UST_TP_FIELDS(
|
||||
lttng_ust_field_enum(whippet, gc_kind, int, gc_kind, gc_kind)))
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, roots_traced, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, heap_traced, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, ephemerons_traced, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, finalizers_traced, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, restarting_mutators, LTTNG_UST_TP_ARGS())
|
||||
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutator_added, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutator_cause_gc, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutator_stopping, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutator_stopped, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutator_restarted, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, mutator_removed, LTTNG_UST_TP_ARGS())
|
||||
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_unpark_all, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_share, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_check_termination_begin, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_check_termination_end, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_steal, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_roots_begin, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_roots_end, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_objects_begin, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_objects_end, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_worker_begin, LTTNG_UST_TP_ARGS())
|
||||
LTTNG_UST_TRACEPOINT_EVENT_INSTANCE(
|
||||
whippet, tracepoint, whippet, trace_worker_end, LTTNG_UST_TP_ARGS())
|
||||
|
||||
#endif /* _TP_H */
|
||||
|
||||
#include <lttng/tracepoint-event.h>
50
libguile/whippet/api/gc-null-event-listener.h
Normal file
@@ -0,0 +1,50 @@
#ifndef GC_NULL_EVENT_LISTENER_H
|
||||
#define GC_NULL_EVENT_LISTENER_H
|
||||
|
||||
#include "gc-event-listener.h"
|
||||
|
||||
static inline void gc_null_event_listener_init(void *data, size_t size) {}
|
||||
static inline void gc_null_event_listener_requesting_stop(void *data) {}
|
||||
static inline void gc_null_event_listener_waiting_for_stop(void *data) {}
|
||||
static inline void gc_null_event_listener_mutators_stopped(void *data) {}
|
||||
static inline void gc_null_event_listener_prepare_gc(void *data,
|
||||
enum gc_collection_kind) {}
|
||||
static inline void gc_null_event_listener_roots_traced(void *data) {}
|
||||
static inline void gc_null_event_listener_heap_traced(void *data) {}
|
||||
static inline void gc_null_event_listener_ephemerons_traced(void *data) {}
|
||||
static inline void gc_null_event_listener_finalizers_traced(void *data) {}
|
||||
static inline void gc_null_event_listener_restarting_mutators(void *data) {}
|
||||
|
||||
static inline void* gc_null_event_listener_mutator_added(void *data) { return NULL; }
|
||||
static inline void gc_null_event_listener_mutator_cause_gc(void *mutator_data) {}
|
||||
static inline void gc_null_event_listener_mutator_stopping(void *mutator_data) {}
|
||||
static inline void gc_null_event_listener_mutator_stopped(void *mutator_data) {}
|
||||
static inline void gc_null_event_listener_mutator_restarted(void *mutator_data) {}
|
||||
static inline void gc_null_event_listener_mutator_removed(void *mutator_data) {}
|
||||
|
||||
static inline void gc_null_event_listener_heap_resized(void *, size_t) {}
|
||||
static inline void gc_null_event_listener_live_data_size(void *, size_t) {}
|
||||
|
||||
#define GC_NULL_EVENT_LISTENER \
|
||||
((struct gc_event_listener) { \
|
||||
gc_null_event_listener_init, \
|
||||
gc_null_event_listener_requesting_stop, \
|
||||
gc_null_event_listener_waiting_for_stop, \
|
||||
gc_null_event_listener_mutators_stopped, \
|
||||
gc_null_event_listener_prepare_gc, \
|
||||
gc_null_event_listener_roots_traced, \
|
||||
gc_null_event_listener_heap_traced, \
|
||||
gc_null_event_listener_ephemerons_traced, \
|
||||
gc_null_event_listener_finalizers_traced, \
|
||||
gc_null_event_listener_restarting_mutators, \
|
||||
gc_null_event_listener_mutator_added, \
|
||||
gc_null_event_listener_mutator_cause_gc, \
|
||||
gc_null_event_listener_mutator_stopping, \
|
||||
gc_null_event_listener_mutator_stopped, \
|
||||
gc_null_event_listener_mutator_restarted, \
|
||||
gc_null_event_listener_mutator_removed, \
|
||||
gc_null_event_listener_heap_resized, \
|
||||
gc_null_event_listener_live_data_size, \
|
||||
})
|
||||
|
||||
#endif // GC_NULL_EVENT_LISTENER_H
39
libguile/whippet/api/gc-options.h
Normal file
@@ -0,0 +1,39 @@
#ifndef GC_OPTIONS_H
#define GC_OPTIONS_H

#include "gc-visibility.h"

enum gc_heap_size_policy {
  GC_HEAP_SIZE_FIXED,
  GC_HEAP_SIZE_GROWABLE,
  GC_HEAP_SIZE_ADAPTIVE,
};

enum {
  GC_OPTION_HEAP_SIZE_POLICY,
  GC_OPTION_HEAP_SIZE,
  GC_OPTION_MAXIMUM_HEAP_SIZE,
  GC_OPTION_HEAP_SIZE_MULTIPLIER,
  GC_OPTION_HEAP_EXPANSIVENESS,
  GC_OPTION_PARALLELISM
};

struct gc_options;

GC_API_ int gc_option_from_string(const char *str);

GC_API_ struct gc_options* gc_allocate_options(void);

GC_API_ int gc_options_set_int(struct gc_options *options, int option,
                               int value);
GC_API_ int gc_options_set_size(struct gc_options *options, int option,
                                size_t value);
GC_API_ int gc_options_set_double(struct gc_options *options, int option,
                                  double value);

GC_API_ int gc_options_parse_and_set(struct gc_options *options,
                                     int option, const char *value);
GC_API_ int gc_options_parse_and_set_many(struct gc_options *options,
                                          const char *str);

#endif // GC_OPTIONS_H
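For reference, a sketch of the intended call sequence follows; it mirrors what the benchmark main() functions later in this commit do. The 512 MB heap size and the parallelism value are arbitrary example choices.

/* Sketch only: building a fixed-size heap configuration. */
static struct gc_options* make_example_options(void) {
  struct gc_options *options = gc_allocate_options();
  gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
  gc_options_set_size(options, GC_OPTION_HEAP_SIZE, 512 * 1024 * 1024);
  gc_options_set_int(options, GC_OPTION_PARALLELISM, 4);
  /* A user-supplied option string can additionally be applied with
     gc_options_parse_and_set_many, as the benchmarks do with their
     optional GC-OPTIONS argument.  The result is then passed to
     gc_init. */
  return options;
}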
50
libguile/whippet/api/gc-ref.h
Normal file
@@ -0,0 +1,50 @@
#ifndef GC_REF_H
#define GC_REF_H

#include "gc-assert.h"
#include "gc-config.h"

#include <stdint.h>

struct gc_ref {
  uintptr_t value;
};

static inline struct gc_ref gc_ref(uintptr_t value) {
  return (struct gc_ref){value};
}
static inline uintptr_t gc_ref_value(struct gc_ref ref) {
  return ref.value;
}

static inline struct gc_ref gc_ref_null(void) {
  return gc_ref(0);
}
static inline int gc_ref_is_null(struct gc_ref ref) {
  return ref.value == 0;
}
static inline int gc_ref_is_immediate(struct gc_ref ref) {
  GC_ASSERT(!gc_ref_is_null(ref));
  return GC_HAS_IMMEDIATES && (ref.value & (sizeof(void*) - 1));
}
static inline struct gc_ref gc_ref_immediate(uintptr_t val) {
  GC_ASSERT(val & (sizeof(void*) - 1));
  GC_ASSERT(GC_HAS_IMMEDIATES);
  return gc_ref(val);
}
static inline int gc_ref_is_heap_object(struct gc_ref ref) {
  return !gc_ref_is_immediate(ref);
}
static inline struct gc_ref gc_ref_from_heap_object_or_null(void *obj) {
  return gc_ref((uintptr_t) obj);
}
static inline struct gc_ref gc_ref_from_heap_object(void *obj) {
  GC_ASSERT(obj);
  return gc_ref_from_heap_object_or_null(obj);
}
static inline void* gc_ref_heap_object(struct gc_ref ref) {
  GC_ASSERT(gc_ref_is_heap_object(ref));
  return (void *) gc_ref_value(ref);
}

#endif // GC_REF_H
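A small sketch of how an embedder typically moves between its own pointers and `struct gc_ref` values, using only the helpers above; `struct my_object` is a hypothetical embedder type.

/* Sketch only: wrapping and unwrapping embedder pointers. */
struct my_object;   /* hypothetical embedder type */

static inline struct gc_ref my_object_to_ref(struct my_object *obj) {
  return obj ? gc_ref_from_heap_object(obj) : gc_ref_null();
}

static inline struct my_object* my_object_from_ref(struct gc_ref ref) {
  if (gc_ref_is_null(ref))
    return NULL;
  /* With GC_HAS_IMMEDIATES, a tagged immediate would be handled here
     before treating the value as a heap pointer. */
  return gc_ref_heap_object(ref);
}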
17
libguile/whippet/api/gc-tracepoint.h
Normal file
@@ -0,0 +1,17 @@
#ifndef GC_TRACEPOINT_H
#define GC_TRACEPOINT_H

#ifdef GC_TRACEPOINT_LTTNG

#include "gc-lttng.h"

#define GC_TRACEPOINT(...) \
  lttng_ust_tracepoint(whippet, __VA_ARGS__)

#else // GC_TRACEPOINT_LTTNG

#define GC_TRACEPOINT(...) do {} while (0)

#endif // GC_TRACEPOINT_LTTNG

#endif // GC_TRACEPOINT_H
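Call sites look like the following sketch; the instance names come from gc-lttng.h above, and when lttng-ust is not configured the calls compile to nothing.

/* Sketch only: emitting Whippet tracepoints. */
static void example_trace_calls(size_t new_heap_size) {
  GC_TRACEPOINT(requesting_stop);               /* plain tracepoint */
  GC_TRACEPOINT(heap_resized, new_heap_size);   /* size_tracepoint instance */
}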
12
libguile/whippet/api/gc-visibility.h
Normal file
@@ -0,0 +1,12 @@
#ifndef GC_VISIBILITY_H_
#define GC_VISIBILITY_H_

#define GC_INTERNAL __attribute__((visibility("hidden")))
#define GC_PUBLIC __attribute__((visibility("default")))

// FIXME: Conflict with bdw-gc GC_API. Switch prefix?
#ifndef GC_API_
#define GC_API_ GC_INTERNAL
#endif

#endif // GC_VISIBILITY_H_
121
libguile/whippet/api/mmc-attrs.h
Normal file
@@ -0,0 +1,121 @@
#ifndef MMC_ATTRS_H
|
||||
#define MMC_ATTRS_H
|
||||
|
||||
#include "gc-config.h"
|
||||
#include "gc-assert.h"
|
||||
#include "gc-attrs.h"
|
||||
|
||||
static inline enum gc_allocator_kind gc_allocator_kind(void) {
|
||||
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
|
||||
}
|
||||
static inline size_t gc_allocator_small_granule_size(void) {
|
||||
return 16;
|
||||
}
|
||||
static inline size_t gc_allocator_large_threshold(void) {
|
||||
return 8192;
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_allocation_pointer_offset(void) {
|
||||
return sizeof(uintptr_t) * 0;
|
||||
}
|
||||
static inline size_t gc_allocator_allocation_limit_offset(void) {
|
||||
return sizeof(uintptr_t) * 1;
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_freelist_offset(size_t size,
|
||||
enum gc_allocation_kind kind) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_alloc_table_alignment(void) {
|
||||
return 4 * 1024 * 1024;
|
||||
}
|
||||
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) {
|
||||
uint8_t young = 1;
|
||||
uint8_t trace_precisely = 0;
|
||||
uint8_t trace_none = 8;
|
||||
uint8_t trace_conservatively = 16;
|
||||
uint8_t pinned = 16;
|
||||
if (GC_CONSERVATIVE_TRACE) {
|
||||
switch (kind) {
|
||||
case GC_ALLOCATION_TAGGED:
|
||||
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
|
||||
return young | trace_conservatively;
|
||||
case GC_ALLOCATION_TAGGED_POINTERLESS:
|
||||
return young | trace_none;
|
||||
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
|
||||
return young | trace_none;
|
||||
default:
|
||||
GC_CRASH();
|
||||
};
|
||||
} else {
|
||||
switch (kind) {
|
||||
case GC_ALLOCATION_TAGGED:
|
||||
return young | trace_precisely;
|
||||
case GC_ALLOCATION_TAGGED_POINTERLESS:
|
||||
return young | trace_none;
|
||||
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
|
||||
return young | trace_none | pinned;
|
||||
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
|
||||
default:
|
||||
GC_CRASH();
|
||||
};
|
||||
}
|
||||
}
|
||||
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
|
||||
return 32;
|
||||
}
|
||||
|
||||
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t obj_size) {
|
||||
if (GC_GENERATIONAL) {
|
||||
if (obj_size <= gc_allocator_large_threshold())
|
||||
return GC_OLD_GENERATION_CHECK_ALLOC_TABLE;
|
||||
return GC_OLD_GENERATION_CHECK_SLOW;
|
||||
}
|
||||
return GC_OLD_GENERATION_CHECK_NONE;
|
||||
}
|
||||
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
|
||||
return 7;
|
||||
}
|
||||
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
|
||||
if (GC_GENERATIONAL) {
|
||||
if (obj_size <= gc_allocator_large_threshold())
|
||||
return GC_WRITE_BARRIER_FIELD;
|
||||
return GC_WRITE_BARRIER_SLOW;
|
||||
}
|
||||
return GC_WRITE_BARRIER_NONE;
|
||||
}
|
||||
static inline size_t gc_write_barrier_field_table_alignment(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return gc_allocator_alloc_table_alignment();
|
||||
}
|
||||
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 0;
|
||||
}
|
||||
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 2;
|
||||
}
|
||||
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 64; // NOFL_METADATA_BYTE_LOGGED_0
|
||||
}
|
||||
|
||||
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
|
||||
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
|
||||
}
|
||||
|
||||
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
|
||||
return GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG;
|
||||
}
|
||||
|
||||
static inline int gc_can_pin_objects(void) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // MMC_ATTRS_H
92
libguile/whippet/api/pcc-attrs.h
Normal file
@@ -0,0 +1,92 @@
#ifndef PCC_ATTRS_H
|
||||
#define PCC_ATTRS_H
|
||||
|
||||
#include "gc-config.h"
|
||||
#include "gc-assert.h"
|
||||
#include "gc-attrs.h"
|
||||
|
||||
static const uintptr_t GC_ALIGNMENT = 8;
|
||||
static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
|
||||
|
||||
static inline enum gc_allocator_kind gc_allocator_kind(void) {
|
||||
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
|
||||
}
|
||||
static inline size_t gc_allocator_small_granule_size(void) {
|
||||
return GC_ALIGNMENT;
|
||||
}
|
||||
static inline size_t gc_allocator_large_threshold(void) {
|
||||
return GC_LARGE_OBJECT_THRESHOLD;
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_allocation_pointer_offset(void) {
|
||||
return sizeof(uintptr_t) * 0;
|
||||
}
|
||||
static inline size_t gc_allocator_allocation_limit_offset(void) {
|
||||
return sizeof(uintptr_t) * 1;
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_freelist_offset(size_t size, enum gc_allocation_kind kind) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_alloc_table_alignment(void) {
|
||||
return 0;
|
||||
}
|
||||
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t size) {
|
||||
if (!GC_GENERATIONAL)
|
||||
return GC_OLD_GENERATION_CHECK_NONE;
|
||||
if (size <= gc_allocator_large_threshold())
|
||||
return GC_OLD_GENERATION_CHECK_SMALL_OBJECT_NURSERY;
|
||||
return GC_OLD_GENERATION_CHECK_SLOW;
|
||||
}
|
||||
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t obj_size) {
|
||||
if (!GC_GENERATIONAL)
|
||||
return GC_WRITE_BARRIER_NONE;
|
||||
if (obj_size <= gc_allocator_large_threshold())
|
||||
return GC_WRITE_BARRIER_FIELD;
|
||||
return GC_WRITE_BARRIER_SLOW;
|
||||
}
|
||||
static inline size_t gc_write_barrier_field_table_alignment(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 64 * 1024 * 1024;
|
||||
}
|
||||
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 128 * 1024;
|
||||
}
|
||||
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 8;
|
||||
}
|
||||
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
|
||||
GC_ASSERT(GC_GENERATIONAL);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
|
||||
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
|
||||
}
|
||||
|
||||
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
|
||||
return GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG;
|
||||
}
|
||||
|
||||
static inline int gc_can_pin_objects(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // PCC_ATTRS_H
80
libguile/whippet/api/semi-attrs.h
Normal file
@@ -0,0 +1,80 @@
#ifndef SEMI_ATTRS_H
|
||||
#define SEMI_ATTRS_H
|
||||
|
||||
#include "gc-attrs.h"
|
||||
#include "gc-assert.h"
|
||||
|
||||
static const uintptr_t GC_ALIGNMENT = 8;
|
||||
static const size_t GC_LARGE_OBJECT_THRESHOLD = 8192;
|
||||
|
||||
static inline enum gc_allocator_kind gc_allocator_kind(void) {
|
||||
return GC_ALLOCATOR_INLINE_BUMP_POINTER;
|
||||
}
|
||||
static inline size_t gc_allocator_small_granule_size(void) {
|
||||
return GC_ALIGNMENT;
|
||||
}
|
||||
static inline size_t gc_allocator_large_threshold(void) {
|
||||
return GC_LARGE_OBJECT_THRESHOLD;
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_allocation_pointer_offset(void) {
|
||||
return sizeof(uintptr_t) * 0;
|
||||
}
|
||||
static inline size_t gc_allocator_allocation_limit_offset(void) {
|
||||
return sizeof(uintptr_t) * 1;
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_freelist_offset(size_t size,
|
||||
enum gc_allocation_kind kind) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline size_t gc_allocator_alloc_table_alignment(void) {
|
||||
return 0;
|
||||
}
|
||||
static inline uint8_t gc_allocator_alloc_table_begin_pattern(enum gc_allocation_kind kind) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline uint8_t gc_allocator_alloc_table_end_pattern(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline enum gc_old_generation_check_kind gc_old_generation_check_kind(size_t) {
|
||||
return GC_OLD_GENERATION_CHECK_NONE;
|
||||
}
|
||||
static inline uint8_t gc_old_generation_check_alloc_table_tag_mask(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline uint8_t gc_old_generation_check_alloc_table_young_tag(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline enum gc_write_barrier_kind gc_write_barrier_kind(size_t) {
|
||||
return GC_WRITE_BARRIER_NONE;
|
||||
}
|
||||
static inline size_t gc_write_barrier_field_table_alignment(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline ptrdiff_t gc_write_barrier_field_table_offset(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline size_t gc_write_barrier_field_fields_per_byte(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline uint8_t gc_write_barrier_field_first_bit_pattern(void) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline enum gc_safepoint_mechanism gc_safepoint_mechanism(void) {
|
||||
return GC_SAFEPOINT_MECHANISM_COOPERATIVE;
|
||||
}
|
||||
|
||||
static inline enum gc_cooperative_safepoint_kind gc_cooperative_safepoint_kind(void) {
|
||||
return GC_COOPERATIVE_SAFEPOINT_NONE;
|
||||
}
|
||||
|
||||
static inline int gc_can_pin_objects(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif // SEMI_ATTRS_H
35
libguile/whippet/benchmarks/README.md
Normal file
@@ -0,0 +1,35 @@
# Benchmarks

- [`mt-gcbench.c`](./mt-gcbench.c): The multi-threaded [GCBench
  benchmark](https://hboehm.info/gc/gc_bench.html). An old but
  standard benchmark that allocates different sizes of binary trees.
  As parameters it takes a heap multiplier and a number of mutator
  threads. We analytically compute the peak amount of live data and
  then size the GC heap as a multiplier of that size. It has a peak
  heap consumption of 10 MB or so per mutator thread: not very large.
  At a 2x heap multiplier, it causes about 30 collections for the `mmc`
  collector, and runs somewhere around 200-400 milliseconds in
  single-threaded mode, on the machines I have in 2022. For low thread
  counts, the GCBench benchmark is small; but then again many Guile
  processes also are quite short-lived, so perhaps it is useful to
  ensure that small heaps remain lightweight.

  To stress `mmc`'s handling of fragmentation, we modified this
  benchmark to intersperse pseudorandomly-sized holes between tree
  nodes.

- [`quads.c`](./quads.c): A synthetic benchmark that allocates quad
  trees. The mutator begins by allocating one long-lived tree of depth
  N, and then allocates 13% of the heap in depth-3 trees, 20 times,
  simulating a fixed working set and otherwise an allocation-heavy
  workload. By observing the times to allocate 13% of the heap in
  garbage we can infer mutator overheads, and also note the variance
  for the cycles in which GC hits.

## License

mt-gcbench.c was originally from https://hboehm.info/gc/gc_bench/, which
has a somewhat unclear license. I have modified GCBench significantly
so that I can slot in different GC implementations. Other files are
distributed under the Whippet license; see the top-level
[README.md](../README.md) for more.
54
libguile/whippet/benchmarks/ephemerons-embedder.h
Normal file
@@ -0,0 +1,54 @@
#ifndef EPHEMERONS_EMBEDDER_H
|
||||
#define EPHEMERONS_EMBEDDER_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "ephemerons-types.h"
|
||||
#include "gc-ephemeron.h"
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
#define DEFINE_METHODS(name, Name, NAME) \
|
||||
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
|
||||
static inline void visit_##name##_fields(Name *obj,\
|
||||
void (*visit)(struct gc_edge edge, \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data), \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data) GC_ALWAYS_INLINE;
|
||||
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
|
||||
#undef DEFINE_METHODS
|
||||
|
||||
static inline size_t small_object_size(SmallObject *obj) { return sizeof(*obj); }
|
||||
static inline size_t ephemeron_size(Ephemeron *obj) { return gc_ephemeron_size(); }
|
||||
static inline size_t box_size(Box *obj) { return sizeof(*obj); }
|
||||
|
||||
static inline void
|
||||
visit_small_object_fields(SmallObject *obj,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {}
|
||||
|
||||
static inline void
|
||||
visit_ephemeron_fields(Ephemeron *ephemeron,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
gc_trace_ephemeron((struct gc_ephemeron*)ephemeron, visit, heap, visit_data);
|
||||
}
|
||||
|
||||
static inline void
|
||||
visit_box_fields(Box *box,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
visit(gc_edge(&box->obj), heap, visit_data);
|
||||
}
|
||||
|
||||
#include "simple-gc-embedder.h"
|
||||
|
||||
#endif // EPHEMERONS_EMBEDDER_H
21
libguile/whippet/benchmarks/ephemerons-types.h
Normal file
@@ -0,0 +1,21 @@
#ifndef EPHEMERONS_TYPES_H
|
||||
#define EPHEMERONS_TYPES_H
|
||||
|
||||
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
|
||||
M(box, Box, BOX) \
|
||||
M(ephemeron, Ephemeron, EPHEMERON) \
|
||||
M(small_object, SmallObject, SMALL_OBJECT)
|
||||
|
||||
#include "heap-objects.h"
|
||||
#include "simple-tagging-scheme.h"
|
||||
|
||||
struct SmallObject {
|
||||
struct gc_header header;
|
||||
};
|
||||
|
||||
struct Box {
|
||||
struct gc_header header;
|
||||
void *obj;
|
||||
};
|
||||
|
||||
#endif // EPHEMERONS_TYPES_H
272
libguile/whippet/benchmarks/ephemerons.c
Normal file
@@ -0,0 +1,272 @@
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "gc-api.h"
|
||||
#include "gc-basic-stats.h"
|
||||
#include "gc-ephemeron.h"
|
||||
#include "simple-roots-api.h"
|
||||
#include "ephemerons-types.h"
|
||||
#include "simple-allocator.h"
|
||||
|
||||
typedef HANDLE_TO(SmallObject) SmallObjectHandle;
|
||||
typedef HANDLE_TO(struct gc_ephemeron) EphemeronHandle;
|
||||
typedef HANDLE_TO(Box) BoxHandle;
|
||||
|
||||
static SmallObject* allocate_small_object(struct gc_mutator *mut) {
|
||||
return gc_allocate_with_kind(mut, ALLOC_KIND_SMALL_OBJECT, sizeof(SmallObject));
|
||||
}
|
||||
|
||||
static Box* allocate_box(struct gc_mutator *mut) {
|
||||
return gc_allocate_with_kind(mut, ALLOC_KIND_BOX, sizeof(Box));
|
||||
}
|
||||
|
||||
static struct gc_ephemeron* allocate_ephemeron(struct gc_mutator *mut) {
|
||||
struct gc_ephemeron *ret = gc_allocate_ephemeron(mut);
|
||||
*tag_word(gc_ref_from_heap_object(ret)) = tag_live(ALLOC_KIND_EPHEMERON);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Get the current time in microseconds */
|
||||
static unsigned long current_time(void)
|
||||
{
|
||||
struct timeval t;
|
||||
if (gettimeofday(&t, NULL) == -1)
|
||||
return 0;
|
||||
return t.tv_sec * 1000 * 1000 + t.tv_usec;
|
||||
}
|
||||
|
||||
struct thread {
|
||||
struct gc_mutator *mut;
|
||||
struct gc_mutator_roots roots;
|
||||
};
|
||||
|
||||
static void print_elapsed(const char *what, unsigned long start) {
|
||||
unsigned long end = current_time();
|
||||
unsigned long msec = (end - start) / 1000;
|
||||
unsigned long usec = (end - start) % 1000;
|
||||
printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec);
|
||||
}
|
||||
|
||||
struct call_with_gc_data {
|
||||
void* (*f)(struct thread *);
|
||||
struct gc_heap *heap;
|
||||
};
|
||||
static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) {
|
||||
struct call_with_gc_data *data = arg;
|
||||
struct gc_mutator *mut = gc_init_for_thread(addr, data->heap);
|
||||
struct thread t = { mut, };
|
||||
gc_mutator_set_roots(mut, &t.roots);
|
||||
void *ret = data->f(&t);
|
||||
gc_finish_for_thread(mut);
|
||||
return ret;
|
||||
}
|
||||
static void* call_with_gc(void* (*f)(struct thread *),
|
||||
struct gc_heap *heap) {
|
||||
struct call_with_gc_data data = { f, heap };
|
||||
return gc_call_with_stack_addr(call_with_gc_inner, &data);
|
||||
}
|
||||
|
||||
#define CHECK(x) \
|
||||
do { \
|
||||
if (!(x)) { \
|
||||
fprintf(stderr, "%s:%d: check failed: %s\n", __FILE__, __LINE__, #x); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_EQ(x, y) CHECK((x) == (y))
|
||||
#define CHECK_NE(x, y) CHECK((x) != (y))
|
||||
#define CHECK_NULL(x) CHECK_EQ(x, NULL)
|
||||
#define CHECK_NOT_NULL(x) CHECK_NE(x, NULL)
|
||||
|
||||
static size_t ephemeron_chain_length(struct gc_ephemeron **loc,
|
||||
SmallObject *key) {
|
||||
struct gc_ephemeron *head = gc_ephemeron_chain_head(loc);
|
||||
size_t len = 0;
|
||||
while (head) {
|
||||
CHECK_EQ(key, (SmallObject*)gc_ref_value(gc_ephemeron_key(head)));
|
||||
Box *value = gc_ref_heap_object(gc_ephemeron_value(head));
|
||||
CHECK_NOT_NULL(value);
|
||||
key = value->obj;
|
||||
CHECK_NOT_NULL(key);
|
||||
head = gc_ephemeron_chain_next(head);
|
||||
len++;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static double heap_size;
|
||||
static double heap_multiplier;
|
||||
static size_t nthreads;
|
||||
|
||||
static void cause_gc(struct gc_mutator *mut) {
|
||||
// Doing a full collection lets us reason precisely about liveness.
|
||||
gc_collect(mut, GC_COLLECTION_MAJOR);
|
||||
}
|
||||
|
||||
static void make_ephemeron_chain(struct thread *t, EphemeronHandle *head,
|
||||
SmallObjectHandle *head_key, size_t length) {
|
||||
BoxHandle tail_box = { NULL };
|
||||
PUSH_HANDLE(t, tail_box);
|
||||
|
||||
CHECK_NULL(HANDLE_REF(*head_key));
|
||||
HANDLE_SET(*head_key, allocate_small_object(t->mut));
|
||||
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
HANDLE_SET(tail_box, allocate_box(t->mut));
|
||||
HANDLE_REF(tail_box)->obj = HANDLE_REF(*head_key);
|
||||
HANDLE_SET(*head_key, allocate_small_object(t->mut));
|
||||
struct gc_ephemeron *ephemeron = allocate_ephemeron(t->mut);
|
||||
gc_ephemeron_init(t->mut, ephemeron,
|
||||
gc_ref_from_heap_object(HANDLE_REF(*head_key)),
|
||||
gc_ref_from_heap_object(HANDLE_REF(tail_box)));
|
||||
gc_ephemeron_chain_push(HANDLE_LOC(*head), ephemeron);
|
||||
}
|
||||
|
||||
POP_HANDLE(t);
|
||||
}
|
||||
|
||||
static void* run_one_test(struct thread *t) {
|
||||
size_t unit_size = gc_ephemeron_size() + sizeof(Box);
|
||||
size_t list_length = heap_size / nthreads / heap_multiplier / unit_size;
|
||||
|
||||
printf("Allocating ephemeron list %zu nodes long. Total size %.3fGB.\n",
|
||||
list_length, list_length * unit_size / 1e9);
|
||||
|
||||
unsigned long thread_start = current_time();
|
||||
|
||||
SmallObjectHandle head_key = { NULL };
|
||||
EphemeronHandle head = { NULL };
|
||||
|
||||
PUSH_HANDLE(t, head_key);
|
||||
PUSH_HANDLE(t, head);
|
||||
|
||||
make_ephemeron_chain(t, &head, &head_key, list_length);
|
||||
|
||||
size_t measured_length = ephemeron_chain_length(HANDLE_LOC(head),
|
||||
HANDLE_REF(head_key));
|
||||
CHECK_EQ(measured_length, list_length);
|
||||
|
||||
cause_gc(t->mut);
|
||||
measured_length = ephemeron_chain_length(HANDLE_LOC(head),
|
||||
HANDLE_REF(head_key));
|
||||
CHECK_EQ(measured_length, list_length);
|
||||
|
||||
if (!GC_CONSERVATIVE_ROOTS) {
|
||||
HANDLE_SET(head_key, NULL);
|
||||
cause_gc(t->mut);
|
||||
measured_length = ephemeron_chain_length(HANDLE_LOC(head),
|
||||
HANDLE_REF(head_key));
|
||||
CHECK_EQ(measured_length, 0);
|
||||
}
|
||||
|
||||
// swap head_key for a key halfway in, cause gc
|
||||
// check length is expected half-length; warn, or error if precise
|
||||
// clear and return
|
||||
|
||||
print_elapsed("thread", thread_start);
|
||||
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void* run_one_test_in_thread(void *arg) {
|
||||
struct gc_heap *heap = arg;
|
||||
return call_with_gc(run_one_test, heap);
|
||||
}
|
||||
|
||||
struct join_data { int status; pthread_t thread; };
|
||||
static void *join_thread(void *data) {
|
||||
struct join_data *join_data = data;
|
||||
void *ret;
|
||||
join_data->status = pthread_join(join_data->thread, &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define MAX_THREAD_COUNT 256
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 4 || 5 < argc) {
|
||||
fprintf(stderr, "usage: %s HEAP_SIZE MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
heap_size = atof(argv[1]);
|
||||
heap_multiplier = atof(argv[2]);
|
||||
nthreads = atol(argv[3]);
|
||||
|
||||
if (heap_size < 8192) {
|
||||
fprintf(stderr,
|
||||
"Heap size should probably be at least 8192, right? '%s'\n",
|
||||
argv[1]);
|
||||
return 1;
|
||||
}
|
||||
if (!(1.0 < heap_multiplier && heap_multiplier < 100)) {
|
||||
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]);
|
||||
return 1;
|
||||
}
|
||||
if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
|
||||
fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
|
||||
(int)MAX_THREAD_COUNT, argv[3]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n",
|
||||
heap_size / 1e9, heap_multiplier);
|
||||
|
||||
struct gc_options *options = gc_allocate_options();
|
||||
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
|
||||
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
|
||||
if (argc == 5) {
|
||||
if (!gc_options_parse_and_set_many(options, argv[4])) {
|
||||
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[4]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
struct gc_heap *heap;
|
||||
struct gc_mutator *mut;
|
||||
struct gc_basic_stats stats;
|
||||
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
|
||||
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
|
||||
(size_t)heap_size);
|
||||
return 1;
|
||||
}
|
||||
struct thread main_thread = { mut, };
|
||||
gc_mutator_set_roots(mut, &main_thread.roots);
|
||||
|
||||
pthread_t threads[MAX_THREAD_COUNT];
|
||||
// Run one of the threads in the main thread.
|
||||
for (size_t i = 1; i < nthreads; i++) {
|
||||
int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap);
|
||||
if (status) {
|
||||
errno = status;
|
||||
perror("Failed to create thread");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
run_one_test(&main_thread);
|
||||
for (size_t i = 1; i < nthreads; i++) {
|
||||
struct join_data data = { 0, threads[i] };
|
||||
gc_call_without_gc(mut, join_thread, &data);
|
||||
if (data.status) {
|
||||
errno = data.status;
|
||||
perror("Failed to join thread");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
gc_basic_stats_finish(&stats);
|
||||
fputs("\n", stdout);
|
||||
gc_basic_stats_print(&stats, stdout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
55
libguile/whippet/benchmarks/finalizers-embedder.h
Normal file
55
libguile/whippet/benchmarks/finalizers-embedder.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
#ifndef FINALIZERS_EMBEDDER_H
|
||||
#define FINALIZERS_EMBEDDER_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "finalizers-types.h"
|
||||
#include "gc-finalizer.h"
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
#define DEFINE_METHODS(name, Name, NAME) \
|
||||
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
|
||||
static inline void visit_##name##_fields(Name *obj,\
|
||||
void (*visit)(struct gc_edge edge, \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data), \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data) GC_ALWAYS_INLINE;
|
||||
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
|
||||
#undef DEFINE_METHODS
|
||||
|
||||
static inline size_t small_object_size(SmallObject *obj) { return sizeof(*obj); }
|
||||
static inline size_t finalizer_size(Finalizer *obj) { return gc_finalizer_size(); }
|
||||
static inline size_t pair_size(Pair *obj) { return sizeof(*obj); }
|
||||
|
||||
static inline void
|
||||
visit_small_object_fields(SmallObject *obj,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {}
|
||||
|
||||
static inline void
|
||||
visit_finalizer_fields(Finalizer *finalizer,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
gc_trace_finalizer((struct gc_finalizer*)finalizer, visit, heap, visit_data);
|
||||
}
|
||||
|
||||
static inline void
|
||||
visit_pair_fields(Pair *pair,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
visit(gc_edge(&pair->car), heap, visit_data);
|
||||
visit(gc_edge(&pair->cdr), heap, visit_data);
|
||||
}
|
||||
|
||||
#include "simple-gc-embedder.h"
|
||||
|
||||
#endif // FINALIZERS_EMBEDDER_H
|
22
libguile/whippet/benchmarks/finalizers-types.h
Normal file
22
libguile/whippet/benchmarks/finalizers-types.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#ifndef FINALIZERS_TYPES_H
|
||||
#define FINALIZERS_TYPES_H
|
||||
|
||||
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
|
||||
M(pair, Pair, PAIR) \
|
||||
M(finalizer, Finalizer, FINALIZER) \
|
||||
M(small_object, SmallObject, SMALL_OBJECT)
|
||||
|
||||
#include "heap-objects.h"
|
||||
#include "simple-tagging-scheme.h"
|
||||
|
||||
struct SmallObject {
|
||||
struct gc_header header;
|
||||
};
|
||||
|
||||
struct Pair {
|
||||
struct gc_header header;
|
||||
void *car;
|
||||
void *cdr;
|
||||
};
|
||||
|
||||
#endif // FINALIZERS_TYPES_H
|
284
libguile/whippet/benchmarks/finalizers.c
Normal file
284
libguile/whippet/benchmarks/finalizers.c
Normal file
|
@ -0,0 +1,284 @@
|
|||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "gc-api.h"
|
||||
#include "gc-basic-stats.h"
|
||||
#include "gc-finalizer.h"
|
||||
#include "simple-roots-api.h"
|
||||
#include "finalizers-types.h"
|
||||
#include "simple-allocator.h"
|
||||
|
||||
typedef HANDLE_TO(SmallObject) SmallObjectHandle;
|
||||
typedef HANDLE_TO(struct gc_finalizer) FinalizerHandle;
|
||||
typedef HANDLE_TO(Pair) PairHandle;
|
||||
|
||||
static SmallObject* allocate_small_object(struct gc_mutator *mut) {
|
||||
return gc_allocate_with_kind(mut, ALLOC_KIND_SMALL_OBJECT, sizeof(SmallObject));
|
||||
}
|
||||
|
||||
static Pair* allocate_pair(struct gc_mutator *mut) {
|
||||
return gc_allocate_with_kind(mut, ALLOC_KIND_PAIR, sizeof(Pair));
|
||||
}
|
||||
|
||||
static struct gc_finalizer* allocate_finalizer(struct gc_mutator *mut) {
|
||||
struct gc_finalizer *ret = gc_allocate_finalizer(mut);
|
||||
*tag_word(gc_ref_from_heap_object(ret)) = tag_live(ALLOC_KIND_FINALIZER);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Get the current time in microseconds */
|
||||
static unsigned long current_time(void)
|
||||
{
|
||||
struct timeval t;
|
||||
if (gettimeofday(&t, NULL) == -1)
|
||||
return 0;
|
||||
return t.tv_sec * 1000 * 1000 + t.tv_usec;
|
||||
}
|
||||
|
||||
struct thread {
|
||||
struct gc_mutator *mut;
|
||||
struct gc_mutator_roots roots;
|
||||
};
|
||||
|
||||
static void print_elapsed(const char *what, unsigned long start) {
|
||||
unsigned long end = current_time();
|
||||
unsigned long msec = (end - start) / 1000;
|
||||
unsigned long usec = (end - start) % 1000;
|
||||
printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec);
|
||||
}
|
||||
|
||||
struct call_with_gc_data {
|
||||
void* (*f)(struct thread *);
|
||||
struct gc_heap *heap;
|
||||
};
|
||||
static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) {
|
||||
struct call_with_gc_data *data = arg;
|
||||
struct gc_mutator *mut = gc_init_for_thread(addr, data->heap);
|
||||
struct thread t = { mut, };
|
||||
gc_mutator_set_roots(mut, &t.roots);
|
||||
void *ret = data->f(&t);
|
||||
gc_finish_for_thread(mut);
|
||||
return ret;
|
||||
}
|
||||
static void* call_with_gc(void* (*f)(struct thread *),
|
||||
struct gc_heap *heap) {
|
||||
struct call_with_gc_data data = { f, heap };
|
||||
return gc_call_with_stack_addr(call_with_gc_inner, &data);
|
||||
}
|
||||
|
||||
#define CHECK(x) \
|
||||
do { \
|
||||
if (!(x)) { \
|
||||
fprintf(stderr, "%s:%d: check failed: %s\n", __FILE__, __LINE__, #x); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define CHECK_EQ(x, y) CHECK((x) == (y))
|
||||
#define CHECK_NE(x, y) CHECK((x) != (y))
|
||||
#define CHECK_NULL(x) CHECK_EQ(x, NULL)
|
||||
#define CHECK_NOT_NULL(x) CHECK_NE(x, NULL)
|
||||
|
||||
static double heap_size;
|
||||
static double heap_multiplier;
|
||||
static size_t nthreads;
|
||||
|
||||
static void cause_gc(struct gc_mutator *mut) {
|
||||
// Doing a full collection lets us reason precisely about liveness.
|
||||
gc_collect(mut, GC_COLLECTION_MAJOR);
|
||||
}
|
||||
|
||||
static inline void set_car(struct gc_mutator *mut, Pair *obj, void *val) {
|
||||
void **field = &obj->car;
|
||||
if (val)
|
||||
gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Pair),
|
||||
gc_edge(field),
|
||||
gc_ref_from_heap_object(val));
|
||||
*field = val;
|
||||
}
|
||||
|
||||
static inline void set_cdr(struct gc_mutator *mut, Pair *obj, void *val) {
|
||||
void **field = &obj->cdr;
|
||||
if (val)
|
||||
gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Pair),
|
||||
gc_edge(field),
|
||||
gc_ref_from_heap_object(val));
|
||||
*field = val;
|
||||
}
|
||||
|
||||
static Pair* make_finalizer_chain(struct thread *t, size_t length) {
|
||||
PairHandle head = { NULL };
|
||||
PairHandle tail = { NULL };
|
||||
PUSH_HANDLE(t, head);
|
||||
PUSH_HANDLE(t, tail);
|
||||
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
HANDLE_SET(tail, HANDLE_REF(head));
|
||||
HANDLE_SET(head, allocate_pair(t->mut));
|
||||
set_car(t->mut, HANDLE_REF(head), allocate_small_object(t->mut));
|
||||
set_cdr(t->mut, HANDLE_REF(head), HANDLE_REF(tail));
|
||||
struct gc_finalizer *finalizer = allocate_finalizer(t->mut);
|
||||
gc_finalizer_attach(t->mut, finalizer, 0,
|
||||
gc_ref_from_heap_object(HANDLE_REF(head)),
|
||||
gc_ref_from_heap_object(HANDLE_REF(head)->car));
|
||||
}
|
||||
|
||||
Pair *ret = HANDLE_REF(head);
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void* run_one_test(struct thread *t) {
|
||||
size_t unit_size = gc_finalizer_size() + sizeof(Pair);
|
||||
size_t list_length = heap_size / nthreads / heap_multiplier / unit_size;
|
||||
ssize_t outstanding = list_length;
|
||||
|
||||
printf("Allocating list %zu nodes long. Total size %.3fGB.\n",
|
||||
list_length, list_length * unit_size / 1e9);
|
||||
|
||||
unsigned long thread_start = current_time();
|
||||
|
||||
PairHandle chain = { NULL };
|
||||
PUSH_HANDLE(t, chain);
|
||||
|
||||
HANDLE_SET(chain, make_finalizer_chain(t, list_length));
|
||||
cause_gc(t->mut);
|
||||
|
||||
size_t finalized = 0;
|
||||
for (struct gc_finalizer *f = gc_pop_finalizable(t->mut);
|
||||
f;
|
||||
f = gc_pop_finalizable(t->mut)) {
|
||||
Pair* p = gc_ref_heap_object(gc_finalizer_object(f));
|
||||
SmallObject* o = gc_ref_heap_object(gc_finalizer_closure(f));
|
||||
CHECK_EQ(p->car, o);
|
||||
finalized++;
|
||||
}
|
||||
printf("thread %p: GC before clear finalized %zu nodes.\n", t, finalized);
|
||||
outstanding -= finalized;
|
||||
|
||||
HANDLE_SET(chain, NULL);
|
||||
cause_gc(t->mut);
|
||||
|
||||
finalized = 0;
|
||||
for (struct gc_finalizer *f = gc_pop_finalizable(t->mut);
|
||||
f;
|
||||
f = gc_pop_finalizable(t->mut)) {
|
||||
Pair* p = gc_ref_heap_object(gc_finalizer_object(f));
|
||||
SmallObject* o = gc_ref_heap_object(gc_finalizer_closure(f));
|
||||
CHECK_EQ(p->car, o);
|
||||
finalized++;
|
||||
}
|
||||
printf("thread %p: GC after clear finalized %zu nodes.\n", t, finalized);
|
||||
outstanding -= finalized;
|
||||
|
||||
print_elapsed("thread", thread_start);
|
||||
|
||||
POP_HANDLE(t);
|
||||
|
||||
return (void*)outstanding;
|
||||
}
|
||||
|
||||
static void* run_one_test_in_thread(void *arg) {
|
||||
struct gc_heap *heap = arg;
|
||||
return call_with_gc(run_one_test, heap);
|
||||
}
|
||||
|
||||
struct join_data { int status; pthread_t thread; };
|
||||
static void *join_thread(void *data) {
|
||||
struct join_data *join_data = data;
|
||||
void *ret;
|
||||
join_data->status = pthread_join(join_data->thread, &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define MAX_THREAD_COUNT 256
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 4 || 5 < argc) {
|
||||
fprintf(stderr, "usage: %s HEAP_SIZE MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
heap_size = atof(argv[1]);
|
||||
heap_multiplier = atof(argv[2]);
|
||||
nthreads = atol(argv[3]);
|
||||
|
||||
if (heap_size < 8192) {
|
||||
fprintf(stderr,
|
||||
"Heap size should probably be at least 8192, right? '%s'\n",
|
||||
argv[1]);
|
||||
return 1;
|
||||
}
|
||||
if (!(1.0 < heap_multiplier && heap_multiplier < 100)) {
|
||||
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]);
|
||||
return 1;
|
||||
}
|
||||
if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
|
||||
fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
|
||||
(int)MAX_THREAD_COUNT, argv[3]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n",
|
||||
heap_size / 1e9, heap_multiplier);
|
||||
|
||||
struct gc_options *options = gc_allocate_options();
|
||||
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
|
||||
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
|
||||
if (argc == 5) {
|
||||
if (!gc_options_parse_and_set_many(options, argv[4])) {
|
||||
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[4]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
struct gc_heap *heap;
|
||||
struct gc_mutator *mut;
|
||||
struct gc_basic_stats stats;
|
||||
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
|
||||
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
|
||||
(size_t)heap_size);
|
||||
return 1;
|
||||
}
|
||||
struct thread main_thread = { mut, };
|
||||
gc_mutator_set_roots(mut, &main_thread.roots);
|
||||
|
||||
pthread_t threads[MAX_THREAD_COUNT];
|
||||
// Run one of the threads in the main thread.
|
||||
for (size_t i = 1; i < nthreads; i++) {
|
||||
int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap);
|
||||
if (status) {
|
||||
errno = status;
|
||||
perror("Failed to create thread");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
ssize_t outstanding = (ssize_t)run_one_test(&main_thread);
|
||||
for (size_t i = 1; i < nthreads; i++) {
|
||||
struct join_data data = { 0, threads[i] };
|
||||
void *ret = gc_call_without_gc(mut, join_thread, &data);
|
||||
if (data.status) {
|
||||
errno = data.status;
|
||||
perror("Failed to join thread");
|
||||
return 1;
|
||||
}
|
||||
ssize_t thread_outstanding = (ssize_t)ret;
|
||||
outstanding += thread_outstanding;
|
||||
}
|
||||
|
||||
if (outstanding)
|
||||
printf("\n\nWARNING: %zd nodes outstanding!!!\n\n", outstanding);
|
||||
|
||||
gc_basic_stats_finish(&stats);
|
||||
fputs("\n", stdout);
|
||||
gc_basic_stats_print(&stats, stdout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
19
libguile/whippet/benchmarks/heap-objects.h
Normal file
19
libguile/whippet/benchmarks/heap-objects.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
#ifndef HEAP_OBJECTS_H
|
||||
#define HEAP_OBJECTS_H
|
||||
|
||||
#include "gc-inline.h"
|
||||
#include "gc-edge.h"
|
||||
|
||||
#define DECLARE_NODE_TYPE(name, Name, NAME) \
|
||||
struct Name; \
|
||||
typedef struct Name Name;
|
||||
FOR_EACH_HEAP_OBJECT_KIND(DECLARE_NODE_TYPE)
|
||||
#undef DECLARE_NODE_TYPE
|
||||
|
||||
#define DEFINE_ENUM(name, Name, NAME) ALLOC_KIND_##NAME,
|
||||
enum alloc_kind {
|
||||
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_ENUM)
|
||||
};
|
||||
#undef DEFINE_ENUM
|
||||
|
||||
#endif // HEAP_OBJECTS_H
|
54
libguile/whippet/benchmarks/mt-gcbench-embedder.h
Normal file
54
libguile/whippet/benchmarks/mt-gcbench-embedder.h
Normal file
|
@ -0,0 +1,54 @@
|
|||
#ifndef MT_GCBENCH_EMBEDDER_H
|
||||
#define MT_GCBENCH_EMBEDDER_H
|
||||
|
||||
#include "gc-config.h"
|
||||
#include "mt-gcbench-types.h"
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
#define DEFINE_METHODS(name, Name, NAME) \
|
||||
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
|
||||
static inline void visit_##name##_fields(Name *obj,\
|
||||
void (*visit)(struct gc_edge edge, \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data), \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data) GC_ALWAYS_INLINE;
|
||||
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
|
||||
#undef DEFINE_METHODS
|
||||
|
||||
static inline size_t node_size(Node *obj) {
|
||||
return sizeof(Node);
|
||||
}
|
||||
static inline size_t double_array_size(DoubleArray *array) {
|
||||
return sizeof(*array) + array->length * sizeof(double);
|
||||
}
|
||||
static inline size_t hole_size(Hole *hole) {
|
||||
return sizeof(*hole) + hole->length * sizeof(uintptr_t);
|
||||
}
|
||||
static inline void
|
||||
visit_node_fields(Node *node,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap, void *visit_data) {
|
||||
visit(gc_edge(&node->left), heap, visit_data);
|
||||
visit(gc_edge(&node->right), heap, visit_data);
|
||||
}
|
||||
static inline void
|
||||
visit_double_array_fields(DoubleArray *obj,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap, void *visit_data),
|
||||
struct gc_heap *heap, void *visit_data) {
|
||||
}
|
||||
static inline void
|
||||
visit_hole_fields(Hole *obj,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap, void *visit_data),
|
||||
struct gc_heap *heap, void *visit_data) {
|
||||
if (GC_PRECISE_ROOTS)
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
#include "simple-gc-embedder.h"
|
||||
|
||||
#endif // MT_GCBENCH_EMBEDDER_H
|
34
libguile/whippet/benchmarks/mt-gcbench-types.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
#ifndef GCBENCH_TYPES_H
|
||||
#define GCBENCH_TYPES_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
|
||||
M(node, Node, NODE) \
|
||||
M(double_array, DoubleArray, DOUBLE_ARRAY) \
|
||||
M(hole, Hole, HOLE)
|
||||
|
||||
#include "heap-objects.h"
|
||||
#include "simple-tagging-scheme.h"
|
||||
|
||||
struct Node {
|
||||
struct gc_header header;
|
||||
struct Node *left;
|
||||
struct Node *right;
|
||||
int i, j;
|
||||
};
|
||||
|
||||
struct DoubleArray {
|
||||
struct gc_header header;
|
||||
size_t length;
|
||||
double values[0];
|
||||
};
|
||||
|
||||
struct Hole {
|
||||
struct gc_header header;
|
||||
size_t length;
|
||||
uintptr_t values[0];
|
||||
};
|
||||
|
||||
#endif // GCBENCH_TYPES_H
|
402
libguile/whippet/benchmarks/mt-gcbench.c
Normal file
|
@ -0,0 +1,402 @@
|
|||
// This is adapted from a benchmark written by John Ellis and Pete Kovac
|
||||
// of Post Communications.
|
||||
// It was modified by Hans Boehm of Silicon Graphics.
|
||||
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
|
||||
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
|
||||
//
|
||||
// This is no substitute for real applications. No actual application
|
||||
// is likely to behave in exactly this way. However, this benchmark was
|
||||
// designed to be more representative of real applications than other
|
||||
// Java GC benchmarks of which we are aware.
|
||||
// It attempts to model those properties of allocation requests that
|
||||
// are important to current GC techniques.
|
||||
// It is designed to be used either to obtain a single overall performance
|
||||
// number, or to give a more detailed estimate of how collector
|
||||
// performance varies with object lifetimes. It prints the time
|
||||
// required to allocate and collect balanced binary trees of various
|
||||
// sizes. Smaller trees result in shorter object lifetimes. Each cycle
|
||||
// allocates roughly the same amount of memory.
|
||||
// Two data structures are kept around during the entire process, so
|
||||
// that the measured performance is representative of applications
|
||||
// that maintain some live in-memory data. One of these is a tree
|
||||
// containing many pointers. The other is a large array containing
|
||||
// double precision floating point numbers. Both should be of comparable
|
||||
// size.
|
||||
//
|
||||
// The results are only really meaningful together with a specification
|
||||
// of how much memory was used. It is possible to trade memory for
|
||||
// better time performance. This benchmark should be run in a 32 MB
|
||||
// heap, though we don't currently know how to enforce that uniformly.
|
||||
//
|
||||
// Unlike the original Ellis and Kovac benchmark, we do not attempt
|
||||
// to measure pause times. This facility should eventually be added back
|
||||
// in. There are several reasons for omitting it for now. The original
|
||||
// implementation depended on assumptions about the thread scheduler
|
||||
// that don't hold uniformly. The results really measure both the
|
||||
// scheduler and GC. Pause time measurements tend to not fit well with
|
||||
// current benchmark suites. As far as we know, none of the current
|
||||
// commercial Java implementations seriously attempt to minimize GC pause
|
||||
// times.
|
||||
|
||||
#include <errno.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "gc-api.h"
|
||||
#include "gc-basic-stats.h"
|
||||
#include "mt-gcbench-types.h"
|
||||
#include "simple-roots-api.h"
|
||||
#include "simple-allocator.h"
|
||||
|
||||
#define MAX_THREAD_COUNT 256
|
||||
|
||||
static const int long_lived_tree_depth = 16; // about 4Mb
|
||||
static const int array_size = 500000; // about 4Mb
|
||||
static const int min_tree_depth = 4;
|
||||
static const int max_tree_depth = 16;
|
||||
|
||||
typedef HANDLE_TO(Node) NodeHandle;
|
||||
typedef HANDLE_TO(DoubleArray) DoubleArrayHandle;
|
||||
|
||||
static Node* allocate_node(struct gc_mutator *mut) {
|
||||
// memset to 0 by the collector.
|
||||
return gc_allocate_with_kind(mut, ALLOC_KIND_NODE, sizeof (Node));
|
||||
}
|
||||
|
||||
static DoubleArray* allocate_double_array(struct gc_mutator *mut,
|
||||
size_t size) {
|
||||
// May be uninitialized.
|
||||
size_t bytes = sizeof(DoubleArray) + sizeof (double) * size;
|
||||
DoubleArray *ret =
|
||||
gc_allocate_pointerless_with_kind(mut, ALLOC_KIND_DOUBLE_ARRAY, bytes);
|
||||
ret->length = size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static Hole* allocate_hole(struct gc_mutator *mut, size_t size) {
|
||||
size_t bytes = sizeof(Hole) + sizeof (uintptr_t) * size;
|
||||
Hole *ret = gc_allocate_with_kind(mut, ALLOC_KIND_HOLE, bytes);
|
||||
ret->length = size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned long current_time(void) {
|
||||
struct timeval t = { 0 };
|
||||
gettimeofday(&t, NULL);
|
||||
return t.tv_sec * 1000 * 1000 + t.tv_usec;
|
||||
}
|
||||
|
||||
static double elapsed_millis(unsigned long start) {
|
||||
return (current_time() - start) * 1e-3;
|
||||
}
|
||||
|
||||
// Nodes used by a tree of a given size
|
||||
static int tree_size(int i) {
|
||||
return ((1 << (i + 1)) - 1);
|
||||
}
|
||||
|
||||
// Number of iterations to use for a given tree depth
|
||||
static int compute_num_iters(int i) {
|
||||
return 2 * tree_size(max_tree_depth + 2) / tree_size(i);
|
||||
}
|
||||
|
||||
// A power-law distribution. Each integer was selected by starting at 0, taking
|
||||
// a random number in [0,1), and then accepting the integer if the random number
|
||||
// was less than 0.15, or trying again with the next integer otherwise. Useful
|
||||
// for modelling allocation sizes or number of garbage objects to allocate
|
||||
// between live allocations.
|
||||
static const uint8_t power_law_distribution[256] = {
|
||||
1, 15, 3, 12, 2, 8, 4, 0, 18, 7, 9, 8, 15, 2, 36, 5,
|
||||
1, 9, 6, 11, 9, 19, 2, 0, 0, 3, 9, 6, 3, 2, 1, 1,
|
||||
6, 1, 8, 4, 2, 0, 5, 3, 7, 0, 0, 3, 0, 4, 1, 7,
|
||||
1, 8, 2, 2, 2, 14, 0, 7, 8, 0, 2, 1, 4, 12, 7, 5,
|
||||
0, 3, 4, 13, 10, 2, 3, 7, 0, 8, 0, 23, 0, 16, 1, 1,
|
||||
6, 28, 1, 18, 0, 3, 6, 5, 8, 6, 14, 5, 2, 5, 0, 11,
|
||||
0, 18, 4, 16, 1, 4, 3, 13, 3, 23, 7, 4, 10, 5, 3, 13,
|
||||
0, 14, 5, 5, 2, 5, 0, 16, 2, 0, 1, 1, 0, 0, 4, 2,
|
||||
7, 7, 0, 5, 7, 2, 1, 24, 27, 3, 7, 1, 0, 8, 1, 4,
|
||||
0, 3, 0, 7, 7, 3, 9, 2, 9, 2, 5, 10, 1, 1, 12, 6,
|
||||
2, 9, 5, 0, 4, 6, 0, 7, 2, 1, 5, 4, 1, 0, 1, 15,
|
||||
4, 0, 15, 4, 0, 0, 32, 18, 2, 2, 1, 7, 8, 3, 11, 1,
|
||||
2, 7, 11, 1, 9, 1, 2, 6, 11, 17, 1, 2, 5, 1, 14, 3,
|
||||
6, 1, 1, 15, 3, 1, 0, 6, 10, 8, 1, 3, 2, 7, 0, 1,
|
||||
0, 11, 3, 3, 5, 8, 2, 0, 0, 7, 12, 2, 5, 20, 3, 7,
|
||||
4, 4, 5, 22, 1, 5, 2, 7, 15, 2, 4, 6, 11, 8, 12, 1
|
||||
};
|
||||
|
||||
static size_t power_law(size_t *counter) {
|
||||
return power_law_distribution[(*counter)++ & 0xff];
|
||||
}
|
||||
|
||||
struct thread {
|
||||
struct gc_mutator *mut;
|
||||
struct gc_mutator_roots roots;
|
||||
size_t counter;
|
||||
};
|
||||
|
||||
static void allocate_garbage(struct thread *t) {
|
||||
size_t hole = power_law(&t->counter);
|
||||
if (hole) {
|
||||
allocate_hole(t->mut, hole);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void set_field(struct gc_mutator *mut, Node *obj,
|
||||
Node **field, Node *val) {
|
||||
gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Node),
|
||||
gc_edge(field),
|
||||
gc_ref_from_heap_object(val));
|
||||
*field = val;
|
||||
}
|
||||
|
||||
// Build tree top down, assigning to older objects.
|
||||
static void populate(struct thread *t, int depth, Node *node) {
|
||||
struct gc_mutator *mut = t->mut;
|
||||
if (depth <= 0)
|
||||
return;
|
||||
|
||||
NodeHandle self = { node };
|
||||
PUSH_HANDLE(t, self);
|
||||
allocate_garbage(t);
|
||||
NodeHandle l = { allocate_node(mut) };
|
||||
PUSH_HANDLE(t, l);
|
||||
allocate_garbage(t);
|
||||
NodeHandle r = { allocate_node(mut) };
|
||||
PUSH_HANDLE(t, r);
|
||||
|
||||
set_field(mut, HANDLE_REF(self), &HANDLE_REF(self)->left, HANDLE_REF(l));
|
||||
set_field(mut, HANDLE_REF(self), &HANDLE_REF(self)->right, HANDLE_REF(r));
|
||||
// i is 0 because the memory is zeroed.
|
||||
HANDLE_REF(self)->j = depth;
|
||||
|
||||
populate(t, depth-1, HANDLE_REF(self)->left);
|
||||
populate(t, depth-1, HANDLE_REF(self)->right);
|
||||
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
}
|
||||
|
||||
// Build tree bottom-up
|
||||
static Node* make_tree(struct thread *t, int depth) {
|
||||
struct gc_mutator *mut = t->mut;
|
||||
if (depth <= 0)
|
||||
return allocate_node(mut);
|
||||
|
||||
NodeHandle left = { make_tree(t, depth-1) };
|
||||
PUSH_HANDLE(t, left);
|
||||
NodeHandle right = { make_tree(t, depth-1) };
|
||||
PUSH_HANDLE(t, right);
|
||||
|
||||
allocate_garbage(t);
|
||||
Node *result = allocate_node(mut);
|
||||
result->left = HANDLE_REF(left);
|
||||
result->right = HANDLE_REF(right);
|
||||
// i is 0 because the memory is zeroed.
|
||||
result->j = depth;
|
||||
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void validate_tree(Node *tree, int depth) {
|
||||
#ifndef NDEBUG
|
||||
GC_ASSERT_EQ(tree->i, 0);
|
||||
GC_ASSERT_EQ(tree->j, depth);
|
||||
if (depth == 0) {
|
||||
GC_ASSERT(!tree->left);
|
||||
GC_ASSERT(!tree->right);
|
||||
} else {
|
||||
GC_ASSERT(tree->left);
|
||||
GC_ASSERT(tree->right);
|
||||
validate_tree(tree->left, depth - 1);
|
||||
validate_tree(tree->right, depth - 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void time_construction(struct thread *t, int depth) {
|
||||
struct gc_mutator *mut = t->mut;
|
||||
int num_iters = compute_num_iters(depth);
|
||||
NodeHandle temp_tree = { NULL };
|
||||
PUSH_HANDLE(t, temp_tree);
|
||||
|
||||
printf("Creating %d trees of depth %d\n", num_iters, depth);
|
||||
|
||||
{
|
||||
unsigned long start = current_time();
|
||||
for (int i = 0; i < num_iters; ++i) {
|
||||
HANDLE_SET(temp_tree, allocate_node(mut));
|
||||
populate(t, depth, HANDLE_REF(temp_tree));
|
||||
validate_tree(HANDLE_REF(temp_tree), depth);
|
||||
HANDLE_SET(temp_tree, NULL);
|
||||
}
|
||||
printf("\tTop down construction took %.3f msec\n",
|
||||
elapsed_millis(start));
|
||||
}
|
||||
|
||||
{
|
||||
unsigned long start = current_time();
|
||||
for (int i = 0; i < num_iters; ++i) {
|
||||
HANDLE_SET(temp_tree, make_tree(t, depth));
|
||||
validate_tree(HANDLE_REF(temp_tree), depth);
|
||||
HANDLE_SET(temp_tree, NULL);
|
||||
}
|
||||
printf("\tBottom up construction took %.3f msec\n",
|
||||
elapsed_millis(start));
|
||||
}
|
||||
|
||||
POP_HANDLE(t);
|
||||
}
|
||||
|
||||
struct call_with_gc_data {
|
||||
void* (*f)(struct thread *);
|
||||
struct gc_heap *heap;
|
||||
};
|
||||
static void* call_with_gc_inner(struct gc_stack_addr *addr, void *arg) {
|
||||
struct call_with_gc_data *data = arg;
|
||||
struct gc_mutator *mut = gc_init_for_thread(addr, data->heap);
|
||||
struct thread t = { mut, };
|
||||
gc_mutator_set_roots(mut, &t.roots);
|
||||
void *ret = data->f(&t);
|
||||
gc_finish_for_thread(mut);
|
||||
return ret;
|
||||
}
|
||||
static void* call_with_gc(void* (*f)(struct thread *),
|
||||
struct gc_heap *heap) {
|
||||
struct call_with_gc_data data = { f, heap };
|
||||
return gc_call_with_stack_addr(call_with_gc_inner, &data);
|
||||
}
|
||||
|
||||
static void* run_one_test(struct thread *t) {
|
||||
NodeHandle long_lived_tree = { NULL };
|
||||
NodeHandle temp_tree = { NULL };
|
||||
DoubleArrayHandle array = { NULL };
|
||||
|
||||
PUSH_HANDLE(t, long_lived_tree);
|
||||
PUSH_HANDLE(t, temp_tree);
|
||||
PUSH_HANDLE(t, array);
|
||||
|
||||
// Create a long lived object
|
||||
printf(" Creating a long-lived binary tree of depth %d\n",
|
||||
long_lived_tree_depth);
|
||||
HANDLE_SET(long_lived_tree, allocate_node(t->mut));
|
||||
populate(t, long_lived_tree_depth, HANDLE_REF(long_lived_tree));
|
||||
|
||||
// Create long-lived array, filling half of it
|
||||
printf(" Creating a long-lived array of %d doubles\n", array_size);
|
||||
HANDLE_SET(array, allocate_double_array(t->mut, array_size));
|
||||
for (int i = 0; i < array_size/2; ++i) {
|
||||
HANDLE_REF(array)->values[i] = 1.0/i;
|
||||
}
|
||||
|
||||
for (int d = min_tree_depth; d <= max_tree_depth; d += 2) {
|
||||
time_construction(t, d);
|
||||
}
|
||||
|
||||
validate_tree(HANDLE_REF(long_lived_tree), long_lived_tree_depth);
|
||||
|
||||
// Fake reference to LongLivedTree and array to keep them from being optimized
|
||||
// away.
|
||||
if (HANDLE_REF(long_lived_tree)->i != 0
|
||||
|| HANDLE_REF(array)->values[1000] != 1.0/1000)
|
||||
fprintf(stderr, "Failed\n");
|
||||
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
POP_HANDLE(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void* run_one_test_in_thread(void *arg) {
|
||||
struct gc_heap *heap = arg;
|
||||
return call_with_gc(run_one_test, heap);
|
||||
}
|
||||
|
||||
struct join_data { int status; pthread_t thread; };
|
||||
static void *join_thread(void *data) {
|
||||
struct join_data *join_data = data;
|
||||
void *ret;
|
||||
join_data->status = pthread_join(join_data->thread, &ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
size_t heap_max_live =
|
||||
tree_size(long_lived_tree_depth) * sizeof(Node) +
|
||||
tree_size(max_tree_depth) * sizeof(Node) +
|
||||
sizeof(DoubleArray) + sizeof(double) * array_size;
|
||||
if (argc < 3 || argc > 4) {
|
||||
fprintf(stderr, "usage: %s MULTIPLIER NTHREADS [GC-OPTIONS]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
double multiplier = atof(argv[1]);
|
||||
size_t nthreads = atol(argv[2]);
|
||||
|
||||
if (!(0.1 < multiplier && multiplier < 100)) {
|
||||
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
if (nthreads < 1 || nthreads > MAX_THREAD_COUNT) {
|
||||
fprintf(stderr, "Expected integer between 1 and %d for thread count, got '%s'\n",
|
||||
(int)MAX_THREAD_COUNT, argv[2]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t heap_size = heap_max_live * multiplier * nthreads;
|
||||
|
||||
struct gc_options *options = gc_allocate_options();
|
||||
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
|
||||
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
|
||||
if (argc == 4) {
|
||||
if (!gc_options_parse_and_set_many(options, argv[3])) {
|
||||
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[3]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
struct gc_heap *heap;
|
||||
struct gc_mutator *mut;
|
||||
struct gc_basic_stats stats;
|
||||
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
|
||||
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
|
||||
heap_size);
|
||||
return 1;
|
||||
}
|
||||
struct thread main_thread = { mut, };
|
||||
gc_mutator_set_roots(mut, &main_thread.roots);
|
||||
|
||||
printf("Garbage Collector Test\n");
|
||||
printf(" Live storage will peak at %zd bytes.\n\n", heap_max_live);
|
||||
|
||||
pthread_t threads[MAX_THREAD_COUNT];
|
||||
// Run one of the threads in the main thread.
|
||||
for (size_t i = 1; i < nthreads; i++) {
|
||||
int status = pthread_create(&threads[i], NULL, run_one_test_in_thread, heap);
|
||||
if (status) {
|
||||
errno = status;
|
||||
perror("Failed to create thread");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
run_one_test(&main_thread);
|
||||
for (size_t i = 1; i < nthreads; i++) {
|
||||
struct join_data data = { 0, threads[i] };
|
||||
gc_call_without_gc(mut, join_thread, &data);
|
||||
if (data.status) {
|
||||
errno = data.status;
|
||||
perror("Failed to join thread");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
gc_basic_stats_finish(&stats);
|
||||
fputs("\n", stdout);
|
||||
gc_basic_stats_print(&stats, stdout);
|
||||
return 0;
}
|
37
libguile/whippet/benchmarks/quads-embedder.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#ifndef QUADS_EMBEDDER_H
|
||||
#define QUADS_EMBEDDER_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "quads-types.h"
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
#define DEFINE_METHODS(name, Name, NAME) \
|
||||
static inline size_t name##_size(Name *obj) GC_ALWAYS_INLINE; \
|
||||
static inline void visit_##name##_fields(Name *obj,\
|
||||
void (*visit)(struct gc_edge edge, \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data), \
|
||||
struct gc_heap *heap, \
|
||||
void *visit_data) GC_ALWAYS_INLINE;
|
||||
FOR_EACH_HEAP_OBJECT_KIND(DEFINE_METHODS)
|
||||
#undef DEFINE_METHODS
|
||||
|
||||
static inline size_t quad_size(Quad *obj) {
|
||||
return sizeof(Quad);
|
||||
}
|
||||
|
||||
static inline void
|
||||
visit_quad_fields(Quad *quad,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
visit(gc_edge(&quad->kids[i]), heap, visit_data);
|
||||
}
|
||||
|
||||
#include "simple-gc-embedder.h"
|
||||
|
||||
#endif // QUADS_EMBEDDER_H
|
15
libguile/whippet/benchmarks/quads-types.h
Normal file
|
@ -0,0 +1,15 @@
|
|||
#ifndef QUADS_TYPES_H
|
||||
#define QUADS_TYPES_H
|
||||
|
||||
#define FOR_EACH_HEAP_OBJECT_KIND(M) \
|
||||
M(quad, Quad, QUAD)
|
||||
|
||||
#include "heap-objects.h"
|
||||
#include "simple-tagging-scheme.h"
|
||||
|
||||
struct Quad {
|
||||
struct gc_header header;
|
||||
struct Quad *kids[4];
|
||||
};
|
||||
|
||||
#endif // QUADS_TYPES_H
|
181
libguile/whippet/benchmarks/quads.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "gc-api.h"
|
||||
#include "gc-basic-stats.h"
|
||||
#include "simple-roots-api.h"
|
||||
#include "quads-types.h"
|
||||
#include "simple-allocator.h"
|
||||
|
||||
typedef HANDLE_TO(Quad) QuadHandle;
|
||||
|
||||
static Quad* allocate_quad(struct gc_mutator *mut) {
|
||||
// memset to 0 by the collector.
|
||||
return gc_allocate_with_kind(mut, ALLOC_KIND_QUAD, sizeof (Quad));
|
||||
}
|
||||
|
||||
/* Get the current time in microseconds */
|
||||
static unsigned long current_time(void)
|
||||
{
|
||||
struct timeval t;
|
||||
if (gettimeofday(&t, NULL) == -1)
|
||||
return 0;
|
||||
return t.tv_sec * 1000 * 1000 + t.tv_usec;
|
||||
}
|
||||
|
||||
struct thread {
|
||||
struct gc_mutator *mut;
|
||||
struct gc_mutator_roots roots;
|
||||
size_t counter;
|
||||
};
|
||||
|
||||
// Build tree bottom-up
|
||||
static Quad* make_tree(struct thread *t, int depth) {
|
||||
if (depth<=0) {
|
||||
return allocate_quad(t->mut);
|
||||
} else {
|
||||
QuadHandle kids[4] = { { NULL }, };
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
HANDLE_SET(kids[i], make_tree(t, depth-1));
|
||||
PUSH_HANDLE(t, kids[i]);
|
||||
}
|
||||
|
||||
Quad *result = allocate_quad(t->mut);
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
result->kids[i] = HANDLE_REF(kids[i]);
|
||||
|
||||
for (size_t i = 0; i < 4; i++)
|
||||
POP_HANDLE(t);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
static void validate_tree(Quad *tree, int depth) {
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
if (depth == 0) {
|
||||
if (tree->kids[i])
|
||||
abort();
|
||||
} else {
|
||||
if (!tree->kids[i])
|
||||
abort();
|
||||
validate_tree(tree->kids[i], depth - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void print_elapsed(const char *what, unsigned long start) {
|
||||
unsigned long end = current_time();
|
||||
unsigned long msec = (end - start) / 1000;
|
||||
unsigned long usec = (end - start) % 1000;
|
||||
printf("Completed %s in %lu.%.3lu msec\n", what, msec, usec);
|
||||
}
|
||||
|
||||
static size_t parse_size(char *arg, const char *what) {
|
||||
long val = atol(arg);
|
||||
if (val <= 0) {
|
||||
fprintf(stderr, "Failed to parse %s '%s'\n", what, arg);
|
||||
exit(1);
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
static size_t tree_size(size_t depth) {
|
||||
size_t nquads = 0;
|
||||
size_t leaf_count = 1;
|
||||
for (size_t i = 0; i <= depth; i++) {
|
||||
if (nquads > ((size_t)-1) - leaf_count) {
|
||||
fprintf(stderr,
|
||||
"error: address space too small for quad tree of depth %zu\n",
|
||||
depth);
|
||||
exit(1);
|
||||
}
|
||||
nquads += leaf_count;
|
||||
leaf_count *= 4;
|
||||
}
|
||||
return nquads;
|
||||
}
|
||||
|
||||
#define MAX_THREAD_COUNT 256
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 3 || 4 < argc) {
|
||||
fprintf(stderr, "usage: %s DEPTH MULTIPLIER [GC-OPTIONS]\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t depth = parse_size(argv[1], "depth");
|
||||
double multiplier = atof(argv[2]);
|
||||
|
||||
if (!(1.0 < multiplier && multiplier < 100)) {
|
||||
fprintf(stderr, "Failed to parse heap multiplier '%s'\n", argv[2]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t nquads = tree_size(depth);
|
||||
size_t tree_bytes = nquads * sizeof(Quad);
|
||||
size_t heap_size = tree_bytes * multiplier;
|
||||
|
||||
printf("Allocating heap of %.3fGB (%.2f multiplier of live data).\n",
|
||||
heap_size / 1e9, multiplier);
|
||||
|
||||
struct gc_options *options = gc_allocate_options();
|
||||
gc_options_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
|
||||
gc_options_set_size(options, GC_OPTION_HEAP_SIZE, heap_size);
|
||||
if (argc == 4) {
|
||||
if (!gc_options_parse_and_set_many(options, argv[3])) {
|
||||
fprintf(stderr, "Failed to set GC options: '%s'\n", argv[3]);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
struct gc_heap *heap;
|
||||
struct gc_mutator *mut;
|
||||
struct gc_basic_stats stats;
|
||||
if (!gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats)) {
|
||||
fprintf(stderr, "Failed to initialize GC with heap size %zu bytes\n",
|
||||
heap_size);
|
||||
return 1;
|
||||
}
|
||||
struct thread t = { mut, };
|
||||
gc_mutator_set_roots(mut, &t.roots);
|
||||
|
||||
QuadHandle quad = { NULL };
|
||||
|
||||
PUSH_HANDLE(&t, quad);
|
||||
|
||||
printf("Making quad tree of depth %zu (%zu nodes). Total size %.3fGB.\n",
|
||||
depth, nquads, (nquads * sizeof(Quad)) / 1e9);
|
||||
unsigned long start = current_time();
|
||||
HANDLE_SET(quad, make_tree(&t, depth));
|
||||
print_elapsed("construction", start);
|
||||
|
||||
validate_tree(HANDLE_REF(quad), depth);
|
||||
|
||||
size_t garbage_step = heap_size / 7.5;
|
||||
printf("Allocating %.3f GB of garbage, 20 times, validating live tree each time.\n",
|
||||
garbage_step / 1e9);
|
||||
unsigned long garbage_start = current_time();
|
||||
for (size_t i = 0; i < 20; i++) {
|
||||
size_t garbage_depth = 3;
|
||||
start = current_time();
|
||||
for (size_t i = garbage_step/(tree_size(garbage_depth)*4*sizeof(Quad*)); i; i--)
|
||||
make_tree(&t, garbage_depth);
|
||||
print_elapsed("allocating garbage", start);
|
||||
|
||||
start = current_time();
|
||||
validate_tree(HANDLE_REF(quad), depth);
|
||||
}
|
||||
print_elapsed("allocation loop", garbage_start);
|
||||
|
||||
gc_basic_stats_finish(&stats);
|
||||
fputs("\n", stdout);
|
||||
gc_basic_stats_print(&stats, stdout);
|
||||
|
||||
POP_HANDLE(&t);
|
||||
return 0;
|
||||
}
|
||||
|
21
libguile/whippet/benchmarks/simple-allocator.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
#ifndef SIMPLE_ALLOCATOR_H
|
||||
#define SIMPLE_ALLOCATOR_H
|
||||
|
||||
#include "simple-tagging-scheme.h"
|
||||
#include "gc-api.h"
|
||||
|
||||
static inline void*
|
||||
gc_allocate_with_kind(struct gc_mutator *mut, enum alloc_kind kind, size_t bytes) {
|
||||
void *obj = gc_allocate(mut, bytes, GC_ALLOCATION_TAGGED);
|
||||
*tag_word(gc_ref_from_heap_object(obj)) = tag_live(kind);
|
||||
return obj;
|
||||
}
|
||||
|
||||
static inline void*
|
||||
gc_allocate_pointerless_with_kind(struct gc_mutator *mut, enum alloc_kind kind, size_t bytes) {
|
||||
void *obj = gc_allocate(mut, bytes, GC_ALLOCATION_TAGGED_POINTERLESS);
|
||||
*tag_word(gc_ref_from_heap_object(obj)) = tag_live(kind);
|
||||
return obj;
|
||||
}
|
||||
|
||||
#endif // SIMPLE_ALLOCATOR_H
|
183
libguile/whippet/benchmarks/simple-gc-embedder.h
Normal file
|
@ -0,0 +1,183 @@
|
|||
#include <stdatomic.h>
|
||||
|
||||
#include "simple-tagging-scheme.h"
|
||||
#include "simple-roots-types.h"
|
||||
#include "gc-config.h"
|
||||
#include "gc-embedder-api.h"
|
||||
|
||||
#define GC_EMBEDDER_EPHEMERON_HEADER struct gc_header header;
|
||||
#define GC_EMBEDDER_FINALIZER_HEADER struct gc_header header;
|
||||
|
||||
static inline size_t gc_finalizer_priority_count(void) { return 2; }
|
||||
|
||||
static inline int
|
||||
gc_is_valid_conservative_ref_displacement(uintptr_t displacement) {
|
||||
#if GC_CONSERVATIVE_ROOTS || GC_CONSERVATIVE_TRACE
|
||||
// Here is where you would allow tagged heap object references.
|
||||
return displacement == 0;
|
||||
#else
|
||||
// Shouldn't get here.
|
||||
GC_CRASH();
|
||||
#endif
|
||||
}
|
||||
|
||||
// No external objects in simple benchmarks.
|
||||
static inline int gc_extern_space_visit(struct gc_extern_space *space,
|
||||
struct gc_edge edge,
|
||||
struct gc_ref ref) {
|
||||
GC_CRASH();
|
||||
}
|
||||
static inline void gc_extern_space_start_gc(struct gc_extern_space *space,
|
||||
int is_minor_gc) {
|
||||
}
|
||||
static inline void gc_extern_space_finish_gc(struct gc_extern_space *space,
|
||||
int is_minor_gc) {
|
||||
}
|
||||
|
||||
static inline void gc_trace_object(struct gc_ref ref,
|
||||
void (*trace_edge)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *trace_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data,
|
||||
size_t *size) {
|
||||
#if GC_CONSERVATIVE_TRACE
|
||||
// Shouldn't get here.
|
||||
GC_CRASH();
|
||||
#else
|
||||
switch (tag_live_alloc_kind(*tag_word(ref))) {
|
||||
#define SCAN_OBJECT(name, Name, NAME) \
|
||||
case ALLOC_KIND_##NAME: \
|
||||
if (trace_edge) \
|
||||
visit_##name##_fields(gc_ref_heap_object(ref), trace_edge, \
|
||||
heap, trace_data); \
|
||||
if (size) \
|
||||
*size = name##_size(gc_ref_heap_object(ref)); \
|
||||
break;
|
||||
FOR_EACH_HEAP_OBJECT_KIND(SCAN_OBJECT)
|
||||
#undef SCAN_OBJECT
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void visit_roots(struct handle *roots,
|
||||
void (*trace_edge)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *trace_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data) {
|
||||
for (struct handle *h = roots; h; h = h->next)
|
||||
trace_edge(gc_edge(&h->v), heap, trace_data);
|
||||
}
|
||||
|
||||
static inline void gc_trace_mutator_roots(struct gc_mutator_roots *roots,
|
||||
void (*trace_edge)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *trace_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data) {
|
||||
if (roots)
|
||||
visit_roots(roots->roots, trace_edge, heap, trace_data);
|
||||
}
|
||||
|
||||
static inline void gc_trace_heap_roots(struct gc_heap_roots *roots,
|
||||
void (*trace_edge)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *trace_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data) {
|
||||
if (roots)
|
||||
visit_roots(roots->roots, trace_edge, heap, trace_data);
|
||||
}
|
||||
|
||||
static inline uintptr_t gc_object_forwarded_nonatomic(struct gc_ref ref) {
|
||||
uintptr_t tag = *tag_word(ref);
|
||||
return (tag & gcobj_not_forwarded_bit) ? 0 : tag;
|
||||
}
|
||||
|
||||
static inline void gc_object_forward_nonatomic(struct gc_ref ref,
|
||||
struct gc_ref new_ref) {
|
||||
*tag_word(ref) = gc_ref_value(new_ref);
|
||||
}
|
||||
|
||||
static inline struct gc_atomic_forward
|
||||
gc_atomic_forward_begin(struct gc_ref ref) {
|
||||
uintptr_t tag = atomic_load_explicit(tag_word(ref), memory_order_acquire);
|
||||
enum gc_forwarding_state state;
|
||||
if (tag == gcobj_busy)
|
||||
state = GC_FORWARDING_STATE_BUSY;
|
||||
else if (tag & gcobj_not_forwarded_bit)
|
||||
state = GC_FORWARDING_STATE_NOT_FORWARDED;
|
||||
else
|
||||
state = GC_FORWARDING_STATE_FORWARDED;
|
||||
return (struct gc_atomic_forward){ ref, tag, state };
|
||||
}
|
||||
|
||||
static inline int
|
||||
gc_atomic_forward_retry_busy(struct gc_atomic_forward *fwd) {
|
||||
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_BUSY);
|
||||
uintptr_t tag = atomic_load_explicit(tag_word(fwd->ref),
|
||||
memory_order_acquire);
|
||||
if (tag == gcobj_busy)
|
||||
return 0;
|
||||
if (tag & gcobj_not_forwarded_bit) {
|
||||
fwd->state = GC_FORWARDING_STATE_NOT_FORWARDED;
|
||||
fwd->data = tag;
|
||||
} else {
|
||||
fwd->state = GC_FORWARDING_STATE_FORWARDED;
|
||||
fwd->data = tag;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_atomic_forward_acquire(struct gc_atomic_forward *fwd) {
|
||||
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_NOT_FORWARDED);
|
||||
if (atomic_compare_exchange_strong(tag_word(fwd->ref), &fwd->data,
|
||||
gcobj_busy))
|
||||
fwd->state = GC_FORWARDING_STATE_ACQUIRED;
|
||||
else if (fwd->data == gcobj_busy)
|
||||
fwd->state = GC_FORWARDING_STATE_BUSY;
|
||||
else {
|
||||
GC_ASSERT((fwd->data & gcobj_not_forwarded_bit) == 0);
|
||||
fwd->state = GC_FORWARDING_STATE_FORWARDED;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_atomic_forward_abort(struct gc_atomic_forward *fwd) {
|
||||
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
|
||||
atomic_store_explicit(tag_word(fwd->ref), fwd->data, memory_order_release);
|
||||
fwd->state = GC_FORWARDING_STATE_NOT_FORWARDED;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
gc_atomic_forward_object_size(struct gc_atomic_forward *fwd) {
|
||||
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
|
||||
switch (tag_live_alloc_kind(fwd->data)) {
|
||||
#define OBJECT_SIZE(name, Name, NAME) \
|
||||
case ALLOC_KIND_##NAME: \
|
||||
return name##_size(gc_ref_heap_object(fwd->ref));
|
||||
FOR_EACH_HEAP_OBJECT_KIND(OBJECT_SIZE)
|
||||
#undef OBJECT_SIZE
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_atomic_forward_commit(struct gc_atomic_forward *fwd, struct gc_ref new_ref) {
|
||||
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_ACQUIRED);
|
||||
*tag_word(new_ref) = fwd->data;
|
||||
atomic_store_explicit(tag_word(fwd->ref), gc_ref_value(new_ref),
|
||||
memory_order_release);
|
||||
fwd->state = GC_FORWARDING_STATE_FORWARDED;
|
||||
}
|
||||
|
||||
static inline uintptr_t
|
||||
gc_atomic_forward_address(struct gc_atomic_forward *fwd) {
|
||||
GC_ASSERT(fwd->state == GC_FORWARDING_STATE_FORWARDED);
|
||||
return fwd->data;
|
||||
}
|
26
libguile/whippet/benchmarks/simple-roots-api.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
#ifndef SIMPLE_ROOTS_API_H
|
||||
#define SIMPLE_ROOTS_API_H
|
||||
|
||||
#include "gc-config.h"
|
||||
#include "simple-roots-types.h"
|
||||
|
||||
#define HANDLE_TO(T) union { T* v; struct handle handle; }
|
||||
#define HANDLE_LOC(h) &(h).v
|
||||
#define HANDLE_REF(h) (h).v
|
||||
#define HANDLE_SET(h,val) do { (h).v = val; } while (0)
|
||||
#define PUSH_HANDLE(cx, h) push_handle(&(cx)->roots.roots, &h.handle)
|
||||
#define POP_HANDLE(cx) pop_handle(&(cx)->roots.roots)
|
||||
|
||||
static inline void push_handle(struct handle **roots, struct handle *handle) {
|
||||
if (GC_PRECISE_ROOTS) {
|
||||
handle->next = *roots;
|
||||
*roots = handle;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pop_handle(struct handle **roots) {
|
||||
if (GC_PRECISE_ROOTS)
|
||||
*roots = (*roots)->next;
|
||||
}
|
||||
|
||||
#endif // SIMPLE_ROOTS_API_H
|
17
libguile/whippet/benchmarks/simple-roots-types.h
Normal file
|
@ -0,0 +1,17 @@
|
|||
#ifndef SIMPLE_ROOTS_TYPES_H
|
||||
#define SIMPLE_ROOTS_TYPES_H
|
||||
|
||||
struct handle {
|
||||
void *v;
|
||||
struct handle *next;
|
||||
};
|
||||
|
||||
struct gc_heap_roots {
|
||||
struct handle *roots;
|
||||
};
|
||||
|
||||
struct gc_mutator_roots {
|
||||
struct handle *roots;
|
||||
};
|
||||
|
||||
#endif // SIMPLE_ROOTS_TYPES_H
|
29
libguile/whippet/benchmarks/simple-tagging-scheme.h
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef SIMPLE_TAGGING_SCHEME_H
|
||||
#define SIMPLE_TAGGING_SCHEME_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
struct gc_header {
|
||||
uintptr_t tag;
|
||||
};
|
||||
|
||||
// Alloc kind is in bits 1-7, for live objects.
|
||||
static const uintptr_t gcobj_alloc_kind_mask = 0x7f;
|
||||
static const uintptr_t gcobj_alloc_kind_shift = 1;
|
||||
static const uintptr_t gcobj_forwarded_mask = 0x1;
|
||||
static const uintptr_t gcobj_not_forwarded_bit = 0x1;
|
||||
static const uintptr_t gcobj_busy = 0;
|
||||
static inline uint8_t tag_live_alloc_kind(uintptr_t tag) {
|
||||
return (tag >> gcobj_alloc_kind_shift) & gcobj_alloc_kind_mask;
|
||||
}
|
||||
static inline uintptr_t tag_live(uint8_t alloc_kind) {
|
||||
return ((uintptr_t)alloc_kind << gcobj_alloc_kind_shift)
|
||||
| gcobj_not_forwarded_bit;
|
||||
}
|
||||
|
||||
static inline uintptr_t* tag_word(struct gc_ref ref) {
|
||||
struct gc_header *header = gc_ref_heap_object(ref);
|
||||
return &header->tag;
|
||||
}
|
||||
|
||||
#endif // SIMPLE_TAGGING_SCHEME_H
|
160
libguile/whippet/ctf_to_json.py
Executable file
|
@ -0,0 +1,160 @@
|
|||
#!/usr/bin/env python3
|
||||
# Any copyright is dedicated to the Public Domain.
|
||||
# https://creativecommons.org/publicdomain/zero/1.0/
|
||||
#
|
||||
# Originally written by Andy Wingo <wingo@igalia.com>.
|
||||
|
||||
import bt2 # From the babeltrace2 package.
|
||||
import sys
|
||||
import json
|
||||
from enum import Enum
|
||||
|
||||
# Usage: ./ctf_to_json.py ~/lttng-traces/name-of-your-trace > foo.json
|
||||
#
|
||||
# Convert a Common Trace Format (CTF) trace, for example as produced by
|
||||
# LTTng, to the JSON-based Trace Event Format (TEF), for example as
|
||||
# consumed by `chrome://tracing`, `https://ui.perfetto.dev/`, or
|
||||
# `https://profiler.firefox.com`.
|
||||
|
||||
# The Trace Event Format is documented here:
|
||||
#
|
||||
# https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0
|
||||
|
||||
# By default, events are emitted as EventPhase.INSTANT. We also support
|
||||
# rewriting the event stream so as to generate EventPhase.BEGIN /
|
||||
# EventPhase.END events for specific named events.
|
||||
|
||||
synthetic_events = {
|
||||
'gc': ['whippet:mutator_cause_gc',
|
||||
'whippet:restarting_mutators'],
|
||||
'stop-the-world': ['whippet:requesting_stop',
|
||||
'whippet:mutators_stopped'],
|
||||
'trace': ['whippet:prepare_gc',
|
||||
'whippet:restarting_mutators'],
|
||||
'mutator-stopped': ['whippet:mutator_stopping',
|
||||
'whippet:mutator_restarted'],
|
||||
'trace-roots': ['whippet:trace_roots_begin',
|
||||
'whippet:trace_roots_end'],
|
||||
'trace-check-termination': ['whippet:trace_check_termination_begin',
|
||||
'whippet:trace_check_termination_end'],
|
||||
'trace-objects': ['whippet:trace_objects_begin',
|
||||
'whippet:trace_objects_end'],
|
||||
'trace-worker': ['whippet:trace_worker_begin',
|
||||
'whippet:trace_worker_end']
|
||||
}
|
||||
|
||||
class EventPhase(Enum):
|
||||
BEGIN = 'B'
|
||||
END = 'E'
|
||||
COMPLETE = 'X'
|
||||
INSTANT = 'i'
|
||||
COUNTER = 'C'
|
||||
NESTABLE_START = 'b'
|
||||
NESTABLE_INSTANT = 'n'
|
||||
NESTABLE_END = 'e'
|
||||
FLOW_START = 's'
|
||||
FLOW_STEP = 't'
|
||||
FLOW_END = 'f'
|
||||
SAMPLE = 'P'
|
||||
OBJECT_CREATED = 'N'
|
||||
OBJECT_SNAPSHOT = 'O'
|
||||
OBJECT_DESTROYED = 'D'
|
||||
METADATA = 'M'
|
||||
MEMORY_DUMP_GLOBAL = 'V'
|
||||
MEMORY_DUMP_PROCESS = 'v'
|
||||
MARK = 'R'
|
||||
CLOCK_SYNC = 'c'
|
||||
CONTEXT_BEGIN = '('
|
||||
CONTEXT_END = ')'
|
||||
|
||||
base_time = None
|
||||
def event_us(msg):
|
||||
assert(msg.default_clock_snapshot.clock_class.name == 'monotonic')
|
||||
assert(msg.default_clock_snapshot.clock_class.frequency == 1e9)
|
||||
global base_time
|
||||
ns = msg.default_clock_snapshot.value
|
||||
if base_time is None:
|
||||
base_time = ns
|
||||
return (ns - base_time) * 1e-3
|
||||
|
||||
def lower(x):
|
||||
if isinstance(x, str) or isinstance(x, int) or isinstance(x, float):
|
||||
return x
|
||||
if isinstance(x, dict) or isinstance(x, bt2._StructureFieldConst):
|
||||
return {lower(k):lower(v) for k, v in x.items()}
|
||||
if isinstance(x, bt2._BoolValueConst) or isinstance(x, bt2._BoolFieldConst):
|
||||
return bool(x)
|
||||
if isinstance(x, bt2._EnumerationFieldConst):
|
||||
return repr(x)
|
||||
if isinstance(x, bt2._IntegerValueConst) or isinstance(x, bt2._IntegerFieldConst):
|
||||
return int(x)
|
||||
if isinstance(x, bt2._RealValueConst) or isinstance(x, bt2._RealFieldConst):
|
||||
return float(x)
|
||||
if isinstance(x, bt2._StringValueConst) or isinstance(x, bt2._StringFieldConst):
|
||||
return str(x)
|
||||
raise ValueError("Unexpected value from trace", x)
|
||||
|
||||
# Specific Whippet events.
|
||||
synthetic_begin = {}
|
||||
synthetic_end = {}
|
||||
for synthetic, [begin, end] in synthetic_events.items():
|
||||
synthetic_begin[begin] = []
|
||||
synthetic_end[end] = []
|
||||
for synthetic, [begin, end] in synthetic_events.items():
|
||||
synthetic_begin[begin].append(synthetic)
|
||||
synthetic_end[end].append(synthetic)
|
||||
|
||||
def put(str):
|
||||
sys.stdout.write(str)
|
||||
|
||||
need_comma = False
|
||||
def print_event(ev):
|
||||
global need_comma
|
||||
if need_comma:
|
||||
sys.stdout.write(',\n ')
|
||||
else:
|
||||
need_comma = True
|
||||
# It appears to be faster to make a string, then print the string,
|
||||
# than to call json.dump with a file object.
|
||||
# json.dump(ev, sys.stdout, ensure_ascii=False, check_circular=False)
|
||||
put(json.dumps(ev, ensure_ascii=False, check_circular=False))
|
||||
|
||||
def emit_event(msg, name, phase):
|
||||
ev = {'name': name,
|
||||
'cat': 'whippet',
|
||||
'ph': phase.value,
|
||||
'ts': event_us(msg),
|
||||
'pid': lower(msg.event.common_context_field['vpid']),
|
||||
'tid': lower(msg.event.common_context_field['vtid']),
|
||||
'args': lower(msg.event.payload_field)}
|
||||
print_event(ev)
|
||||
def emit_begin_event(msg, name):
|
||||
emit_event(msg, name, EventPhase.BEGIN)
|
||||
def emit_end_event(msg, name):
|
||||
emit_event(msg, name, EventPhase.END)
|
||||
|
||||
def emit_events(msg):
|
||||
emit_event(msg, msg.event.name, EventPhase.INSTANT)
|
||||
for begin in synthetic_begin.get(msg.event.name, []):
|
||||
emit_begin_event(msg, begin)
|
||||
for end in synthetic_end.get(msg.event.name, []):
|
||||
emit_end_event(msg, end)
|
||||
|
||||
def ctf_to_json(path):
|
||||
msg_it = bt2.TraceCollectionMessageIterator(path)
|
||||
put('{\n')
|
||||
put(' "traceEvents": [\n ')
|
||||
for msg in msg_it:
|
||||
if hasattr(msg, 'event'):
|
||||
emit_events(msg)
|
||||
put('\n')
|
||||
put('\n ],\n')
|
||||
put(' "displayTimeUnit": "ns"\n')
|
||||
put('}\n')
|
||||
|
||||
if len(sys.argv) != 2:
|
||||
sys.stderr.write(
|
||||
'usage: ' + sys.argv[0] + ' ~/lttng-traces/name-of-your-trace\n')
|
||||
sys.exit(1)
|
||||
else:
|
||||
ctf_to_json(sys.argv[1])
|
13
libguile/whippet/doc/README.md
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Whippet documentation
|
||||
|
||||
* [Manual](./manual.md): How do you get your program to use
|
||||
Whippet? What is the API?
|
||||
|
||||
* [Collector implementations](./collectors.md): There are a number of
|
||||
implementations of the Whippet API with differing performance
|
||||
characteristics and which impose different requirements on the
|
||||
embedder.
|
||||
|
||||
* [Guile](./guile.md): Some notes on a potential rebase of Guile on
|
||||
top of Whippet.
|
||||
|
26
libguile/whippet/doc/collector-bdw.md
Normal file
|
@ -0,0 +1,26 @@
|
|||
# Boehm-Demers-Weiser collector
|
||||
|
||||
Whippet's `bdw` collector is backed by a third-party garbage collector,
|
||||
the [Boehm-Demers-Weiser collector](https://github.com/ivmai/bdwgc).
|
||||
|
||||
BDW-GC is a mark-sweep collector with conservative root-finding,
|
||||
conservative heap tracing, and parallel tracing.
|
||||
|
||||
Whereas the other Whippet collectors rely on mutators to
|
||||
[periodically check if they need to
|
||||
stop](https://github.com/wingo/whippet/blob/main/doc/manual.md#safepoints),
|
||||
`bdw` will stop mutators with a POSIX signal. Also, it doesn't really
|
||||
support ephemerons (the Whippet `bdw` collector simulates them using
|
||||
finalizers), and both ephemerons and finalizers only approximate the
|
||||
Whippet behavior, because they are implemented in terms of what BDW-GC
|
||||
provides.
|
||||
|
||||
`bdw` supports the `fixed` and `growable` heap-sizing policies, but not
|
||||
`adaptive`, as BDW-GC can't reliably return memory to the OS. Also,
|
||||
[`growable` has an effective limit of a 3x heap
|
||||
multiplier](https://github.com/wingo/whippet/blob/main/src/bdw.c#L478).
|
||||
Oh well!
|
||||
|
||||
It's a bit of an oddball from a Whippet perspective, but useful as a
|
||||
migration path if you have an embedder that is already using BDW-GC.
|
||||
And, it is a useful performance comparison.
|
148
libguile/whippet/doc/collector-mmc.md
Normal file
|
@ -0,0 +1,148 @@
|
|||
# Mostly-marking collector
|
||||
|
||||
The `mmc` collector is mainly a mark-region collector, inspired by
|
||||
[Immix](http://users.cecs.anu.edu.au/~steveb/pubs/papers/immix-pldi-2008.pdf).
|
||||
To a first approximation, `mmc` is a whole-heap Immix collector with a
|
||||
large object space on the side.
|
||||
|
||||
When tracing, `mmc` mostly marks objects in place. If the heap is
|
||||
too fragmented, it can compact the heap by choosing to evacuate
|
||||
sparsely-populated heap blocks instead of marking in place. However
|
||||
evacuation is strictly optional, which means that `mmc` is also
|
||||
compatible with conservative root-finding, making it a good replacement
|
||||
for embedders that currently use the [Boehm-Demers-Weiser
|
||||
collector](./collector-bdw.md).
|
||||
|
||||
## Differences from Immix
|
||||
|
||||
The original Immix divides the heap into 32kB blocks, and then divides
|
||||
those blocks into 128B lines. An Immix allocation can span lines but
|
||||
not blocks; allocations larger than 8kB go into a separate large object
|
||||
space. Mutators request blocks from the global store and allocate into
|
||||
those blocks using bump-pointer allocation. When all blocks are
|
||||
consumed, Immix stops the world and traces the object graph, marking
|
||||
objects but also the lines that objects are on. After marking, blocks
|
||||
contain some lines with live objects and others that are completely
|
||||
free. Spans of free lines are called holes. When a mutator gets a
|
||||
recycled block from the global block store, it allocates into those
|
||||
holes. For an exposition of Immix, see the lovely detailed [Rust
|
||||
implementation](http://users.cecs.anu.edu.au/~steveb/pubs/papers/rust-ismm-2016.pdf).
|
||||
|
||||
The essential difference of `mmc` from Immix stems from a simple
|
||||
observation: Immix needs a side table of line mark bytes and also a mark
|
||||
bit or bits in each object (or in a side table). But if instead you
|
||||
choose to store mark bytes rather than bits (for concurrency reasons) in
|
||||
a side table, with one mark byte per granule (unit of allocation,
|
||||
perhaps 16 bytes), then you effectively have a line mark table where the
|
||||
granule size is the line size. You can bump-pointer allocate into holes
|
||||
in the mark byte table.
|
||||
|
||||
You might think this is a bad tradeoff, and perhaps it is: I don't know
|
||||
yet. If your granule size is two pointers, then one mark byte per
|
||||
granule is 6.25% overhead on 64-bit, or 12.5% on 32-bit. Especially on
|
||||
32-bit, it's a lot! On the other hand, instead of the worst case of one
|
||||
survivor object wasting a line (or two, in the case of conservative line
|
||||
marking), granule-size-is-line-size instead wastes nothing. Also, you
|
||||
don't need GC bits in the object itself, and you can use the mark byte
|
||||
array to record the object end, so that finding holes in a block can
|
||||
just read the mark table and can avoid looking at object memory.
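To make that concrete, here is a minimal sketch of bump-pointer allocation into holes found by scanning a mark-byte table. It is illustrative only: it assumes 16-byte granules, and `GRANULE_SIZE`, `find_hole` and `bump_allocate` are invented names rather than the actual `mmc` internals.

```c
#include <stddef.h>
#include <stdint.h>

#define GRANULE_SIZE 16

struct hole { size_t start_granule; size_t granule_count; };

// Scan the mark-byte table for the next run of unmarked granules (a hole).
static int find_hole(const uint8_t *mark_bytes, size_t ngranules,
                     size_t from, struct hole *out) {
  size_t i = from;
  while (i < ngranules && mark_bytes[i]) i++;   // skip marked (live) granules
  if (i == ngranules) return 0;                 // no hole left in this block
  size_t start = i;
  while (i < ngranules && !mark_bytes[i]) i++;  // extend over free granules
  out->start_granule = start;
  out->granule_count = i - start;
  return 1;
}

// Bump-pointer allocate within a hole; returns NULL when the hole is used up.
static void* bump_allocate(uintptr_t block_base, const struct hole *h,
                           size_t *cursor, size_t granules) {
  if (*cursor + granules > h->granule_count) return NULL;
  void *obj = (void*)(block_base + (h->start_granule + *cursor) * GRANULE_SIZE);
  *cursor += granules;
  return obj;
}
```

Allocation proceeds hole by hole until the scan finds no more free granules, at which point the mutator fetches another block.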
|
||||
|
||||
## Optional features
|
||||
|
||||
The `mmc` collector has a few feature flags that can be turned on or
|
||||
off. If you use the [standard embedder makefile include](../embed.mk),
|
||||
then there is a name for each combination of features: `mmc` has no
|
||||
additional features, `parallel-mmc` enables parallel marking,
|
||||
`parallel-generational-mmc` enables generations,
|
||||
`stack-conservative-parallel-generational-mmc` uses conservative
|
||||
root-finding, and `heap-conservative-parallel-generational-mmc`
|
||||
additionally traces the heap conservatively. You can leave off
|
||||
components of the name to get a collector without those features.
|
||||
Underneath this corresponds to some pre-processor definitions passed to
|
||||
the compiler on the command line.
|
||||
|
||||
### Generations
|
||||
|
||||
`mmc` supports generational tracing via the [sticky mark-bit
|
||||
algorithm](https://wingolog.org/archives/2022/10/22/the-sticky-mark-bit-algorithm).
|
||||
This requires that the embedder emit [write
|
||||
barriers](https://github.com/wingo/whippet/blob/main/doc/manual.md#write-barriers);
|
||||
if your embedder cannot ensure write barriers are always invoked, then
|
||||
generational collection is not for you. (We could perhaps relax this a
|
||||
bit, following what [Ruby developers
|
||||
did](http://rvm.jp/~ko1/activities/rgengc_ismm.pdf).)
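From the embedder's point of view, emitting the barrier just means calling `gc_write_barrier` before each store of a heap reference, as the `mt-gcbench` benchmark in this commit does. The sketch below assumes that benchmark's `Node` type and headers.

```c
#include "gc-api.h"
#include "mt-gcbench-types.h"

// Tell the collector about the store, then perform it.
static inline void set_field(struct gc_mutator *mut, Node *obj,
                             Node **field, Node *val) {
  gc_write_barrier(mut, gc_ref_from_heap_object(obj), sizeof(Node),
                   gc_edge(field), gc_ref_from_heap_object(val));
  *field = val;
}
```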
|
||||
|
||||
The write barrier is currently a card-marking barrier emitted on stores,
|
||||
with one card byte per 256 object bytes, where the card location can be
|
||||
computed from the object address because blocks are allocated in
|
||||
two-megabyte aligned slabs.
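Schematically, and only as an illustration (the constants and the card-table placement are assumptions here, not the actual `mmc` layout), dirtying a card for a store might look like this:

```c
#include <stdint.h>

#define SLAB_SIZE  (2 * 1024 * 1024)   // blocks live in 2 MB aligned slabs
#define CARD_BYTES 256                 // one card byte per 256 object bytes

static inline void mark_card(void *obj) {
  uintptr_t addr = (uintptr_t)obj;
  uintptr_t slab = addr & ~(uintptr_t)(SLAB_SIZE - 1);  // slab base address
  uint8_t *cards = (uint8_t *)slab;                     // assumed card-table location
  cards[(addr - slab) / CARD_BYTES] = 1;                // dirty the card
}
```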
|
||||
|
||||
### Parallel tracing
|
||||
|
||||
You almost certainly want this on! `parallel-mmc` uses the
|
||||
[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h).
|
||||
Each trace worker maintains a [local queue of objects that need
|
||||
tracing](../src/local-worklist.h), which currently has a capacity of
|
||||
1024 entries. If the local queue becomes full, the worker will publish
|
||||
3/4 of those entries to the worker's [shared
|
||||
worklist](../src/shared-worklist.h). When a worker runs out of local
|
||||
work, it will first try to remove work from its own shared worklist,
|
||||
then will try to steal from other workers.
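The publishing step might look roughly like the following; the sizes match the description above, but the names and the choice of which entries to publish are invented for illustration (see `../src/local-worklist.h` and `../src/shared-worklist.h` for the real code).

```c
#include <stddef.h>

#define LOCAL_WORKLIST_SIZE 1024

struct local_worklist { void *entries[LOCAL_WORKLIST_SIZE]; size_t count; };

// Assumed to exist for this sketch: hand a batch of entries to the shared worklist.
void shared_worklist_publish(void **entries, size_t count);

static void local_worklist_push(struct local_worklist *wl, void *obj) {
  if (wl->count == LOCAL_WORKLIST_SIZE) {
    size_t publish = LOCAL_WORKLIST_SIZE / 4 * 3;  // publish 3/4 of the entries
    shared_worklist_publish(wl->entries, publish);
    size_t keep = LOCAL_WORKLIST_SIZE - publish;   // keep the newest quarter local
    for (size_t i = 0; i < keep; i++)
      wl->entries[i] = wl->entries[publish + i];
    wl->count = keep;
  }
  wl->entries[wl->count++] = obj;
}
```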
|
||||
|
||||
The memory used for the external worklist is dynamically allocated from
|
||||
the OS and is not currently counted as contributing to the heap size.
|
||||
If you absolutely need to avoid dynamic allocation during GC, `mmc`
|
||||
(even `serial-mmc`) would need some work for your use case, to allocate
|
||||
a fixed-size space for a marking queue and to gracefully handle mark
|
||||
queue overflow.
|
||||
|
||||
### Conservative stack scanning
|
||||
|
||||
With `semi` and `pcc`, embedders must precisely enumerate the set of
|
||||
*roots*: the edges into the heap from outside. Commonly, roots include
|
||||
global variables, as well as working variables from each mutator's
|
||||
stack. `mmc` can optionally mark mutator stacks *conservatively*:
|
||||
treating each word on the stack as if it may be an object reference, and
|
||||
marking any object at that address.
|
||||
|
||||
After all these years, *whether* to mark stacks conservatively or not is
|
||||
still an open research question. Conservative stack scanning can retain
|
||||
too much data if an integer is confused for an object reference, and it
|
||||
removes a layer of correctness-by-construction from a system. Sometimes
|
||||
conservative stack-scanning is required, for example if your embedder
|
||||
cannot enumerate roots precisely. But there are reasons to consider it
|
||||
even if you can do precise roots: conservative scanning removes the need
|
||||
for the compiler to produce a stack map to store the precise root
|
||||
enumeration at every safepoint; it removes the need to look up a stack
|
||||
map when tracing; and it allows C or C++ support code to avoid having to
|
||||
place roots in traceable locations published to the garbage collector.
|
||||
And the [performance question is still
|
||||
open](https://dl.acm.org/doi/10.1145/2660193.2660198).
|
||||
|
||||
Anyway. `mmc` can scan roots conservatively. Those roots are pinned
|
||||
for the collection; even if the collection will compact via evacuation,
|
||||
referents of conservative roots won't be moved. Objects not directly
|
||||
referenced by roots can be evacuated, however.
|
||||
|
||||
### Conservative heap scanning
|
||||
|
||||
In addition to stack and global references, the Boehm-Demers-Weiser
|
||||
collector scans heap objects conservatively as well, treating each word
|
||||
of each heap object as if it were a reference. `mmc` can do that, if
|
||||
the embedder is unable to provide a `gc_trace_object` implementation.
|
||||
However this is generally a performance lose, and it prevents
|
||||
evacuation.
|
||||
|
||||
## Other implementation tidbits
|
||||
|
||||
`mmc` does lazy sweeping: as a mutator grabs a fresh block, it
|
||||
reclaims memory that was unmarked in the previous collection before
|
||||
making the memory available for allocation. This makes sweeping
|
||||
naturally cache-friendly and parallel.
|
||||
|
||||
The mark byte array facilitates conservative collection by being an
|
||||
oracle for "does this address start an object".
|
||||
|
||||
For a detailed introduction, see [Whippet: Towards a new local
|
||||
maximum](https://wingolog.org/archives/2023/02/07/whippet-towards-a-new-local-maximum),
|
||||
a talk given at FOSDEM 2023.
|
84
libguile/whippet/doc/collector-pcc.md
Normal file
|
@ -0,0 +1,84 @@
|
|||
# Parallel copying collector
|
||||
|
||||
Whippet's `pcc` collector is a copying collector, like the simpler
|
||||
[`semi`](./collector-semi.md), but supporting multiple mutator threads,
|
||||
multiple tracing threads, and using an external FIFO worklist instead of
|
||||
a Cheney worklist.
|
||||
|
||||
Like `semi`, `pcc` traces by evacuation: it moves all live objects on
|
||||
every collection. (Exception: objects larger than 8192 bytes are
|
||||
placed into a partitioned space which traces by marking in place instead
|
||||
of copying.) Evacuation requires precise roots, so if your embedder
|
||||
does not support precise roots, `pcc` is not for you.
|
||||
|
||||
Again like `semi`, `pcc` generally requires a heap size at least twice
|
||||
as large as the maximum live heap size, and performs best with ample
|
||||
heap sizes; between 3× and 5× is best.
|
||||
|
||||
Overall, `pcc` is a better version of `semi`. It should have broadly
|
||||
the same performance characteristics with a single mutator and with
|
||||
parallelism disabled, additionally allowing multiple mutators, and
|
||||
scaling better with multiple tracing threads.
|
||||
|
||||
`pcc` has a generational configuration, conventionally referred to as
|
||||
`generational-pcc`, in which both the nursery and the old generation are
|
||||
copy spaces. Objects stay in the nursery for one cycle before moving on
|
||||
to the old generation. This configuration is a bit new (January 2025)
|
||||
and still needs some tuning.
|
||||
|
||||
## Implementation notes
|
||||
|
||||
Unlike `semi` which has a single global bump-pointer allocation region,
|
||||
`pcc` structures the heap into 64-kB blocks. In this way it supports
|
||||
multiple mutator threads: mutators do local bump-pointer allocation into
|
||||
their own block, and when their block is full, they fetch another from
|
||||
the global store.
|
||||
|
||||
The block size is 64 kB, but really it's 128 kB, because each block has
|
||||
two halves: the active region and the copy reserve. Dividing each block
|
||||
in two allows the collector to easily grow and shrink the heap while
|
||||
ensuring there is always enough reserve space.
|
||||
|
||||
Blocks are allocated in 64-MB aligned slabs, so there are 512 blocks in
|
||||
a slab. The first block in a slab is used by the collector itself, to
|
||||
keep metadata for the rest of the blocks, for example a chain pointer
|
||||
allowing blocks to be collected in lists, a saved allocation pointer for
|
||||
partially-filled blocks, whether the block is paged in or out, and so
|
||||
on.
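For illustration only, that per-block metadata can be pictured as a struct along these lines; the field names are invented, and the real layout lives in `pcc`'s sources.

```c
#include <stdint.h>

// Hypothetical summary for one block, kept in the slab's first block.
struct block_summary {
  struct block_summary *next;  // chain pointer, so blocks can sit on lists
  uintptr_t saved_alloc;       // allocation pointer for a partially-filled block
  uint8_t paged_in;            // is the block's memory currently paged in?
  uint8_t flags;               // other per-block state
};
```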
|
||||
|
||||
`pcc` supports tracing in parallel. This mechanism works somewhat like
|
||||
allocation, in which multiple trace workers compete to evacuate objects
|
||||
into their local allocation buffers; when an allocation buffer is full,
|
||||
the trace worker grabs another, just like mutators do.
|
||||
|
||||
Unlike the simple semi-space collector which uses a Cheney grey
|
||||
worklist, `pcc` uses an external worklist. If parallelism is disabled
|
||||
at compile-time, it uses a simple first-in, first-out queue of objects
|
||||
to be traced. Like a Cheney worklist, this should result in objects
|
||||
being copied in breadth-first order. The literature would suggest that
|
||||
depth-first is generally better for locality, but that preserving
|
||||
allocation order is generally best. This is something to experiment
|
||||
with in the future.
|
||||
|
||||
If parallelism is enabled, as it is by default, `pcc` uses a
|
||||
[fine-grained work-stealing parallel tracer](../src/parallel-tracer.h).
|
||||
Each trace worker maintains a [local queue of objects that need
|
||||
tracing](../src/local-worklist.h), which currently has 1024 entries. If
|
||||
the local queue becomes full, the worker will publish 3/4 of those
|
||||
entries to the worker's [shared worklist](../src/shared-worklist.h).
|
||||
When a worker runs out of local work, it will first try to remove work
|
||||
from its own shared worklist, then will try to steal from other workers.
|
||||
|
||||
If only one tracing thread is enabled at run-time (`parallelism=1`) (or
|
||||
if parallelism is disabled at compile-time), `pcc` will evacuate by
|
||||
non-atomic forwarding, but if multiple threads compete to evacuate
|
||||
objects, `pcc` uses [atomic compare-and-swap instead of simple
|
||||
forwarding pointer updates](./manual.md#forwarding-objects). This
|
||||
imposes around a 30% performance penalty, but having multiple tracing
|
||||
threads is generally worth it, unless the object graph is itself serial.
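The difference between the two forwarding modes can be sketched with C11 atomics on an object's header word. This is a simplification (no busy state, invented helper names), not the embedder API itself.

```c
#include <stdatomic.h>
#include <stdint.h>

// Single tracing thread: a plain store installs the forwarding address.
static void forward_nonatomic(uintptr_t *header, uintptr_t new_addr) {
  *header = new_addr;
}

// Multiple tracing threads: race via compare-and-swap.  The winner copies the
// object; a loser learns the winner's forwarding address from the header word.
static uintptr_t forward_atomic(_Atomic uintptr_t *header,
                                uintptr_t old_tag, uintptr_t new_addr) {
  if (atomic_compare_exchange_strong(header, &old_tag, new_addr))
    return new_addr;   // we installed the forwarding pointer
  return old_tag;      // CAS failed: old_tag now holds the other thread's address
}
```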
|
||||
|
||||
The memory used for the external worklist is dynamically allocated from
|
||||
the OS and is not currently counted as contributing to the heap size.
|
||||
If you are targeting a microcontroller or something, probably you need
|
||||
to choose a different kind of collector that never dynamically
|
||||
allocates, such as `semi`.
|
23
libguile/whippet/doc/collector-semi.md
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Semi-space collector
|
||||
|
||||
The `semi` collector is simple. It is mostly useful as a first
|
||||
collector to try out, to make sure that a mutator correctly records all
|
||||
roots: because `semi` moves every live object on every collection, it is
|
||||
very effective at shaking out mutator bugs.
|
||||
|
||||
If your embedder chooses to not precisely record roots, for example
|
||||
instead choosing to conservatively scan the stack, then the semi-space
|
||||
collector is not for you: `semi` requires precise roots.
|
||||
|
||||
For more on semi-space collectors, see
|
||||
https://wingolog.org/archives/2022/12/10/a-simple-semi-space-collector.
|
||||
|
||||
Whippet's `semi` collector incorporates a large-object space, which
|
||||
marks objects in place instead of moving. Otherwise, `semi` generally
|
||||
requires a heap size at least twice as large as the maximum live heap
|
||||
size, and performs best with ample heap sizes; between 3× and 5× is
|
||||
best.
|
||||
|
||||
The semi-space collector doesn't support multiple mutator threads. If
|
||||
you want a copying collector for a multi-threaded mutator, look at
|
||||
[pcc](./collector-pcc.md).
|
43
libguile/whippet/doc/collectors.md
Normal file
|
@ -0,0 +1,43 @@
|
|||
# Whippet collectors
|
||||
|
||||
Whippet has four collectors currently:
|
||||
- [Semi-space collector (`semi`)](./collector-semi.md): For
|
||||
single-threaded embedders who are not too tight on memory.
|
||||
- [Parallel copying collector (`pcc`)](./collector-pcc.md): Like
|
||||
`semi`, but with support for multiple mutator and tracing threads and
|
||||
generational collection.
|
||||
- [Mostly marking collector (`mmc`)](./collector-mmc.md):
|
||||
Immix-inspired collector. Optionally parallel, conservative (stack
|
||||
and/or heap), and/or generational.
|
||||
- [Boehm-Demers-Weiser collector (`bdw`)](./collector-bdw.md):
|
||||
Conservative mark-sweep collector, implemented by
|
||||
Boehm-Demers-Weiser library.
|
||||
|
||||
## How to choose?
|
||||
|
||||
If you are migrating an embedder off BDW-GC, then it could be reasonable
|
||||
to first go to `bdw`, then `stack-conservative-parallel-mmc`.
|
||||
|
||||
If you have an embedder with precise roots, use `pcc`. That will shake
|
||||
out mutator/embedder bugs. Then if memory is tight, switch to
|
||||
`parallel-mmc`, possibly `parallel-generational-mmc`.
|
||||
|
||||
If you are aiming for maximum simplicity and minimal code size (ten
|
||||
kilobytes or so), use `semi`.
|
||||
|
||||
If you are writing a new project, you have a choice as to whether to pay
|
||||
the development cost of precise roots or not. If you choose to not have
|
||||
precise roots, then go for `stack-conservative-parallel-mmc` directly.
|
||||
|
||||
## More collectors
|
||||
|
||||
It would be nice to have a generational GC that uses the space from
|
||||
`parallel-mmc` for the old generation but a pcc-style copying nursery.
|
||||
We have `generational-pcc` now, so this should be possible.
|
||||
|
||||
Support for concurrent marking in `mmc` would be good as well, perhaps
|
||||
with a SATB barrier. (Or, if you are the sort of person to bet on
|
||||
conservative stack scanning, perhaps a retreating-wavefront barrier
|
||||
would be more appropriate.)
|
||||
|
||||
Contributions are welcome, provided they have no more dependencies!
|
26
libguile/whippet/doc/guile.md
Normal file
|
@ -0,0 +1,26 @@
|
|||
# Whippet and Guile
|
||||
|
||||
If the `mmc` collector works out, it could replace Guile's garbage
|
||||
collector. Guile currently uses BDW-GC. Guile has a widely used C API
|
||||
and implements part of its run-time in C. For this reason it may be
|
||||
infeasible to require precise enumeration of GC roots -- we may need to
|
||||
allow GC roots to be conservatively identified from data sections and
|
||||
from stacks. Such conservative roots would be pinned, but other objects
|
||||
can be moved by the collector if it chooses to do so. We assume that
|
||||
object references within a heap object can be precisely identified.
|
||||
(However, Guile currently uses BDW-GC in its default configuration,
|
||||
which scans for references conservatively even on the heap.)
|
||||
|
||||
The existing C API allows direct access to mutable object fields,
|
||||
without the mediation of read or write barriers. Therefore it may be
|
||||
impossible to switch to collector strategies that need barriers, such as
|
||||
generational or concurrent collectors. However, we shouldn't write off
|
||||
this possibility entirely; an ideal replacement for Guile's GC will
|
||||
offer the possibility of migration to other GC designs without imposing
|
||||
new requirements on C API users in the initial phase.
|
||||
|
||||
In this regard, the Whippet experiment also has the goal of identifying
|
||||
a smallish GC abstraction in Guile, so that we might consider evolving
|
||||
GC implementation in the future without too much pain. If we switch
|
||||
away from BDW-GC, we should be able to evaluate that it's a win for a
|
||||
large majority of use cases.
|
718
libguile/whippet/doc/manual.md
Normal file
|
@ -0,0 +1,718 @@
|
|||
# Whippet user's guide
|
||||
|
||||
Whippet is an embed-only library: it should be copied into the source
|
||||
tree of the program that uses it. The program's build system needs to
|
||||
be wired up to compile Whippet, then link it into the program that uses
|
||||
it.
|
||||
|
||||
## Subtree merges
|
||||
|
||||
One way to get Whippet is just to manually copy the files present in a
Whippet checkout into your project. However, probably the best way is to
|
||||
perform a [subtree
|
||||
merge](https://docs.github.com/en/get-started/using-git/about-git-subtree-merges)
|
||||
of Whippet into your project's Git repository, so that you can easily
|
||||
update your copy of Whippet in the future.
|
||||
|
||||
Performing the first subtree merge is annoying and full of arcane
|
||||
incantations. Follow the [subtree merge
|
||||
page](https://docs.github.com/en/get-started/using-git/about-git-subtree-merges)
|
||||
for full details, but for a cheat sheet, you might do something like
|
||||
this to copy Whippet into the `whippet/` directory of your project root:
|
||||
|
||||
```
|
||||
git remote add whippet https://github.com/wingo/whippet
|
||||
git fetch whippet
|
||||
git merge -s ours --no-commit --allow-unrelated-histories whippet/main
|
||||
git read-tree --prefix=whippet/ -u whippet/main
|
||||
git commit -m 'Added initial Whippet merge'
|
||||
```
|
||||
|
||||
Then to later update your copy of whippet, assuming you still have the
|
||||
`whippet` remote, just do:
|
||||
|
||||
```
|
||||
git pull -s subtree whippet main
|
||||
```
|
||||
|
||||
## `gc-embedder-api.h`
|
||||
|
||||
To determine the live set of objects, a tracing garbage collector starts
|
||||
with a set of root objects, and then transitively visits all reachable
|
||||
object edges. Exactly how it goes about doing this depends on the
|
||||
program that is using the garbage collector; different programs will
|
||||
have different object representations, different strategies for
|
||||
recording roots, and so on.
|
||||
|
||||
To traverse the heap in a program-specific way but without imposing an
|
||||
abstraction overhead, Whippet requires that a number of data types and
|
||||
inline functions be implemented by the program, for use by Whippet
|
||||
itself. This is the *embedder API*, and this document describes what
|
||||
Whippet requires from a program.
|
||||
|
||||
A program should provide a header file implementing the API in
|
||||
[`gc-embedder-api.h`](../api/gc-embedder-api.h). This header should only be
|
||||
included when compiling Whippet itself; it is not part of the API that
|
||||
Whippet exposes to the program.
|
||||
|
||||
### Identifying roots
|
||||
|
||||
The collector uses two opaque struct types, `struct gc_mutator_roots`
|
||||
and `struct gc_heap_roots`, that are used by the program to record
|
||||
object roots. Probably you should put the definition of these data
|
||||
types in a separate header that is included both by Whippet, via the
|
||||
embedder API, and by users of Whippet, so that programs can populate
|
||||
the root set. In any case the embedder-API use of these structs is via
|
||||
`gc_trace_mutator_roots` and `gc_trace_heap_roots`, two functions that
|
||||
are passed a trace visitor function `trace_edge`, and which should call
|
||||
that function on all edges from a given mutator or heap. (Usually
|
||||
mutator roots are per-thread roots, such as from the stack, and heap
|
||||
roots are global roots.)
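
For concreteness, here is a hypothetical sketch of what an embedder
might write.  The exact parameter types of the trace functions are
declared in `gc-embedder-api.h` and may differ; the shadow-stack
representation and the `gc_edge()` constructor-from-address are
assumptions made for the example.

```
struct gc_mutator_roots {
  /* Assumed embedder-specific representation: a precise shadow stack. */
  struct gc_ref *shadow_stack;
  size_t shadow_stack_size;
};

static inline void
gc_trace_mutator_roots(struct gc_mutator_roots *roots,
                       void (*trace_edge)(struct gc_edge edge,
                                          struct gc_heap *heap,
                                          void *trace_data),
                       struct gc_heap *heap,
                       void *trace_data) {
  for (size_t i = 0; i < roots->shadow_stack_size; i++)
    trace_edge(gc_edge(&roots->shadow_stack[i]), heap, trace_data);
}
```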
|
||||
|
||||
### Tracing objects
|
||||
|
||||
The `gc_trace_object` function is responsible for calling the `trace_edge`
|
||||
visitor function on all outgoing edges in an object. It also includes a
|
||||
`size` out-parameter, for when the collector wants to measure the size
|
||||
of an object. `trace_edge` and `size` may be `NULL`, in which case no
|
||||
tracing or size computation should be performed.
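
As a sketch, an embedder whose only heap object is a simple pair might
write something like the following; a real implementation would
dispatch on the object's tag, the exact signature of `gc_trace_object`
is defined by the embedder API, and `gc_ref_heap_object` and `gc_edge`
are assumed accessor names.

```
struct pair {
  uintptr_t tag;
  void *car;
  void *cdr;
};

static inline void
gc_trace_object(struct gc_ref ref,
                void (*trace_edge)(struct gc_edge edge,
                                   struct gc_heap *heap,
                                   void *trace_data),
                struct gc_heap *heap,
                void *trace_data,
                size_t *size) {
  struct pair *p = gc_ref_heap_object(ref);
  if (trace_edge) {
    trace_edge(gc_edge(&p->car), heap, trace_data);
    trace_edge(gc_edge(&p->cdr), heap, trace_data);
  }
  if (size)
    *size = sizeof(*p);
}
```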
|
||||
|
||||
### Tracing ephemerons and finalizers
|
||||
|
||||
Most kinds of GC-managed object are defined by the program, but the GC
|
||||
itself has support for two specific object kinds: ephemerons and
|
||||
finalizers. If the program allocates ephemerons, it should trace them
|
||||
in the `gc_trace_object` function by calling `gc_trace_ephemeron` from
|
||||
[`gc-ephemeron.h`](../api/gc-ephemeron.h). Likewise if the program
|
||||
allocates finalizers, it should trace them by calling
|
||||
`gc_trace_finalizer` from [`gc-finalizer.h`](../api/gc-finalizer.h).
|
||||
|
||||
### Forwarding objects
|
||||
|
||||
When built with a collector that moves objects, the embedder must also
|
||||
allow for forwarding pointers to be installed in an object. There are
|
||||
two forwarding APIs: one that is atomic and one that isn't.
|
||||
|
||||
The nonatomic API is relatively simple; there is a
|
||||
`gc_object_forwarded_nonatomic` function that returns an embedded
|
||||
forwarding address, or 0 if the object is not yet forwarded, and
|
||||
`gc_object_forward_nonatomic`, which installs a forwarding pointer.
|
||||
|
||||
The atomic API is gnarly. It is used by parallel collectors, in which
|
||||
multiple collector threads can race to evacuate an object.
|
||||
|
||||
There is a state machine associated with the `gc_atomic_forward`
|
||||
structure from [`gc-forwarding.h`](../api/gc-forwarding.h); the embedder API
|
||||
implements the state changes. The collector calls
|
||||
`gc_atomic_forward_begin` on an object to begin a forwarding attempt,
|
||||
and the resulting `gc_atomic_forward` can be in the `NOT_FORWARDED`,
|
||||
`FORWARDED`, or `BUSY` state.
|
||||
|
||||
If the `gc_atomic_forward`'s state is `BUSY`, the collector will call
|
||||
`gc_atomic_forward_retry_busy`; a return value of 0 means the object is
|
||||
still busy, because another thread is attempting to forward it.
|
||||
Otherwise the forwarding state becomes either `FORWARDED`, if the other
thread succeeded in forwarding it, or goes back to `NOT_FORWARDED`,
|
||||
indicating that the other thread failed to forward it.
|
||||
|
||||
If the forwarding state is `FORWARDED`, the collector will call
|
||||
`gc_atomic_forward_address` to get the new address.
|
||||
|
||||
If the forwarding state is `NOT_FORWARDED`, the collector may begin a
|
||||
forwarding attempt by calling `gc_atomic_forward_acquire`. The
|
||||
resulting state is `ACQUIRED` on success, or `BUSY` if another thread
|
||||
acquired the object in the meantime, or `FORWARDED` if another thread
|
||||
acquired and completed the forwarding attempt.
|
||||
|
||||
An `ACQUIRED` object can then be forwarded via
|
||||
`gc_atomic_forward_commit`, or the forwarding attempt can be aborted via
|
||||
`gc_atomic_forward_abort`. Also, when an object is acquired, the
|
||||
collector may call `gc_atomic_forward_object_size` to compute how many
|
||||
bytes to copy. (The collector may choose instead to record object sizes
|
||||
in a different way.)
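
To see how the states chain together, here is a sketch of a
collector-side evacuation routine driving this state machine.  The
function names are the ones described above; the `state` field, the
`GC_FORWARDING_STATE_*` constants, `gc_ref` used as a constructor from
an address, and the `copy_object` helper are assumptions made for the
example.

```
static struct gc_ref copy_object(struct gc_ref obj, size_t bytes); /* hypothetical */

static struct gc_ref
try_evacuate(struct gc_ref obj) {
  struct gc_atomic_forward fwd = gc_atomic_forward_begin(obj);

  /* Wait out a competing forwarding attempt; retry_busy returns 0 while
     the other thread is still at work. */
  while (fwd.state == GC_FORWARDING_STATE_BUSY)
    gc_atomic_forward_retry_busy(&fwd);

  if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED) {
    gc_atomic_forward_acquire(&fwd);
    if (fwd.state == GC_FORWARDING_STATE_ACQUIRED) {
      /* We won the race: copy the object, then publish the new address. */
      size_t bytes = gc_atomic_forward_object_size(&fwd);
      struct gc_ref copy = copy_object(obj, bytes);
      gc_atomic_forward_commit(&fwd, copy);
      return copy;
    }
    /* Acquire failed: the state is now BUSY or FORWARDED; start over. */
    return try_evacuate(obj);
  }

  /* FORWARDED: another thread already evacuated the object. */
  return gc_ref(gc_atomic_forward_address(&fwd));
}
```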
|
||||
|
||||
All of these `gc_atomic_forward` functions are to be implemented by the
|
||||
embedder. Some programs may allocate a dedicated forwarding word in all
|
||||
objects; some will manage to store the forwarding word in an initial
|
||||
"tag" word, via a specific pattern for the low 3 bits of the tag that no
|
||||
non-forwarded object will have. The low-bits approach takes advantage
|
||||
of the collector's minimum object alignment, in which objects are
|
||||
aligned at least to an 8-byte boundary, so all objects have 0 for the
|
||||
low 3 bits of their address.
|
||||
|
||||
### Conservative references
|
||||
|
||||
Finally, when configured in a mode in which root edges or intra-object
|
||||
edges are *conservative*, the embedder can filter out which bit patterns
|
||||
might be an object reference by implementing
|
||||
`gc_is_valid_conservative_ref_displacement`. Here, the collector masks
|
||||
off the low bits of a conservative reference, and asks the embedder if a
|
||||
value with those low bits might point to an object. Usually the
|
||||
embedder should return 1 only if the displacement is 0, but if the
|
||||
program allows low-bit tagged pointers, then it should also return 1 for
|
||||
those pointer tags.
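
As a sketch, an embedder whose only low-bit pointer tag is a
hypothetical `0x1` tag for pairs might write the following (the exact
prototype is defined by the embedder API):

```
static inline int
gc_is_valid_conservative_ref_displacement(uintptr_t displacement) {
  /* Untagged references, plus the hypothetical 0x1 pair tag. */
  return displacement == 0 || displacement == 1;
}
```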
|
||||
|
||||
### External objects
|
||||
|
||||
Sometimes a system will allocate objects outside the GC, for example on
|
||||
the stack or in static data sections. To support this use case, Whippet
|
||||
allows the embedder to provide a `struct gc_extern_space`
|
||||
implementation. Whippet will call `gc_extern_space_start_gc` at the
|
||||
start of each collection, and `gc_extern_space_finish_gc` at the end.
|
||||
External objects will be visited by `gc_extern_space_mark`, which should
|
||||
return nonzero if the object hasn't been seen before and needs to be
|
||||
traced via `gc_trace_object` (coloring the object grey). Note,
|
||||
`gc_extern_space_mark` may be called concurrently from many threads; be
|
||||
prepared!
|
||||
|
||||
## Configuration, compilation, and linking
|
||||
|
||||
To the user, Whippet presents an abstract API that does not encode the
|
||||
specificities of any given collector. Whippet currently includes four
|
||||
implementations of that API: `semi`, a simple semi-space collector;
|
||||
`pcc`, a parallel copying collector (like semi but multithreaded);
|
||||
`bdw`, an implementation via the third-party
|
||||
[Boehm-Demers-Weiser](https://github.com/ivmai/bdwgc) conservative
|
||||
collector; and `mmc`, a mostly-marking collector inspired by Immix.
|
||||
|
||||
The program that embeds Whippet selects the collector implementation at
|
||||
build-time. For `pcc`, the program can also choose whether to be
|
||||
generational or not. For the `mmc` collector, the program configures a
|
||||
specific collector mode, again at build-time: generational or not,
|
||||
parallel or not, stack-conservative or not, and heap-conservative or
|
||||
not. It may be nice in the future to be able to configure these at
|
||||
run-time, but for the time being they are compile-time options so that
|
||||
adding new features doesn't change the footprint of a more minimal
|
||||
collector.
|
||||
|
||||
Different collectors have different allocation strategies: for example,
|
||||
the BDW collector allocates from thread-local freelists, whereas the
|
||||
semi-space collector has a bump-pointer allocator. A collector may also
|
||||
expose a write barrier, for example to enable generational collection.
|
||||
For performance reasons, many of these details can't be hidden behind an
|
||||
opaque functional API: they must be inlined into call sites. Whippet's
|
||||
approach is to expose fast paths as part of its inline API, but to
*parameterize* them on attributes of the selected garbage collector.
|
||||
The goal is to keep the user's code generic and avoid any code
|
||||
dependency on the choice of garbage collector. Because of inlining,
|
||||
however, the choice of garbage collector does need to be specified when
|
||||
compiling user code.
|
||||
|
||||
### Compiling the collector
|
||||
|
||||
As an embed-only library, Whippet needs to be integrated into the build
|
||||
system of its host (embedder). There are two build systems supported
|
||||
currently; we would be happy to add other systems over time.
|
||||
|
||||
#### GNU make
|
||||
|
||||
At a high level, first the embedder chooses a collector and defines how
|
||||
to specialize the collector against the embedder. Whippet's `embed.mk`
|
||||
Makefile snippet then defines how to build the set of object files that
|
||||
define the collector, and how to specialize the embedder against the
|
||||
chosen collector.
|
||||
|
||||
As an example, say you have a file `program.c`, and you want to compile
|
||||
it against a Whippet checkout in `whippet/`. Your headers are in
|
||||
`include/`, and you have written an implementation of the embedder
|
||||
interface in `host-gc.h`. In that case you would have a Makefile like
|
||||
this:
|
||||
|
||||
```
|
||||
HOST_DIR:=$(dir $(lastword $(MAKEFILE_LIST)))
|
||||
WHIPPET_DIR=$(HOST_DIR)whippet/
|
||||
|
||||
all: out
|
||||
|
||||
# The collector to choose: e.g. semi, bdw, pcc, generational-pcc, mmc,
|
||||
# parallel-mmc, etc.
|
||||
GC_COLLECTOR=pcc
|
||||
|
||||
include $(WHIPPET_DIR)embed.mk
|
||||
|
||||
# Host cflags go here...
|
||||
HOST_CFLAGS=
|
||||
|
||||
# Whippet's embed.mk uses this variable when it compiles code that
|
||||
# should be specialized against the embedder.
|
||||
EMBEDDER_TO_GC_CFLAGS=$(HOST_CFLAGS) -include $(HOST_DIR)host-gc.h
|
||||
|
||||
program.o: program.c
|
||||
$(GC_COMPILE) $(HOST_CFLAGS) $(GC_TO_EMBEDDER_CFLAGS) -c $<
|
||||
program: program.o $(GC_OBJS)
|
||||
$(GC_LINK) $^ $(GC_LIBS)
|
||||
```
|
||||
|
||||
The optimization settings passed to the C compiler are taken from
|
||||
`GC_BUILD_CFLAGS`. Embedders can override this variable directly, or
|
||||
via the shorthand `GC_BUILD` variable. A `GC_BUILD` of `opt` indicates
|
||||
maximum optimization and no debugging assertions; `optdebug` adds
|
||||
debugging assertions; and `debug` removes optimizations.
|
||||
|
||||
Though Whippet tries to put performance-sensitive interfaces in header
|
||||
files, users should also compile with link-time optimization (LTO) to
|
||||
remove any overhead imposed by the division of code into separate
|
||||
compilation units. `embed.mk` includes the necessary LTO flags in
|
||||
`GC_CFLAGS` and `GC_LDFLAGS`.
|
||||
|
||||
#### GNU Autotools
|
||||
|
||||
To use Whippet from an autotools project, the basic idea is to include a
|
||||
`Makefile.am` snippet from the subdirectory containing the Whippet
|
||||
checkout. That will build `libwhippet.la`, which you should link into
|
||||
your binary. There are some `m4` autoconf macros that need to be
|
||||
invoked, for example to select the collector.
|
||||
|
||||
Let us imagine you have checked out Whippet in `whippet/`. Let us also
|
||||
assume for the moment that we are going to build `mt-gcbench`, a program
|
||||
included in Whippet itself.
|
||||
|
||||
A top-level autoconf file (`configure.ac`) might look like this:
|
||||
|
||||
```autoconf
|
||||
AC_PREREQ([2.69])
|
||||
AC_INIT([whippet-autotools-example],[0.1.0])
|
||||
AC_CONFIG_SRCDIR([whippet/benchmarks/mt-gcbench.c])
|
||||
AC_CONFIG_AUX_DIR([build-aux])
|
||||
AC_CONFIG_MACRO_DIRS([m4 whippet])
|
||||
AM_INIT_AUTOMAKE([subdir-objects foreign])
|
||||
|
||||
WHIPPET_ENABLE_LTO
|
||||
|
||||
LT_INIT
|
||||
|
||||
WARN_CFLAGS=-Wall
|
||||
AC_ARG_ENABLE([Werror],
|
||||
AS_HELP_STRING([--disable-Werror],
|
||||
[Don't stop the build on errors]),
|
||||
[],
|
||||
WARN_CFLAGS="-Wall -Werror")
|
||||
CFLAGS="$CFLAGS $WARN_CFLAGS"
|
||||
|
||||
WHIPPET_PKG
|
||||
|
||||
AC_CONFIG_FILES(Makefile)
|
||||
AC_OUTPUT
|
||||
```
|
||||
|
||||
Then your `Makefile.am` might look like this:
|
||||
|
||||
```automake
|
||||
noinst_LTLIBRARIES =
|
||||
WHIPPET_EMBEDDER_CPPFLAGS = -include $(srcdir)/whippet/benchmarks/mt-gcbench-embedder.h
|
||||
include whippet/embed.am
|
||||
|
||||
noinst_PROGRAMS = whippet/benchmarks/mt-gcbench
|
||||
whippet_benchmarks_mt_gcbench_SOURCES = \
|
||||
whippet/benchmarks/heap-objects.h \
|
||||
whippet/benchmarks/mt-gcbench-embedder.h \
|
||||
whippet/benchmarks/mt-gcbench-types.h \
|
||||
whippet/benchmarks/mt-gcbench.c \
|
||||
whippet/benchmarks/simple-allocator.h \
|
||||
whippet/benchmarks/simple-gc-embedder.h \
|
||||
whippet/benchmarks/simple-roots-api.h \
|
||||
whippet/benchmarks/simple-roots-types.h \
|
||||
whippet/benchmarks/simple-tagging-scheme.h
|
||||
|
||||
AM_CFLAGS = $(WHIPPET_CPPFLAGS) $(WHIPPET_CFLAGS) $(WHIPPET_TO_EMBEDDER_CPPFLAGS)
|
||||
LDADD = libwhippet.la
|
||||
```
|
||||
|
||||
We have to list all the little header files it uses because, well,
|
||||
autotools.
|
||||
|
||||
To actually build, you do the usual autotools dance:
|
||||
|
||||
```bash
|
||||
autoreconf -vif && ./configure && make
|
||||
```
|
||||
|
||||
See `./configure --help` for a list of user-facing options. Before the
|
||||
`WHIPPET_PKG`, you can run e.g. `WHIPPET_PKG_COLLECTOR(mmc)` to set the
|
||||
default collector to `mmc`; if you don't do that, the default collector
|
||||
is `pcc`. There are also `WHIPPET_PKG_DEBUG`, `WHIPPET_PKG_TRACING`,
|
||||
and `WHIPPET_PKG_PLATFORM`; see [`whippet.m4`](../whippet.m4) for more
|
||||
details. See also
|
||||
[`whippet-autotools`](https://github.com/wingo/whippet-autotools) for an
|
||||
example of how this works.
|
||||
|
||||
#### Compile-time options
|
||||
|
||||
There are a number of pre-processor definitions that can parameterize
|
||||
the collector at build-time:
|
||||
|
||||
* `GC_DEBUG`: If nonzero, then enable debugging assertions.
|
||||
* `NDEBUG`: This one is a bit weird; if not defined, then enable
|
||||
debugging assertions and some debugging printouts. Probably
|
||||
Whippet's use of `NDEBUG` should be folded in to `GC_DEBUG`.
|
||||
* `GC_PARALLEL`: If nonzero, then enable parallelism in the collector.
|
||||
Defaults to 0.
|
||||
* `GC_GENERATIONAL`: If nonzero, then enable generational collection.
|
||||
Defaults to zero.
|
||||
* `GC_PRECISE_ROOTS`: If nonzero, then collect precise roots via
|
||||
`gc_heap_roots` and `gc_mutator_roots`. Defaults to zero.
|
||||
* `GC_CONSERVATIVE_ROOTS`: If nonzero, then scan the stack and static
|
||||
data sections for conservative roots. Defaults to zero. Not
|
||||
mutually exclusive with `GC_PRECISE_ROOTS`.
|
||||
* `GC_CONSERVATIVE_TRACE`: If nonzero, heap edges are scanned
|
||||
conservatively. Defaults to zero.
|
||||
|
||||
Some collectors require specific compile-time options. For example, the
|
||||
semi-space collector has to be able to move all objects; this is not
|
||||
compatible with conservative roots or heap edges.
|
||||
|
||||
#### Tracing support
|
||||
|
||||
Whippet includes support for low-overhead run-time tracing via
|
||||
[LTTng](https://lttng.org/). If the support library `lttng-ust` is
|
||||
present when Whippet is compiled (as checked via `pkg-config`),
|
||||
tracepoint support will be present. See
|
||||
[tracepoints.md](./tracepoints.md) for more information on how to get
|
||||
performance traces out of Whippet.
|
||||
|
||||
## Using the collector
|
||||
|
||||
Whew! So you finally built the thing! Did you also link it into your
|
||||
program? No, because your program isn't written yet? Well this section
|
||||
is for you: we describe the user-facing API of Whippet, where "user" in
|
||||
this case denotes the embedding program.
|
||||
|
||||
What is the API, you ask? It is in [`gc-api.h`](../api/gc-api.h).
|
||||
|
||||
### Heaps and mutators
|
||||
|
||||
To start with, you create a *heap*. Usually an application will create
|
||||
just one heap. A heap has one or more associated *mutators*. A mutator
|
||||
is a thread-specific handle on the heap. Allocating objects requires a
|
||||
mutator.
|
||||
|
||||
The initial heap and mutator are created via `gc_init`, which takes
|
||||
three logical input parameters: the *options*, a stack base address, and
|
||||
an *event listener*. The options specify the initial heap size and so
|
||||
on. The event listener is mostly for gathering statistics; see below
|
||||
for more. `gc_init` returns the new heap as an out parameter, and also
|
||||
returns a mutator for the current thread.
|
||||
|
||||
To make a new mutator for a new thread, use `gc_init_for_thread`. When
|
||||
a thread is finished with its mutator, call `gc_finish_for_thread`.
|
||||
Each thread that allocates or accesses GC-managed objects should have
|
||||
its own mutator.
|
||||
|
||||
The stack base address allows the collector to scan the mutator's stack,
|
||||
if conservative root-finding is enabled. It may be omitted in the call
|
||||
to `gc_init` and `gc_init_for_thread`; passing `NULL` tells Whippet to
|
||||
ask the platform for the stack bounds of the current thread. Generally
|
||||
speaking, this works on all platforms for the main thread, but not
|
||||
necessarily on other threads. The most reliable solution is to
|
||||
explicitly obtain a base address by trampolining through
|
||||
`gc_call_with_stack_addr`.
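
As a sketch, a thread entry point might look like this; the exact
prototypes of `gc_init_for_thread` and `gc_finish_for_thread` are in
`gc-api.h`, and this example assumes the new mutator is returned
directly.

```
static void*
worker_thread(void *data) {
  struct gc_heap *heap = data;
  /* Passing NULL asks the platform for this thread's stack bounds. */
  struct gc_mutator *mut = gc_init_for_thread(NULL, heap);

  /* ... allocate and compute using mut ... */

  gc_finish_for_thread(mut);
  return NULL;
}
```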
|
||||
|
||||
### Options
|
||||
|
||||
There are some run-time parameters that programs and users might want to
|
||||
set explicitly; these are encapsulated in the *options*. Make an
|
||||
options object with `gc_allocate_options()`; this object will be
|
||||
consumed by `gc_init`. Then, the most convenient thing is to set
|
||||
those options from `gc_options_parse_and_set_many` from a string passed
|
||||
on the command line or an environment variable, but to get there we have
|
||||
to explain the low-level interface first. There are a few options that are
|
||||
defined for all collectors:
|
||||
|
||||
* `GC_OPTION_HEAP_SIZE_POLICY`: How should we size the heap? Either
|
||||
it's `GC_HEAP_SIZE_FIXED` (which is 0), in which the heap size is
|
||||
fixed at startup; or `GC_HEAP_SIZE_GROWABLE` (1), in which the heap
|
||||
may grow but will never shrink; or `GC_HEAP_SIZE_ADAPTIVE` (2), in
|
||||
which we take an
|
||||
[adaptive](https://wingolog.org/archives/2023/01/27/three-approaches-to-heap-sizing)
|
||||
approach, depending on the rate of allocation and the cost of
|
||||
collection. Really you want the adaptive strategy, but if you are
|
||||
benchmarking you definitely want the fixed policy.
|
||||
* `GC_OPTION_HEAP_SIZE`: The initial heap size. For a
|
||||
`GC_HEAP_SIZE_FIXED` policy, this is also the final heap size. In
|
||||
bytes.
|
||||
* `GC_OPTION_MAXIMUM_HEAP_SIZE`: For growable and adaptive heaps, the
|
||||
maximum heap size, in bytes.
|
||||
* `GC_OPTION_HEAP_SIZE_MULTIPLIER`: For growable heaps, the target heap
|
||||
multiplier. A heap multiplier of 2.5 means that for 100 MB of live
|
||||
data, the heap should be 250 MB.
|
||||
* `GC_OPTION_HEAP_EXPANSIVENESS`: For adaptive heap sizing, an
|
||||
indication of how much free space will be given to heaps, as a
|
||||
proportion of the square root of the live data size.
|
||||
* `GC_OPTION_PARALLELISM`: How many threads to devote to collection
|
||||
tasks during GC pauses. By default, the current number of
|
||||
processors, with a maximum of 8.
|
||||
|
||||
You can set these options via `gc_option_set_int` and so on; see
|
||||
[`gc-options.h`](../api/gc-options.h). Or, you can parse options from
|
||||
strings: `heap-size-policy`, `heap-size`, `maximum-heap-size`, and so
|
||||
on. Use `gc_option_from_string` to determine if a string is really an
|
||||
option. Use `gc_option_parse_and_set` to parse a value for an option.
|
||||
Use `gc_options_parse_and_set_many` to parse a number of comma-delimited
|
||||
*key=value* settings from a string.
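
Putting that together, option setup before `gc_init` might look like
the following sketch.  `gc_option_set_int` and
`gc_options_parse_and_set_many` are named above, but their exact
prototypes live in `gc-options.h`; whether the heap size uses the `int`
setter, whether the parser accepts a `parallelism` key, and the
nonzero-on-success return convention are assumptions to check there.
The fragment also assumes `<stdio.h>` and `<stdlib.h>` are included.

```
static struct gc_options*
make_options(void) {
  struct gc_options *options = gc_allocate_options();

  /* Fixed 512 MB heap, convenient for benchmarking. */
  gc_option_set_int(options, GC_OPTION_HEAP_SIZE_POLICY, GC_HEAP_SIZE_FIXED);
  gc_option_set_int(options, GC_OPTION_HEAP_SIZE, 512 * 1024 * 1024);

  /* Let the user override settings from the environment,
     e.g. MY_GC_OPTIONS="heap-size=1073741824,parallelism=2". */
  const char *str = getenv("MY_GC_OPTIONS");
  if (str && !gc_options_parse_and_set_many(options, str))
    fprintf(stderr, "failed to parse GC options: %s\n", str);

  return options;
}
```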
|
||||
|
||||
### Allocation
|
||||
|
||||
So you have a heap and a mutator; great! Let's allocate! Call
|
||||
`gc_allocate`, passing the mutator and the number of bytes to allocate.
|
||||
|
||||
There is also `gc_allocate_fast`, which is an inlined fast-path. If
|
||||
that returns NULL, you need to call `gc_allocate_slow`. The advantage
|
||||
of this API is that you can punt some root-saving overhead to the slow
|
||||
path.
|
||||
|
||||
Allocation always succeeds. If it doesn't, it kills your program. The
|
||||
bytes in the resulting allocation will be initialized to 0.
|
||||
|
||||
The allocation fast path is parameterized by collector-specific
|
||||
attributes. JIT compilers can also read those attributes to emit
|
||||
appropriate inline code that replicates the logic of `gc_allocate_fast`.
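
A typical embedder wrapper over the fast and slow paths might look like
this sketch; note that the real allocation functions may take
additional arguments (for example an allocation kind), so check
`gc-api.h`.

```
static inline void*
embedder_alloc(struct gc_mutator *mut, size_t bytes) {
  void *obj = gc_allocate_fast(mut, bytes);
  if (!obj)
    /* Slow path: may synchronize with the collector, may trigger GC. */
    obj = gc_allocate_slow(mut, bytes);
  return obj;  /* never NULL; contents are zero-initialized */
}
```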
|
||||
|
||||
### Write barriers
|
||||
|
||||
For some collectors, mutators have to tell the collector whenever they
|
||||
mutate an object. They tell the collector by calling a *write barrier*;
|
||||
in Whippet this is currently the case only for generational collectors.
|
||||
|
||||
The write barrier is `gc_write_barrier`; see `gc-api.h` for its
|
||||
parameters.
|
||||
|
||||
As with allocation, the fast path for the write barrier is parameterized
|
||||
by collector-specific attributes, to allow JIT compilers to inline write
|
||||
barriers.
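
For example, an embedder's field-update helper might look like the
following sketch.  The parameter list of `gc_write_barrier` shown here
(mutator, object, object size, edge, new value) and the
`gc_edge_update` store helper are assumptions; consult `gc-api.h` and
`gc-edge.h` for the real prototypes.

```
static inline void
embedder_set_field(struct gc_mutator *mut, struct gc_ref obj,
                   size_t obj_size, struct gc_edge field,
                   struct gc_ref new_value) {
  /* Tell the collector about the mutation, as required by the
     collector in use, then perform the store. */
  gc_write_barrier(mut, obj, obj_size, field, new_value);
  gc_edge_update(field, new_value);
}
```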
|
||||
|
||||
### Safepoints
|
||||
|
||||
Sometimes Whippet will need to synchronize all threads, for example as
|
||||
part of the "stop" phase of a stop-and-copy semi-space collector.
|
||||
Whippet stops at *safepoints*. At a safepoint, all mutators must be
|
||||
able to enumerate all of their edges to live objects.
|
||||
|
||||
Whippet has cooperative safepoints: mutators have to periodically call
|
||||
into the collector to potentially synchronize with other mutators.
|
||||
`gc_allocate_slow` is a safepoint, so if you have a bunch of threads that are
|
||||
all allocating, usually safepoints are reached in a more-or-less prompt
|
||||
fashion. But if a mutator isn't allocating, it either needs to
|
||||
temporarily mark itself as inactive by trampolining through
|
||||
`gc_call_without_gc`, or it should arrange to periodically call
|
||||
`gc_safepoint`. Marking a mutator as inactive is the right strategy
|
||||
for, for example, system calls that might block. Periodic safepointing is
|
||||
better for code that is active but not allocating.
|
||||
|
||||
Also, the BDW collector actually uses pre-emptive safepoints: it stops
|
||||
threads via POSIX signals. `gc_safepoint` is a no-op with BDW.
|
||||
|
||||
Embedders can inline safepoint checks. If
|
||||
`gc_cooperative_safepoint_kind()` is `GC_COOPERATIVE_SAFEPOINT_NONE`,
|
||||
then the collector doesn't need safepoints, as is the case for `bdw`
|
||||
which uses signals and `semi` which is single-threaded. If it is
|
||||
`GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG`, then calling
|
||||
`gc_safepoint_flag_loc` on a mutator will return the address of an `int`
|
||||
in memory, which if nonzero when loaded using relaxed atomics indicates
|
||||
that the mutator should call `gc_safepoint_slow`. Similarly for
|
||||
`GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG`, except that the address is
|
||||
per-mutator rather than global.
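
An inlined check might therefore look like this sketch, using the names
described above; the flag is assumed to be an `int` loaded with relaxed
atomics, as stated, and GCC/Clang atomic builtins are used for brevity.

```
static inline void
embedder_safepoint(struct gc_mutator *mut) {
  switch (gc_cooperative_safepoint_kind()) {
  case GC_COOPERATIVE_SAFEPOINT_NONE:
    return;
  case GC_COOPERATIVE_SAFEPOINT_HEAP_FLAG:
  case GC_COOPERATIVE_SAFEPOINT_MUTATOR_FLAG: {
    int *flag = gc_safepoint_flag_loc(mut);
    if (__atomic_load_n(flag, __ATOMIC_RELAXED))
      gc_safepoint_slow(mut);
    return;
  }
  }
}
```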
|
||||
|
||||
### Pinning
|
||||
|
||||
Sometimes a mutator or embedder would like to tell the collector to not
|
||||
move a particular object. This can happen for example during a foreign
|
||||
function call, or if the embedder allows programs to access the address
|
||||
of an object, for example to compute an identity hash code. To support
|
||||
this use case, some Whippet collectors allow the embedder to *pin*
|
||||
objects. Call `gc_pin_object` to prevent the collector from relocating
|
||||
an object.
|
||||
|
||||
Pinning is currently supported by the `bdw` collector, which never moves
|
||||
objects, and also by the various `mmc` collectors, which can move
|
||||
objects that have no inbound conservative references.
|
||||
|
||||
Pinning is not supported on `semi` or `pcc`.
|
||||
|
||||
Call `gc_can_pin_objects` to determine whether the current collector can
|
||||
pin objects.
|
||||
|
||||
### Statistics
|
||||
|
||||
Sometimes a program would like some information from the GC: how many
|
||||
bytes and objects have been allocated? How much time has been spent in
|
||||
the GC? How many times has GC run, and how many of those were minor
|
||||
collections? What's the maximum pause time? Stuff like that.
|
||||
|
||||
Instead of collecting a fixed set of information, Whippet emits
|
||||
callbacks when the collector reaches specific states. The embedder
|
||||
provides a *listener* for these events when initializing the collector.
|
||||
|
||||
The listener interface is defined in
|
||||
[`gc-event-listener.h`](../api/gc-event-listener.h). Whippet ships with
|
||||
two listener implementations,
|
||||
[`GC_NULL_EVENT_LISTENER`](../api/gc-null-event-listener.h), and
|
||||
[`GC_BASIC_STATS`](../api/gc-basic-stats.h). Most embedders will want
|
||||
their own listener, but starting with the basic stats listener is not a
|
||||
bad option:
|
||||
|
||||
```
|
||||
#include "gc-api.h"
|
||||
#include "gc-basic-stats.h"
|
||||
#include <stdio.h>
|
||||
|
||||
int main() {
|
||||
struct gc_options *options = NULL;
|
||||
struct gc_heap *heap;
|
||||
struct gc_mutator *mut;
|
||||
struct gc_basic_stats stats;
|
||||
gc_init(options, NULL, &heap, &mut, GC_BASIC_STATS, &stats);
|
||||
// ...
|
||||
gc_basic_stats_finish(&stats);
|
||||
gc_basic_stats_print(&stats, stdout);
|
||||
}
|
||||
```
|
||||
|
||||
As you can see, `GC_BASIC_STATS` expands to a `struct gc_event_listener`
|
||||
definition. We pass an associated pointer to a `struct gc_basic_stats`
|
||||
instance which will be passed to the listener at every event.
|
||||
|
||||
The output of this program might be something like:
|
||||
|
||||
```
|
||||
Completed 19 major collections (0 minor).
|
||||
654.597 ms total time (385.235 stopped).
|
||||
Heap size is 167.772 MB (max 167.772 MB); peak live data 55.925 MB.
|
||||
```
|
||||
|
||||
There are currently three different sorts of events: heap events to
|
||||
track heap growth, collector events to time different parts of
|
||||
collection, and mutator events to indicate when specific mutators are
|
||||
stopped.
|
||||
|
||||
There are three heap events:
|
||||
|
||||
* `init(void* data, size_t heap_size)`: Called during `gc_init`, to
|
||||
allow the listener to initialize its associated state.
|
||||
* `heap_resized(void* data, size_t new_size)`: Called if the heap grows
|
||||
or shrinks.
|
||||
* `live_data_size(void* data, size_t size)`: Called periodically when
|
||||
the collector learns about live data size.
|
||||
|
||||
The collection events form a kind of state machine, and are called in
|
||||
this order:
|
||||
|
||||
* `requesting_stop(void* data)`: Called when the collector asks
|
||||
mutators to stop.
|
||||
* `waiting_for_stop(void* data)`: Called when the collector has done
|
||||
all the pre-stop work that it is able to and is just waiting on
|
||||
mutators to stop.
|
||||
* `mutators_stopped(void* data)`: Called when all mutators have
|
||||
stopped; the trace phase follows.
|
||||
* `prepare_gc(void* data, enum gc_collection_kind gc_kind)`: Called
|
||||
to indicate which kind of collection is happening.
|
||||
* `roots_traced(void* data)`: Called when roots have been visited.
|
||||
* `heap_traced(void* data)`: Called when the whole heap has been
|
||||
traced.
|
||||
* `ephemerons_traced(void* data)`: Called when the [ephemeron
|
||||
fixpoint](https://wingolog.org/archives/2023/01/24/parallel-ephemeron-tracing)
|
||||
has been reached.
|
||||
* `restarting_mutators(void* data)`: Called right before the collector
|
||||
restarts mutators.
|
||||
|
||||
The collectors in Whippet will call all of these event handlers, but it
|
||||
may be that they are called conservatively: for example, the
|
||||
single-mutator, single-collector semi-space collector will never have to
|
||||
wait for mutators to stop. It will still call the functions, though!
|
||||
|
||||
Finally, there are the mutator events:
|
||||
* `mutator_added(void* data) -> void*`: The only event handler that
|
||||
returns a value, called when a new mutator is added. The parameter
|
||||
is the overall event listener data, and the result is
|
||||
mutator-specific data. The rest of the mutator events pass this
|
||||
mutator-specific data instead.
|
||||
* `mutator_cause_gc(void* mutator_data)`: Called when a mutator causes
|
||||
GC, either via allocation or an explicit `gc_collect` call.
|
||||
* `mutator_stopping(void* mutator_data)`: Called when a mutator has
|
||||
received the signal to stop. It may perform some marking work before
|
||||
it stops.
|
||||
* `mutator_stopped(void* mutator_data)`: Called when a mutator parks
|
||||
itself.
|
||||
* `mutator_restarted(void* mutator_data)`: Called when a mutator
|
||||
restarts.
|
||||
* `mutator_removed(void* mutator_data)`: Called when a mutator goes
|
||||
away.
|
||||
|
||||
Note that these event handlers shouldn't really do much. In
|
||||
particular, they shouldn't call into the Whippet API, and they shouldn't
|
||||
even access GC-managed objects. Event listeners are really about
|
||||
statistics and profiling and aren't a place to mutate the object graph.
|
||||
|
||||
### Ephemerons
|
||||
|
||||
Whippet supports ephemerons, first-class objects that weakly associate
|
||||
keys with values. If an ephemeron's key ever becomes unreachable,
|
||||
the ephemeron becomes dead and loses its value.
|
||||
|
||||
The user-facing API is in [`gc-ephemeron.h`](../api/gc-ephemeron.h). To
|
||||
allocate an ephemeron, call `gc_allocate_ephemeron`, then initialize its
|
||||
key and value via `gc_ephemeron_init`. Get the key and value via
|
||||
`gc_ephemeron_key` and `gc_ephemeron_value`, respectively.
|
||||
|
||||
In Whippet, ephemerons can be linked together in a chain. During GC, if
|
||||
an ephemeron's chain points to a dead ephemeron, that link will be
|
||||
elided, allowing the dead ephemeron itself to be collected. In that
|
||||
way, ephemerons can be used to build weak data structures such as weak
|
||||
maps.
|
||||
|
||||
Weak data structures are often shared across multiple threads, so all
|
||||
routines to access and modify chain links are atomic. Use
|
||||
`gc_ephemeron_chain_head` to access the head of a storage location that
|
||||
points to an ephemeron; push a new ephemeron on a location with
|
||||
`gc_ephemeron_chain_push`; and traverse a chain with
|
||||
`gc_ephemeron_chain_next`.
|
||||
|
||||
An ephemeron association can be removed via `gc_ephemeron_mark_dead`.
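
As a sketch, a lookup in one bucket of a weak map might look like this.
The chain functions are the ones named above; the bucket is assumed to
be a storage location holding an ephemeron pointer, keys are compared
by identity for the example, and `gc_ref_value` and `gc_ref_null` are
assumed helpers from `gc-ref.h`.

```
static struct gc_ref
weak_map_bucket_lookup(struct gc_ephemeron **bucket, struct gc_ref key) {
  for (struct gc_ephemeron *e = gc_ephemeron_chain_head(bucket);
       e;
       e = gc_ephemeron_chain_next(e)) {
    /* Dead ephemerons have lost their key and value. */
    if (gc_ref_value(gc_ephemeron_key(e)) == gc_ref_value(key))
      return gc_ephemeron_value(e);
  }
  return gc_ref_null();
}
```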
|
||||
|
||||
### Finalizers
|
||||
|
||||
A finalizer allows the embedder to be notified when an object becomes
|
||||
unreachable.
|
||||
|
||||
A finalizer has a priority. When the heap is created, the embedder
|
||||
should declare how many priorities there are. Lower-numbered priorities
|
||||
take precedence; if an object has a priority-0 finalizer outstanding,
|
||||
that will prevent any finalizer at level 1 (or 2, ...) from firing
|
||||
until no priority-0 finalizer remains.
|
||||
|
||||
Call `gc_attach_finalizer`, from `gc-finalizer.h`, to attach a finalizer
|
||||
to an object.
|
||||
|
||||
A finalizer also references an associated GC-managed closure object.
|
||||
A finalizer's reference to the closure object is strong: if a
|
||||
finalizer's closure references its finalizable object,
|
||||
directly or indirectly, the finalizer will never fire.
|
||||
|
||||
When an object with a finalizer becomes unreachable, it is added to a
|
||||
queue. The embedder can call `gc_pop_finalizable` to get the next
|
||||
finalizable object and its associated closure. At that point the
|
||||
embedder can do anything with the object, including keeping it alive.
|
||||
Ephemeron associations will still be present while the finalizable
|
||||
object is live. Note however that any objects referenced by the
|
||||
finalizable object may themselves be already finalized; finalizers are
|
||||
enqueued for objects when they become unreachable, which can concern
|
||||
whole subgraphs of objects at once.
|
||||
|
||||
The usual way for an embedder to know when the queue of finalizable
|
||||
objects is non-empty is to call `gc_set_finalizer_callback` to
|
||||
provide a function that will be invoked when there are pending
|
||||
finalizers.
|
||||
|
||||
Arranging to call `gc_pop_finalizable` and doing something with the
|
||||
finalizable object and closure is the responsibility of the embedder.
|
||||
The embedder's finalization action can end up invoking arbitrary code,
|
||||
so unless the embedder imposes some kind of restriction on what
|
||||
finalizers can do, generally speaking finalizers should be run in a
|
||||
dedicated thread instead of recursively from within whatever mutator
|
||||
thread caused GC. Setting up such a thread is the responsibility of the
|
||||
mutator. `gc_pop_finalizable` is thread-safe, allowing multiple
|
||||
finalization threads if that is appropriate.
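
A sketch of that arrangement follows.  The callback and pop functions
are the ones named above, but their exact prototypes (what the callback
receives, and whether the object and closure come back as
out-parameters) are assumptions, as are the `notify_*`/`wait_*`
synchronization helpers and `run_embedder_finalizer`.

```
/* Invoked by the collector when finalizable objects are pending. */
static void
on_finalizers_ready(struct gc_heap *heap, size_t pending) {
  notify_finalizer_thread();           /* hypothetical: signal a condvar */
}

static void*
finalizer_thread(void *data) {
  struct gc_heap *heap = data;
  for (;;) {
    wait_for_finalizer_notification();  /* hypothetical: condvar wait */
    struct gc_ref object, closure;
    while (gc_pop_finalizable(heap, &object, &closure))
      run_embedder_finalizer(object, closure);  /* embedder-defined action */
  }
  return NULL;
}
```

The callback would be registered once at startup via
`gc_set_finalizer_callback` (again, exact prototype assumed).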
|
||||
|
||||
`gc_allocate_finalizer` returns a finalizer, which is a fresh GC-managed
|
||||
heap object. The mutator should then directly attach it to an object
|
||||
using `gc_finalizer_attach`. When the finalizer is fired, it becomes
|
||||
available to the mutator via `gc_pop_finalizable`.
|
BIN
libguile/whippet/doc/perfetto-minor-gc.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 169 KiB |
127
libguile/whippet/doc/tracepoints.md
Normal file
|
@ -0,0 +1,127 @@
|
|||
# Whippet performance tracing
|
||||
|
||||
Whippet includes support for run-time tracing via
|
||||
[LTTng](https://LTTng.org) user-space tracepoints. This allows you to
|
||||
get a detailed look at how Whippet is performing on your system.
|
||||
Tracing support is currently limited to Linux systems.
|
||||
|
||||
## Getting started
|
||||
|
||||
First, you need to build Whippet with LTTng support. Usually this is as
|
||||
easy as building it in an environment where the `lttng-ust` library is
|
||||
present, as determined by `pkg-config --libs lttng-ust`. You can know
|
||||
if your Whippet has tracing support by seeing if the resulting binaries
|
||||
are dynamically linked to `liblttng-ust`.
|
||||
|
||||
If we take as an example the `mt-gcbench` test in the Whippet source
|
||||
tree, we would have:
|
||||
|
||||
```
|
||||
$ ldd bin/mt-gcbench.pcc | grep lttng
|
||||
...
|
||||
liblttng-ust.so.1 => ...
|
||||
...
|
||||
```
|
||||
|
||||
### Capturing traces
|
||||
|
||||
Actually capturing traces is a little annoying; it's not as easy as
|
||||
`perf record`. The [LTTng
|
||||
documentation](https://lttng.org/docs/v2.13/#doc-controlling-tracing) is
|
||||
quite thorough, but here is a summary.
|
||||
|
||||
First, create your tracing session:
|
||||
|
||||
```
|
||||
$ lttng create
|
||||
Session auto-20250214-091153 created.
|
||||
Traces will be output to ~/lttng-traces/auto-20250214-091153
|
||||
```
|
||||
|
||||
You run all these commands as your own user; they don't require root
|
||||
permissions or system-wide modifications, as all of the Whippet
|
||||
tracepoints are user-space tracepoints (UST).
|
||||
|
||||
Just having an LTTng session created won't do anything though; you need
|
||||
to configure the session. Monotonic nanosecond-resolution timestamps
|
||||
are already implicitly part of each event. We also want to have process
|
||||
and thread IDs for all events:
|
||||
|
||||
```
|
||||
$ lttng add-context --userspace --type=vpid --type=vtid
|
||||
ust context vpid added to all channels
|
||||
ust context vtid added to all channels
|
||||
```
|
||||
|
||||
Now enable Whippet events:
|
||||
|
||||
```
|
||||
$ lttng enable-event --userspace 'whippet:*'
|
||||
ust event whippet:* created in channel channel0
|
||||
```
|
||||
|
||||
And now, start recording:
|
||||
|
||||
```
|
||||
$ lttng start
|
||||
Tracing started for session auto-20250214-091153
|
||||
```
|
||||
|
||||
With this, traces will be captured for our program of interest:
|
||||
|
||||
```
|
||||
$ bin/mt-gcbench.pcc 2.5 8
|
||||
...
|
||||
```
|
||||
|
||||
Now stop the trace:
|
||||
|
||||
```
|
||||
$ lttng stop
|
||||
Waiting for data availability
|
||||
Tracing stopped for session auto-20250214-091153
|
||||
```
|
||||
|
||||
Whew. If we did it right, our data is now in
|
||||
`~/lttng-traces/auto-20250214-091153`.
|
||||
|
||||
### Visualizing traces
|
||||
|
||||
LTTng produces traces in the [Common Trace Format
|
||||
(CTF)](https://diamon.org/ctf/). My favorite trace viewing tool is the
|
||||
family of web-based trace viewers derived from `chrome://tracing`. The
|
||||
best of these appear to be [the Firefox
|
||||
profiler](https://profiler.firefox.com) and
|
||||
[Perfetto](https://ui.perfetto.dev). Unfortunately neither of these can
|
||||
work with CTF directly, so we instead need to run a trace converter.
|
||||
|
||||
Oddly, there is no trace converter that can read CTF and write something
|
||||
that Perfetto (e.g.) can read. However there is a [JSON-based tracing
|
||||
format that these tools can
|
||||
read](https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview?tab=t.0#heading=h.yr4qxyxotyw),
|
||||
and [Python bindings for Babeltrace, a library that works with
|
||||
CTF](https://babeltrace.org/), so that's what we do:
|
||||
|
||||
```
|
||||
$ python3 ctf_to_json.py ~/lttng-traces/auto-20250214-091153 > trace.json
|
||||
```
|
||||
|
||||
While Firefox Profiler can load this file, it works better on Perfetto,
|
||||
as the Whippet events are visually rendered on their respective threads.
|
||||
|
||||

|
||||
|
||||
### Expanding the set of events
|
||||
|
||||
As of February 2025,
|
||||
the current set of tracepoints includes the [heap
|
||||
events](https://github.com/wingo/whippet/blob/main/doc/manual.md#statistics)
|
||||
and some detailed internals of the parallel tracer. We expect this set
|
||||
of tracepoints to expand over time.
|
||||
|
||||
### Overhead of tracepoints
|
||||
|
||||
When tracepoints are compiled in but no events are enabled, tracepoints
|
||||
appear to have no impact on run-time. When event collection is on, for
|
||||
x86-64 hardware, [emitting a tracepoint event takes about
|
||||
100ns](https://discuss.systems/@DesnoyersMa/113986344940256872).
|
207
libguile/whippet/embed.am
Normal file
|
@ -0,0 +1,207 @@
|
|||
# Automake snippet for embedding Whippet in an autotools project.
|
||||
#
|
||||
# The including Makefile.am needs to do this, assuming Whippet is in the
|
||||
# whippet/ subdirectory:
|
||||
#
|
||||
# noinst_LTLIBRARIES =
|
||||
# WHIPPET_EMBEDDER_CPPFLAGS = -include src/my-embedder.h
|
||||
# include whippet/embed.am
|
||||
#
|
||||
# my-embedder.h should provide the various hooks that Whippet needs to
|
||||
# specialize itself to the embedder's object representation.
|
||||
#
|
||||
# The result is a libwhippet.la. To compile and link against it:
|
||||
#
|
||||
# AM_CFLAGS = $(WHIPPET_CPPFLAGS) $(WHIPPET_CFLAGS) $(WHIPPET_TO_EMBEDDER_CPPFLAGS)
|
||||
# LDADD = libwhippet.la
|
||||
# AM_LDFLAGS = $(WHIPPET_TO_EMBEDDER_LDFLAGS)
|
||||
#
|
||||
# The assumption is that the embedder will build a single copy of
|
||||
# Whippet, specialized against a single collector, a single set of
|
||||
# embedder hooks, and a single target platform. The collector and
|
||||
# platform should be chosen at configure-time. Because Automake really
|
||||
# wants the set of source files to be visible to it at automake-time, we
|
||||
# need to implement these conditions via AM_CONDITIONAL in a
|
||||
# configure.ac. For example for a parallel-mmc configuration on
|
||||
# gnu-linux, we would need:
|
||||
#
|
||||
# AM_SUBST(WHIPPET_COLLECTOR, parallel-mmc)
|
||||
# AM_CONDITIONAL(WHIPPET_COLLECTOR_SEMI, 0)
|
||||
# AM_CONDITIONAL(WHIPPET_COLLECTOR_PCC, 0)
|
||||
# AM_CONDITIONAL(WHIPPET_COLLECTOR_BDW, 0)
|
||||
# AM_CONDITIONAL(WHIPPET_COLLECTOR_MMC, 1)
|
||||
# AM_CONDITIONAL(WHIPPET_PLATFORM_GNU_LINUX, 1)
|
||||
#
|
||||
# Then there are other conditionals for compilation options:
|
||||
#
|
||||
# AM_CONDITIONAL(WHIPPET_ENABLE_DEBUG, 0)
|
||||
# AM_CONDITIONAL(WHIPPET_USE_LTTNG, 1)
|
||||
#
|
||||
# Finally, LTO should be enabled, for best performance. This should be
|
||||
# added to CFLAGS at configure-time.
|
||||
#
|
||||
# Getting all of this in there is gnarly. See the example configure.ac
|
||||
# for one take on the topic.
|
||||
|
||||
noinst_LTLIBRARIES += libwhippet-common.la libwhippet.la
|
||||
|
||||
libwhippet_common_la_SOURCES = \
|
||||
%D%/src/gc-options-internal.h \
|
||||
%D%/src/gc-options.c \
|
||||
%D%/src/gc-stack.c \
|
||||
%D%/src/gc-stack.h \
|
||||
%D%/src/gc-tracepoint.c
|
||||
|
||||
if WHIPPET_PLATFORM_GNU_LINUX
|
||||
libwhippet_common_la_SOURCES += %D%/src/gc-platform-gnu-linux.c
|
||||
endif
|
||||
|
||||
libwhippet_la_SOURCES = \
|
||||
%D%/src/adaptive-heap-sizer.h \
|
||||
%D%/src/address-hash.h \
|
||||
%D%/src/address-map.h \
|
||||
%D%/src/address-set.h \
|
||||
%D%/src/assert.h \
|
||||
%D%/src/background-thread.h \
|
||||
%D%/src/copy-space.h \
|
||||
%D%/src/debug.h \
|
||||
%D%/src/extents.h \
|
||||
%D%/src/field-set.h \
|
||||
%D%/src/freelist.h \
|
||||
%D%/src/gc-align.h \
|
||||
%D%/src/gc-ephemeron-internal.h \
|
||||
%D%/src/gc-ephemeron.c \
|
||||
%D%/src/gc-finalizer-internal.h \
|
||||
%D%/src/gc-finalizer.c \
|
||||
%D%/src/gc-internal.h \
|
||||
%D%/src/gc-lock.h \
|
||||
%D%/src/gc-platform.h \
|
||||
%D%/src/gc-trace.h \
|
||||
%D%/src/growable-heap-sizer.h \
|
||||
%D%/src/heap-sizer.h \
|
||||
%D%/src/large-object-space.h \
|
||||
%D%/src/local-worklist.h \
|
||||
%D%/src/nofl-space.h \
|
||||
%D%/src/parallel-tracer.h \
|
||||
%D%/src/root.h \
|
||||
%D%/src/root-worklist.h \
|
||||
%D%/src/serial-tracer.h \
|
||||
%D%/src/shared-worklist.h \
|
||||
%D%/src/simple-worklist.h \
|
||||
%D%/src/spin.h \
|
||||
%D%/src/splay-tree.h \
|
||||
%D%/src/swar.h \
|
||||
%D%/src/tracer.h
|
||||
|
||||
WHIPPET_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
|
||||
WHIPPET_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
|
||||
WHIPPET_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
|
||||
WHIPPET_CFLAGS_generational_pcc = $(WHIPPET_CFLAGS_pcc) -DGC_GENERATIONAL=1
|
||||
WHIPPET_CFLAGS_mmc = \
|
||||
-DGC_PRECISE_ROOTS=1
|
||||
WHIPPET_CFLAGS_generational_mmc = \
|
||||
-DGC_PRECISE_ROOTS=1 -DGC_GENERATIONAL=1
|
||||
WHIPPET_CFLAGS_parallel_mmc = \
|
||||
-DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
|
||||
WHIPPET_CFLAGS_parallel_generational_mmc = \
|
||||
-DGC_PRECISE_ROOTS=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1
|
||||
WHIPPET_CFLAGS_stack_conservative_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1
|
||||
WHIPPET_CFLAGS_stack_conservative_generational_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_GENERATIONAL=1
|
||||
WHIPPET_CFLAGS_stack_conservative_parallel_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_PARALLEL=1
|
||||
WHIPPET_CFLAGS_stack_conservative_parallel_generational_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1
|
||||
WHIPPET_CFLAGS_heap_conservative_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
|
||||
WHIPPET_CFLAGS_heap_conservative_generational_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_GENERATIONAL=1
|
||||
WHIPPET_CFLAGS_heap_conservative_parallel_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_PARALLEL=1
|
||||
WHIPPET_CFLAGS_heap_conservative_parallel_generational_mmc = \
|
||||
-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1 -DGC_GENERATIONAL=1 -DGC_PARALLEL=1
|
||||
|
||||
WHIPPET_CFLAGS = $(WHIPPET_CFLAGS_$(subst -,_,$(WHIPPET_COLLECTOR)))
|
||||
WHIPPET_IMPL_CFLAGS =
|
||||
WHIPPET_LIBS = -lm
|
||||
WHIPPET_CPPFLAGS = -I$(srcdir)/%D%/api
|
||||
WHIPPET_TO_EMBEDDER_CPPFLAGS = $(WHIPPET_CPPFLAGS)
|
||||
|
||||
if WHIPPET_ENABLE_DEBUG
|
||||
WHIPPET_CFLAGS += -DGC_DEBUG=1
|
||||
endif
|
||||
|
||||
if WHIPPET_COLLECTOR_SEMI
|
||||
libwhippet_la_SOURCES += %D%/src/semi.c
|
||||
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/semi-attrs.h
|
||||
endif
|
||||
|
||||
if WHIPPET_COLLECTOR_PCC
|
||||
libwhippet_la_SOURCES += %D%/src/pcc.c
|
||||
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/pcc-attrs.h
|
||||
endif
|
||||
|
||||
if WHIPPET_COLLECTOR_BDW
|
||||
libwhippet_la_SOURCES += %D%/src/bdw.c
|
||||
WHIPPET_IMPL_CFLAGS += $(WHIPPET_BDW_CFLAGS)
|
||||
WHIPPET_LIBS += $(WHIPPET_BDW_LIBS)
|
||||
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/bdw-attrs.h
|
||||
endif
|
||||
|
||||
if WHIPPET_COLLECTOR_MMC
|
||||
libwhippet_la_SOURCES += %D%/src/mmc.c
|
||||
WHIPPET_TO_EMBEDDER_CPPFLAGS += -include $(srcdir)/%D%/api/mmc-attrs.h
|
||||
endif
|
||||
|
||||
# add to cflags: -flto -fvisibility=hidden -fno-strict-aliasing
|
||||
|
||||
libwhippet_common_la_CPPFLAGS = $(WHIPPET_CPPFLAGS)
|
||||
libwhippet_common_la_CFLAGS = -Wall -Wno-unused $(CFLAGS)
|
||||
libwhippet_common_la_CFLAGS += $(WHIPPET_CFLAGS)
|
||||
libwhippet_common_la_LDFLAGS = -lpthread $(LDFLAGS)
|
||||
libwhippet_common_la_LIBADD = $(LIBS)
|
||||
|
||||
if WHIPPET_USE_LTTNG
|
||||
libwhippet_common_la_CPPFLAGS += $(WHIPPET_LTTNG_CFLAGS) -DGC_TRACEPOINT_LTTNG=1
|
||||
WHIPPET_LIBS += $(WHIPPET_LTTNG_LIBS)
|
||||
endif
|
||||
|
||||
if !WHIPPET_ENABLE_DEBUG
|
||||
libwhippet_common_la_CFLAGS += -DNDEBUG
|
||||
endif
|
||||
|
||||
libwhippet_la_CPPFLAGS = $(libwhippet_common_la_CPPFLAGS) $(WHIPPET_EMBEDDER_CPPFLAGS)
|
||||
libwhippet_la_CFLAGS = $(libwhippet_common_la_CFLAGS)
|
||||
libwhippet_la_CFLAGS += $(WHIPPET_IMPL_CFLAGS)
|
||||
libwhippet_la_LDFLAGS = $(libwhippet_common_la_LDFLAGS) $(WHIPPET_LIBS)
|
||||
libwhippet_la_LIBADD = libwhippet-common.la
|
||||
|
||||
noinst_HEADERS = \
|
||||
%D%/api/bdw-attrs.h \
|
||||
%D%/api/gc-allocation-kind.h \
|
||||
%D%/api/gc-api.h \
|
||||
%D%/api/gc-assert.h \
|
||||
%D%/api/gc-attrs.h \
|
||||
%D%/api/gc-basic-stats.h \
|
||||
%D%/api/gc-collection-kind.h \
|
||||
%D%/api/gc-config.h \
|
||||
%D%/api/gc-conservative-ref.h \
|
||||
%D%/api/gc-edge.h \
|
||||
%D%/api/gc-embedder-api.h \
|
||||
%D%/api/gc-ephemeron.h \
|
||||
%D%/api/gc-event-listener-chain.h \
|
||||
%D%/api/gc-event-listener.h \
|
||||
%D%/api/gc-finalizer.h \
|
||||
%D%/api/gc-forwarding.h \
|
||||
%D%/api/gc-histogram.h \
|
||||
%D%/api/gc-inline.h \
|
||||
%D%/api/gc-lttng.h \
|
||||
%D%/api/gc-null-event-listener.h \
|
||||
%D%/api/gc-options.h \
|
||||
%D%/api/gc-ref.h \
|
||||
%D%/api/gc-tracepoint.h \
|
||||
%D%/api/gc-visibility.h \
|
||||
%D%/api/mmc-attrs.h \
|
||||
%D%/api/pcc-attrs.h \
|
||||
%D%/api/semi-attrs.h
|
105
libguile/whippet/embed.mk
Normal file
|
@ -0,0 +1,105 @@
|
|||
GC_COLLECTOR ?= semi

DEFAULT_BUILD := opt

BUILD_CFLAGS_opt = -O2 -g -DNDEBUG
BUILD_CFLAGS_optdebug = -O2 -g -DGC_DEBUG=1
BUILD_CFLAGS_debug = -O0 -g -DGC_DEBUG=1

GC_BUILD_CFLAGS = $(BUILD_CFLAGS_$(or $(GC_BUILD),$(DEFAULT_BUILD)))

V ?= 1
v_0 = @
v_1 =

GC_USE_LTTNG_0 :=
GC_USE_LTTNG_1 := 1
GC_USE_LTTNG := $(shell pkg-config --exists lttng-ust && echo 1 || echo 0)
GC_LTTNG_CPPFLAGS := $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)), $(shell pkg-config --cflags lttng-ust),)
GC_LTTNG_LIBS := $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)), $(shell pkg-config --libs lttng-ust),)
GC_TRACEPOINT_CPPFLAGS = $(if $(GC_USE_LTTNG_$(GC_USE_LTTNG)),$(GC_LTTNG_CPPFLAGS) -DGC_TRACEPOINT_LTTNG=1,)
GC_TRACEPOINT_LIBS = $(GC_LTTNG_LIBS)

GC_V = $(v_$(V))
GC_CC = gcc
GC_CFLAGS = -Wall -flto -fno-strict-aliasing -fvisibility=hidden -Wno-unused $(GC_BUILD_CFLAGS)
GC_CPPFLAGS = -I$(WHIPPET)api $(GC_TRACEPOINT_CPPFLAGS)
GC_LDFLAGS = -lpthread -flto=auto $(GC_TRACEPOINT_LIBS)
GC_DEPFLAGS =
GC_COMPILE = $(GC_V)$(GC_CC) $(GC_CFLAGS) $(GC_CPPFLAGS) $(GC_DEPFLAGS) -o $@
GC_LINK = $(GC_V)$(GC_CC) $(GC_LDFLAGS) -o $@
GC_PLATFORM = gnu-linux
GC_OBJDIR =

$(GC_OBJDIR)gc-platform.o: $(WHIPPET)src/gc-platform-$(GC_PLATFORM).c
	$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-stack.o: $(WHIPPET)src/gc-stack.c
	$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-options.o: $(WHIPPET)src/gc-options.c
	$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-tracepoint.o: $(WHIPPET)src/gc-tracepoint.c
	$(GC_COMPILE) -c $<
$(GC_OBJDIR)gc-ephemeron.o: $(WHIPPET)src/gc-ephemeron.c
	$(GC_COMPILE) $(EMBEDDER_TO_GC_CFLAGS) -c $<
$(GC_OBJDIR)gc-finalizer.o: $(WHIPPET)src/gc-finalizer.c
	$(GC_COMPILE) $(EMBEDDER_TO_GC_CFLAGS) -c $<

GC_STEM_bdw = bdw
GC_CFLAGS_bdw = -DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1
GC_IMPL_CFLAGS_bdw = `pkg-config --cflags bdw-gc`
GC_LIBS_bdw = `pkg-config --libs bdw-gc`

GC_STEM_semi = semi
GC_CFLAGS_semi = -DGC_PRECISE_ROOTS=1
GC_LIBS_semi = -lm

GC_STEM_pcc = pcc
GC_CFLAGS_pcc = -DGC_PRECISE_ROOTS=1 -DGC_PARALLEL=1
GC_LIBS_pcc = -lm

GC_STEM_generational_pcc = $(GC_STEM_pcc)
GC_CFLAGS_generational_pcc = $(GC_CFLAGS_pcc) -DGC_GENERATIONAL=1
GC_LIBS_generational_pcc = $(GC_LIBS_pcc)

define mmc_variant
GC_STEM_$(1) = mmc
GC_CFLAGS_$(1) = $(2)
GC_LIBS_$(1) = -lm
endef

define generational_mmc_variants
$(call mmc_variant,$(1)mmc,$(2))
$(call mmc_variant,$(1)generational_mmc,$(2) -DGC_GENERATIONAL=1)
endef

define parallel_mmc_variants
$(call generational_mmc_variants,$(1),$(2))
$(call generational_mmc_variants,$(1)parallel_,$(2) -DGC_PARALLEL=1)
endef

define trace_mmc_variants
$(call parallel_mmc_variants,,-DGC_PRECISE_ROOTS=1)
$(call parallel_mmc_variants,stack_conservative_,-DGC_CONSERVATIVE_ROOTS=1)
$(call parallel_mmc_variants,heap_conservative_,-DGC_CONSERVATIVE_ROOTS=1 -DGC_CONSERVATIVE_TRACE=1)
endef

$(eval $(call trace_mmc_variants))

gc_var = $($(1)$(subst -,_,$(2)))
gc_impl = $(call gc_var,GC_STEM_,$(1)).c
gc_attrs = $(call gc_var,GC_STEM_,$(1))-attrs.h
gc_cflags = $(call gc_var,GC_CFLAGS_,$(1))
gc_impl_cflags = $(call gc_var,GC_IMPL_CFLAGS_,$(1))
gc_libs = $(call gc_var,GC_LIBS_,$(1))

GC_IMPL = $(call gc_impl,$(GC_COLLECTOR))
GC_CFLAGS += $(call gc_cflags,$(GC_COLLECTOR))
GC_IMPL_CFLAGS = $(call gc_impl_cflags,$(GC_COLLECTOR))
GC_ATTRS = $(WHIPPET)api/$(call gc_attrs,$(GC_COLLECTOR))
GC_TO_EMBEDDER_CFLAGS = -include $(GC_ATTRS)
GC_LIBS = $(call gc_libs,$(GC_COLLECTOR))

$(GC_OBJDIR)gc-impl.o: $(WHIPPET)src/$(call gc_impl,$(GC_COLLECTOR))
	$(GC_COMPILE) $(GC_IMPL_CFLAGS) $(EMBEDDER_TO_GC_CFLAGS) -c $<

GC_OBJS=$(foreach O,gc-platform.o gc-stack.o gc-options.o gc-tracepoint.o gc-ephemeron.o gc-finalizer.o gc-impl.o,$(GC_OBJDIR)$(O))
11
libguile/whippet/manifest.scm
Normal file
11
libguile/whippet/manifest.scm
Normal file
|
@ -0,0 +1,11 @@
|
|||
(use-modules (guix packages))

(specifications->manifest
 '("bash"
   "coreutils"
   "gcc-toolchain"
   "lttng-ust"
   "glibc"
   "libgc"
   "make"
   "pkg-config"))
171
libguile/whippet/src/adaptive-heap-sizer.h
Normal file
171
libguile/whippet/src/adaptive-heap-sizer.h
Normal file
|
@ -0,0 +1,171 @@
|
|||
#ifndef ADAPTIVE_HEAP_SIZER_H
#define ADAPTIVE_HEAP_SIZER_H

#include <math.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "assert.h"
#include "background-thread.h"
#include "debug.h"
#include "gc-config.h"
#include "gc-platform.h"
#include "heap-sizer.h"

// This is the MemBalancer algorithm from "Optimal Heap Limits for Reducing
// Browser Memory Use" by Marisa Kirisame, Pranav Shenoy, and Pavel Panchekha
// (https://arxiv.org/abs/2204.10455).
//
// This implementation differs slightly in that the constant "c" of the paper
// has been extracted outside the radical, and notionally reversed: it is a
// unitless "expansiveness" parameter whose domain is [0,+∞].  Also there are
// minimum and maximum heap size multipliers, and a minimum amount of free
// space.  The initial collection rate is an informed guess.  The initial
// allocation rate estimate is high, considering that allocation rates are often
// high on program startup.

struct gc_adaptive_heap_sizer {
  uint64_t (*get_allocation_counter)(struct gc_heap *heap);
  void (*set_heap_size)(struct gc_heap *heap, size_t size);
  struct gc_heap *heap;
  uint64_t smoothed_pause_time;
  uint64_t smoothed_live_bytes;
  uint64_t live_bytes;
  double smoothed_allocation_rate;
  double collection_smoothing_factor;
  double allocation_smoothing_factor;
  double minimum_multiplier;
  double maximum_multiplier;
  double minimum_free_space;
  double expansiveness;
#if GC_PARALLEL
  pthread_mutex_t lock;
#endif
  int background_task_id;
  uint64_t last_bytes_allocated;
  uint64_t last_heartbeat;
};

static void
gc_adaptive_heap_sizer_lock(struct gc_adaptive_heap_sizer *sizer) {
#if GC_PARALLEL
  pthread_mutex_lock(&sizer->lock);
#endif
}

static void
gc_adaptive_heap_sizer_unlock(struct gc_adaptive_heap_sizer *sizer) {
#if GC_PARALLEL
  pthread_mutex_unlock(&sizer->lock);
#endif
}

// With lock
static uint64_t
gc_adaptive_heap_sizer_calculate_size(struct gc_adaptive_heap_sizer *sizer) {
  double allocation_rate = sizer->smoothed_allocation_rate;
  double collection_rate =
    (double)sizer->smoothed_pause_time / (double)sizer->smoothed_live_bytes;
  double radicand = sizer->live_bytes * allocation_rate / collection_rate;
  double multiplier = 1.0 + sizer->expansiveness * sqrt(radicand);
  if (isnan(multiplier) || multiplier < sizer->minimum_multiplier)
    multiplier = sizer->minimum_multiplier;
  else if (multiplier > sizer->maximum_multiplier)
    multiplier = sizer->maximum_multiplier;
  uint64_t size = sizer->live_bytes * multiplier;
  if (size - sizer->live_bytes < sizer->minimum_free_space)
    size = sizer->live_bytes + sizer->minimum_free_space;
  return size;
}

static uint64_t
gc_adaptive_heap_sizer_set_expansiveness(struct gc_adaptive_heap_sizer *sizer,
                                         double expansiveness) {
  gc_adaptive_heap_sizer_lock(sizer);
  sizer->expansiveness = expansiveness;
  uint64_t heap_size = gc_adaptive_heap_sizer_calculate_size(sizer);
  gc_adaptive_heap_sizer_unlock(sizer);
  return heap_size;
}

static void
gc_adaptive_heap_sizer_on_gc(struct gc_adaptive_heap_sizer *sizer,
                             size_t live_bytes, uint64_t pause_ns,
                             void (*set_heap_size)(struct gc_heap*, size_t)) {
  gc_adaptive_heap_sizer_lock(sizer);
  sizer->live_bytes = live_bytes;
  sizer->smoothed_live_bytes *= 1.0 - sizer->collection_smoothing_factor;
  sizer->smoothed_live_bytes += sizer->collection_smoothing_factor * live_bytes;
  sizer->smoothed_pause_time *= 1.0 - sizer->collection_smoothing_factor;
  sizer->smoothed_pause_time += sizer->collection_smoothing_factor * pause_ns;
  set_heap_size(sizer->heap, gc_adaptive_heap_sizer_calculate_size(sizer));
  gc_adaptive_heap_sizer_unlock(sizer);
}

static void
gc_adaptive_heap_sizer_background_task(void *data) {
  struct gc_adaptive_heap_sizer *sizer = data;
  gc_adaptive_heap_sizer_lock(sizer);
  uint64_t bytes_allocated =
    sizer->get_allocation_counter(sizer->heap);
  // bytes_allocated being 0 means the request failed; retry later.
  if (bytes_allocated) {
    uint64_t heartbeat = gc_platform_monotonic_nanoseconds();
    double rate = (double) (bytes_allocated - sizer->last_bytes_allocated) /
      (double) (heartbeat - sizer->last_heartbeat);
    // Just smooth the rate, under the assumption that the denominator is almost
    // always 1.
    sizer->smoothed_allocation_rate *= 1.0 - sizer->allocation_smoothing_factor;
    sizer->smoothed_allocation_rate += rate * sizer->allocation_smoothing_factor;
    sizer->last_heartbeat = heartbeat;
    sizer->last_bytes_allocated = bytes_allocated;
    sizer->set_heap_size(sizer->heap,
                         gc_adaptive_heap_sizer_calculate_size(sizer));
  }
  gc_adaptive_heap_sizer_unlock(sizer);
}

static struct gc_adaptive_heap_sizer*
gc_make_adaptive_heap_sizer(struct gc_heap *heap, double expansiveness,
                            uint64_t (*get_allocation_counter)(struct gc_heap*),
                            void (*set_heap_size)(struct gc_heap*, size_t),
                            struct gc_background_thread *thread) {
  struct gc_adaptive_heap_sizer *sizer;
  sizer = malloc(sizeof(*sizer));
  if (!sizer)
    GC_CRASH();
  memset(sizer, 0, sizeof(*sizer));
  sizer->get_allocation_counter = get_allocation_counter;
  sizer->set_heap_size = set_heap_size;
  sizer->heap = heap;
  // Baseline estimate of GC speed: 10 MB/ms, or 10 bytes/ns.  However since we
  // observe this speed by separately noisy measurements, we have to provide
  // defaults for numerator and denominator; estimate 2ms for initial GC pauses
  // for 20 MB of live data during program startup.
  sizer->smoothed_pause_time = 2 * 1000 * 1000;
  sizer->smoothed_live_bytes = 20 * 1024 * 1024;
  // Baseline estimate of allocation rate during startup: 50 MB in 10ms, or 5
  // bytes/ns.
  sizer->smoothed_allocation_rate = 5;
  sizer->collection_smoothing_factor = 0.5;
  sizer->allocation_smoothing_factor = 0.95;
  sizer->minimum_multiplier = 1.1;
  sizer->maximum_multiplier = 5;
  sizer->minimum_free_space = 4 * 1024 * 1024;
  sizer->expansiveness = expansiveness;
  sizer->last_bytes_allocated = get_allocation_counter(heap);
  sizer->last_heartbeat = gc_platform_monotonic_nanoseconds();
#if GC_PARALLEL
  pthread_mutex_init(&sizer->lock, NULL);
  sizer->background_task_id =
    gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_MIDDLE,
                                  gc_adaptive_heap_sizer_background_task,
                                  sizer);
#else
  sizer->background_task_id = -1;
#endif
  return sizer;
}

#endif // ADAPTIVE_HEAP_SIZER_H
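The MemBalancer comment above summarizes the rule that gc_adaptive_heap_sizer_calculate_size implements: grow the heap to live * (1 + expansiveness * sqrt(live * allocation_rate / collection_rate)), clamped between the multiplier bounds and a minimum amount of free space. The following standalone sketch of that same calculation is illustrative only; the helper and the sample numbers are hypothetical and not part of the commit.

// Minimal sketch of the adaptive sizing rule above (hypothetical helper,
// not part of the commit).
#include <math.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t example_heap_size(double live_bytes, double allocation_rate,
                                  double collection_rate, double expansiveness,
                                  double min_multiplier, double max_multiplier,
                                  double min_free_space) {
  double multiplier =
    1.0 + expansiveness * sqrt(live_bytes * allocation_rate / collection_rate);
  if (isnan(multiplier) || multiplier < min_multiplier)
    multiplier = min_multiplier;
  else if (multiplier > max_multiplier)
    multiplier = max_multiplier;
  double size = live_bytes * multiplier;
  if (size - live_bytes < min_free_space)
    size = live_bytes + min_free_space;
  return (uint64_t)size;
}

int main(void) {
  // 20 MB live, 5 bytes/ns allocated, 0.1 ns/byte collection cost: the raw
  // multiplier is huge, so the result is clamped to max_multiplier * live.
  uint64_t size = example_heap_size(20.0 * 1024 * 1024, 5.0, 0.1, 1.0,
                                    1.1, 5.0, 4.0 * 1024 * 1024);
  printf("suggested heap size: %llu bytes\n", (unsigned long long)size);
  return 0;
}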
45
libguile/whippet/src/address-hash.h
Normal file
45
libguile/whippet/src/address-hash.h
Normal file
|
@ -0,0 +1,45 @@
|
|||
#ifndef ADDRESS_HASH_H
#define ADDRESS_HASH_H

#include <stdint.h>

static uintptr_t hash_address(uintptr_t x) {
  if (sizeof (x) < 8) {
    // Chris Wellons' lowbias32, from https://nullprogram.com/blog/2018/07/31/.
    x ^= x >> 16;
    x *= 0x7feb352dU;
    x ^= x >> 15;
    x *= 0x846ca68bU;
    x ^= x >> 16;
    return x;
  } else {
    // Sebastiano Vigna's splitmix64 integer mixer, from
    // https://prng.di.unimi.it/splitmix64.c.
    x ^= x >> 30;
    x *= 0xbf58476d1ce4e5b9U;
    x ^= x >> 27;
    x *= 0x94d049bb133111ebU;
    x ^= x >> 31;
    return x;
  }
}
// Inverse of hash_address from https://nullprogram.com/blog/2018/07/31/.
static uintptr_t unhash_address(uintptr_t x) {
  if (sizeof (x) < 8) {
    x ^= x >> 16;
    x *= 0x43021123U;
    x ^= x >> 15 ^ x >> 30;
    x *= 0x1d69e2a5U;
    x ^= x >> 16;
    return x;
  } else {
    x ^= x >> 31 ^ x >> 62;
    x *= 0x319642b2d24d8ec3U;
    x ^= x >> 27 ^ x >> 54;
    x *= 0x96de1b173f119089U;
    x ^= x >> 30 ^ x >> 60;
    return x;
  }
}

#endif // ADDRESS_HASH_H
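The two mixers above are intended to be exact inverses of each other on uintptr_t-sized values. A hypothetical round-trip check, not part of the commit, assuming address-hash.h is on the include path:

// Round-trip check for hash_address/unhash_address (hypothetical test,
// not part of the commit).
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include "address-hash.h"

int main(void) {
  uintptr_t samples[] = { 0, 1, 42, 0xdeadbeef, (uintptr_t)&samples };
  for (size_t i = 0; i < sizeof samples / sizeof samples[0]; i++)
    assert(unhash_address(hash_address(samples[i])) == samples[i]);
  return 0;
}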
213
libguile/whippet/src/address-map.h
Normal file
213
libguile/whippet/src/address-map.h
Normal file
|
@ -0,0 +1,213 @@
|
|||
#ifndef ADDRESS_MAP_H
|
||||
#define ADDRESS_MAP_H
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "address-hash.h"
|
||||
#include "gc-assert.h"
|
||||
|
||||
struct hash_map_entry {
|
||||
uintptr_t k;
|
||||
uintptr_t v;
|
||||
};
|
||||
|
||||
struct hash_map {
|
||||
struct hash_map_entry *data;
|
||||
size_t size; // total number of slots
|
||||
size_t n_items; // number of items in set
|
||||
uint8_t *bits; // bitvector indicating set slots
|
||||
};
|
||||
|
||||
static void hash_map_clear(struct hash_map *map) {
|
||||
memset(map->bits, 0, map->size / 8);
|
||||
map->n_items = 0;
|
||||
}
|
||||
|
||||
// Size must be a power of 2.
|
||||
static void hash_map_init(struct hash_map *map, size_t size) {
|
||||
map->size = size;
|
||||
map->data = malloc(sizeof(struct hash_map_entry) * size);
|
||||
if (!map->data) GC_CRASH();
|
||||
map->bits = malloc(size / 8);
|
||||
if (!map->bits) GC_CRASH();
|
||||
hash_map_clear(map);
|
||||
}
|
||||
static void hash_map_destroy(struct hash_map *map) {
|
||||
free(map->data);
|
||||
free(map->bits);
|
||||
}
|
||||
|
||||
static size_t hash_map_slot_index(struct hash_map *map, size_t idx) {
|
||||
return idx & (map->size - 1);
|
||||
}
|
||||
static struct hash_map_entry* hash_map_slot_entry(struct hash_map *map,
|
||||
size_t idx) {
|
||||
return &map->data[hash_map_slot_index(map, idx)];
|
||||
}
|
||||
static int hash_map_slot_is_empty(struct hash_map *map, size_t idx) {
|
||||
idx = hash_map_slot_index(map, idx);
|
||||
return (map->bits[idx / 8] & (1 << (idx % 8))) == 0;
|
||||
}
|
||||
static void hash_map_slot_acquire(struct hash_map *map, size_t idx) {
|
||||
idx = hash_map_slot_index(map, idx);
|
||||
map->bits[idx / 8] |= (1 << (idx % 8));
|
||||
map->n_items++;
|
||||
}
|
||||
static void hash_map_slot_release(struct hash_map *map, size_t idx) {
|
||||
idx = hash_map_slot_index(map, idx);
|
||||
map->bits[idx / 8] &= ~(1 << (idx % 8));
|
||||
map->n_items--;
|
||||
}
|
||||
static size_t hash_map_slot_distance(struct hash_map *map, size_t idx) {
|
||||
return hash_map_slot_index(map, idx - hash_map_slot_entry(map, idx)->k);
|
||||
}
|
||||
static int hash_map_should_shrink(struct hash_map *map) {
|
||||
return map->size > 8 && map->n_items <= (map->size >> 3);
|
||||
}
|
||||
static int hash_map_should_grow(struct hash_map *map) {
|
||||
return map->n_items >= map->size - (map->size >> 3);
|
||||
}
|
||||
|
||||
static void hash_map_do_insert(struct hash_map *map, uintptr_t k, uintptr_t v) {
|
||||
size_t displacement = 0;
|
||||
while (!hash_map_slot_is_empty(map, k + displacement)
|
||||
&& displacement < hash_map_slot_distance(map, k + displacement))
|
||||
displacement++;
|
||||
while (!hash_map_slot_is_empty(map, k + displacement)
|
||||
&& displacement == hash_map_slot_distance(map, k + displacement)) {
|
||||
if (hash_map_slot_entry(map, k + displacement)->k == k) {
|
||||
hash_map_slot_entry(map, k + displacement)->v = v;
|
||||
return;
|
||||
}
|
||||
displacement++;
|
||||
}
|
||||
size_t idx = k + displacement;
|
||||
size_t slots_to_move = 0;
|
||||
while (!hash_map_slot_is_empty(map, idx + slots_to_move))
|
||||
slots_to_move++;
|
||||
hash_map_slot_acquire(map, idx + slots_to_move);
|
||||
while (slots_to_move--)
|
||||
*hash_map_slot_entry(map, idx + slots_to_move + 1) =
|
||||
*hash_map_slot_entry(map, idx + slots_to_move);
|
||||
*hash_map_slot_entry(map, idx) = (struct hash_map_entry){ k, v };
|
||||
}
|
||||
|
||||
static void hash_map_populate(struct hash_map *dst, struct hash_map *src) {
|
||||
for (size_t i = 0; i < src->size; i++)
|
||||
if (!hash_map_slot_is_empty(src, i))
|
||||
hash_map_do_insert(dst, hash_map_slot_entry(src, i)->k,
|
||||
hash_map_slot_entry(src, i)->v);
|
||||
}
|
||||
static void hash_map_grow(struct hash_map *map) {
|
||||
struct hash_map fresh;
|
||||
hash_map_init(&fresh, map->size << 1);
|
||||
hash_map_populate(&fresh, map);
|
||||
hash_map_destroy(map);
|
||||
memcpy(map, &fresh, sizeof(fresh));
|
||||
}
|
||||
static void hash_map_shrink(struct hash_map *map) {
|
||||
struct hash_map fresh;
|
||||
hash_map_init(&fresh, map->size >> 1);
|
||||
hash_map_populate(&fresh, map);
|
||||
hash_map_destroy(map);
|
||||
memcpy(map, &fresh, sizeof(fresh));
|
||||
}
|
||||
|
||||
static void hash_map_insert(struct hash_map *map, uintptr_t k, uintptr_t v) {
|
||||
if (hash_map_should_grow(map))
|
||||
hash_map_grow(map);
|
||||
hash_map_do_insert(map, k, v);
|
||||
}
|
||||
static void hash_map_remove(struct hash_map *map, uintptr_t k) {
|
||||
size_t slot = k;
|
||||
while (!hash_map_slot_is_empty(map, slot) && hash_map_slot_entry(map, slot)->k != k)
|
||||
slot++;
|
||||
if (hash_map_slot_is_empty(map, slot))
|
||||
__builtin_trap();
|
||||
while (!hash_map_slot_is_empty(map, slot + 1)
|
||||
&& hash_map_slot_distance(map, slot + 1)) {
|
||||
*hash_map_slot_entry(map, slot) = *hash_map_slot_entry(map, slot + 1);
|
||||
slot++;
|
||||
}
|
||||
hash_map_slot_release(map, slot);
|
||||
if (hash_map_should_shrink(map))
|
||||
hash_map_shrink(map);
|
||||
}
|
||||
static int hash_map_contains(struct hash_map *map, uintptr_t k) {
|
||||
for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) {
|
||||
if (hash_map_slot_entry(map, slot)->k == k)
|
||||
return 1;
|
||||
if (hash_map_slot_distance(map, slot) < (slot - k))
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
static uintptr_t hash_map_lookup(struct hash_map *map, uintptr_t k, uintptr_t default_) {
|
||||
for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) {
|
||||
if (hash_map_slot_entry(map, slot)->k == k)
|
||||
return hash_map_slot_entry(map, slot)->v;
|
||||
if (hash_map_slot_distance(map, slot) < (slot - k))
|
||||
break;
|
||||
}
|
||||
return default_;
|
||||
}
|
||||
static inline void hash_map_for_each (struct hash_map *map,
|
||||
void (*f)(uintptr_t, uintptr_t, void*),
|
||||
void *data) __attribute__((always_inline));
|
||||
static inline void hash_map_for_each(struct hash_map *map,
|
||||
void (*f)(uintptr_t, uintptr_t, void*),
|
||||
void *data) {
|
||||
for (size_t i = 0; i < map->size; i++)
|
||||
if (!hash_map_slot_is_empty(map, i))
|
||||
f(hash_map_slot_entry(map, i)->k, hash_map_slot_entry(map, i)->v, data);
|
||||
}
|
||||
|
||||
struct address_map {
|
||||
struct hash_map hash_map;
|
||||
};
|
||||
|
||||
static void address_map_init(struct address_map *map) {
|
||||
hash_map_init(&map->hash_map, 8);
|
||||
}
|
||||
static void address_map_destroy(struct address_map *map) {
|
||||
hash_map_destroy(&map->hash_map);
|
||||
}
|
||||
static void address_map_clear(struct address_map *map) {
|
||||
hash_map_clear(&map->hash_map);
|
||||
}
|
||||
|
||||
static void address_map_add(struct address_map *map, uintptr_t addr, uintptr_t v) {
|
||||
hash_map_insert(&map->hash_map, hash_address(addr), v);
|
||||
}
|
||||
static void address_map_remove(struct address_map *map, uintptr_t addr) {
|
||||
hash_map_remove(&map->hash_map, hash_address(addr));
|
||||
}
|
||||
static int address_map_contains(struct address_map *map, uintptr_t addr) {
|
||||
return hash_map_contains(&map->hash_map, hash_address(addr));
|
||||
}
|
||||
static uintptr_t address_map_lookup(struct address_map *map, uintptr_t addr,
|
||||
uintptr_t default_) {
|
||||
return hash_map_lookup(&map->hash_map, hash_address(addr), default_);
|
||||
}
|
||||
|
||||
struct address_map_for_each_data {
|
||||
void (*f)(uintptr_t, uintptr_t, void *);
|
||||
void *data;
|
||||
};
|
||||
static void address_map_do_for_each(uintptr_t k, uintptr_t v, void *data) {
|
||||
struct address_map_for_each_data *for_each_data = data;
|
||||
for_each_data->f(unhash_address(k), v, for_each_data->data);
|
||||
}
|
||||
static inline void address_map_for_each (struct address_map *map,
|
||||
void (*f)(uintptr_t, uintptr_t, void*),
|
||||
void *data) __attribute__((always_inline));
|
||||
static inline void address_map_for_each (struct address_map *map,
|
||||
void (*f)(uintptr_t, uintptr_t, void*),
|
||||
void *data) {
|
||||
struct address_map_for_each_data for_each_data = { f, data };
|
||||
hash_map_for_each(&map->hash_map, address_map_do_for_each, &for_each_data);
|
||||
}
|
||||
|
||||
#endif // ADDRESS_MAP_H
|
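A hypothetical usage sketch for the address map above, not part of the commit; it assumes the Whippet api/ directory is on the include path so that the gc-assert.h include resolves:

// Map an object address to a word-sized value and look it up again
// (hypothetical example, not part of the commit).
#include <assert.h>
#include <stdint.h>

#include "address-map.h"

int main(void) {
  struct address_map map;
  address_map_init(&map);
  int object;
  address_map_add(&map, (uintptr_t)&object, 42);
  assert(address_map_contains(&map, (uintptr_t)&object));
  assert(address_map_lookup(&map, (uintptr_t)&object, 0) == 42);
  address_map_remove(&map, (uintptr_t)&object);
  assert(!address_map_contains(&map, (uintptr_t)&object));
  address_map_destroy(&map);
  return 0;
}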
214
libguile/whippet/src/address-set.h
Normal file
214
libguile/whippet/src/address-set.h
Normal file
|
@ -0,0 +1,214 @@
|
|||
#ifndef ADDRESS_SET_H
|
||||
#define ADDRESS_SET_H
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "address-hash.h"
|
||||
#include "gc-assert.h"
|
||||
|
||||
struct hash_set {
|
||||
uintptr_t *data;
|
||||
size_t size; // total number of slots
|
||||
size_t n_items; // number of items in set
|
||||
uint8_t *bits; // bitvector indicating set slots
|
||||
};
|
||||
|
||||
static void hash_set_clear(struct hash_set *set) {
|
||||
memset(set->bits, 0, set->size / 8);
|
||||
set->n_items = 0;
|
||||
}
|
||||
|
||||
// Size must be a power of 2.
|
||||
static void hash_set_init(struct hash_set *set, size_t size) {
|
||||
set->size = size;
|
||||
set->data = malloc(sizeof(uintptr_t) * size);
|
||||
if (!set->data) GC_CRASH();
|
||||
set->bits = malloc(size / 8);
|
||||
if (!set->bits) GC_CRASH();
|
||||
hash_set_clear(set);
|
||||
}
|
||||
static void hash_set_destroy(struct hash_set *set) {
|
||||
free(set->data);
|
||||
free(set->bits);
|
||||
}
|
||||
|
||||
static size_t hash_set_slot_index(struct hash_set *set, size_t idx) {
|
||||
return idx & (set->size - 1);
|
||||
}
|
||||
static int hash_set_slot_is_empty(struct hash_set *set, size_t idx) {
|
||||
idx = hash_set_slot_index(set, idx);
|
||||
return (set->bits[idx / 8] & (1 << (idx % 8))) == 0;
|
||||
}
|
||||
static uintptr_t hash_set_slot_ref(struct hash_set *set, size_t idx) {
|
||||
return set->data[hash_set_slot_index(set, idx)];
|
||||
}
|
||||
static void hash_set_slot_set(struct hash_set *set, size_t idx, uintptr_t v) {
|
||||
set->data[hash_set_slot_index(set, idx)] = v;
|
||||
}
|
||||
static void hash_set_slot_acquire(struct hash_set *set, size_t idx) {
|
||||
idx = hash_set_slot_index(set, idx);
|
||||
set->bits[idx / 8] |= (1 << (idx % 8));
|
||||
set->n_items++;
|
||||
}
|
||||
static void hash_set_slot_release(struct hash_set *set, size_t idx) {
|
||||
idx = hash_set_slot_index(set, idx);
|
||||
set->bits[idx / 8] &= ~(1 << (idx % 8));
|
||||
set->n_items--;
|
||||
}
|
||||
static size_t hash_set_slot_distance(struct hash_set *set, size_t idx) {
|
||||
return hash_set_slot_index(set, idx - hash_set_slot_ref(set, idx));
|
||||
}
|
||||
static int hash_set_should_shrink(struct hash_set *set) {
|
||||
return set->size > 8 && set->n_items <= (set->size >> 3);
|
||||
}
|
||||
static int hash_set_should_grow(struct hash_set *set) {
|
||||
return set->n_items >= set->size - (set->size >> 3);
|
||||
}
|
||||
|
||||
static void hash_set_do_insert(struct hash_set *set, uintptr_t v) {
|
||||
size_t displacement = 0;
|
||||
while (!hash_set_slot_is_empty(set, v + displacement)
|
||||
&& displacement < hash_set_slot_distance(set, v + displacement))
|
||||
displacement++;
|
||||
while (!hash_set_slot_is_empty(set, v + displacement)
|
||||
&& displacement == hash_set_slot_distance(set, v + displacement)) {
|
||||
if (hash_set_slot_ref(set, v + displacement) == v)
|
||||
return;
|
||||
displacement++;
|
||||
}
|
||||
size_t idx = v + displacement;
|
||||
size_t slots_to_move = 0;
|
||||
while (!hash_set_slot_is_empty(set, idx + slots_to_move))
|
||||
slots_to_move++;
|
||||
hash_set_slot_acquire(set, idx + slots_to_move);
|
||||
while (slots_to_move--)
|
||||
hash_set_slot_set(set, idx + slots_to_move + 1,
|
||||
hash_set_slot_ref(set, idx + slots_to_move));
|
||||
hash_set_slot_set(set, idx, v);
|
||||
}
|
||||
|
||||
static void hash_set_populate(struct hash_set *dst, struct hash_set *src) {
|
||||
for (size_t i = 0; i < src->size; i++)
|
||||
if (!hash_set_slot_is_empty(src, i))
|
||||
hash_set_do_insert(dst, hash_set_slot_ref(src, i));
|
||||
}
|
||||
static void hash_set_grow(struct hash_set *set) {
|
||||
struct hash_set fresh;
|
||||
hash_set_init(&fresh, set->size << 1);
|
||||
hash_set_populate(&fresh, set);
|
||||
hash_set_destroy(set);
|
||||
memcpy(set, &fresh, sizeof(fresh));
|
||||
}
|
||||
static void hash_set_shrink(struct hash_set *set) {
|
||||
struct hash_set fresh;
|
||||
hash_set_init(&fresh, set->size >> 1);
|
||||
hash_set_populate(&fresh, set);
|
||||
hash_set_destroy(set);
|
||||
memcpy(set, &fresh, sizeof(fresh));
|
||||
}
|
||||
|
||||
static void hash_set_insert(struct hash_set *set, uintptr_t v) {
|
||||
if (hash_set_should_grow(set))
|
||||
hash_set_grow(set);
|
||||
hash_set_do_insert(set, v);
|
||||
}
|
||||
|
||||
static void hash_set_remove(struct hash_set *set, uintptr_t v) {
|
||||
size_t slot = v;
|
||||
while (!hash_set_slot_is_empty(set, slot) && hash_set_slot_ref(set, slot) != v)
|
||||
slot++;
|
||||
if (hash_set_slot_is_empty(set, slot))
|
||||
__builtin_trap();
|
||||
while (!hash_set_slot_is_empty(set, slot + 1)
|
||||
&& hash_set_slot_distance(set, slot + 1)) {
|
||||
hash_set_slot_set(set, slot, hash_set_slot_ref(set, slot + 1));
|
||||
slot++;
|
||||
}
|
||||
hash_set_slot_release(set, slot);
|
||||
if (hash_set_should_shrink(set))
|
||||
hash_set_shrink(set);
|
||||
}
|
||||
static int hash_set_contains(struct hash_set *set, uintptr_t v) {
|
||||
for (size_t slot = v; !hash_set_slot_is_empty(set, slot); slot++) {
|
||||
if (hash_set_slot_ref(set, slot) == v)
|
||||
return 1;
|
||||
if (hash_set_slot_distance(set, slot) < (slot - v))
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
static inline void hash_set_find(struct hash_set *set,
|
||||
int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
|
||||
static inline void hash_set_find(struct hash_set *set,
|
||||
int (*f)(uintptr_t, void*), void *data) {
|
||||
for (size_t i = 0; i < set->size; i++)
|
||||
if (!hash_set_slot_is_empty(set, i))
|
||||
if (f(hash_set_slot_ref(set, i), data))
|
||||
return;
|
||||
}
|
||||
|
||||
struct address_set {
|
||||
struct hash_set hash_set;
|
||||
};
|
||||
|
||||
static void address_set_init(struct address_set *set) {
|
||||
hash_set_init(&set->hash_set, 8);
|
||||
}
|
||||
static void address_set_destroy(struct address_set *set) {
|
||||
hash_set_destroy(&set->hash_set);
|
||||
}
|
||||
static void address_set_clear(struct address_set *set) {
|
||||
hash_set_clear(&set->hash_set);
|
||||
}
|
||||
|
||||
static void address_set_add(struct address_set *set, uintptr_t addr) {
|
||||
hash_set_insert(&set->hash_set, hash_address(addr));
|
||||
}
|
||||
static void address_set_remove(struct address_set *set, uintptr_t addr) {
|
||||
hash_set_remove(&set->hash_set, hash_address(addr));
|
||||
}
|
||||
static int address_set_contains(struct address_set *set, uintptr_t addr) {
|
||||
return hash_set_contains(&set->hash_set, hash_address(addr));
|
||||
}
|
||||
static void address_set_union(struct address_set *set, struct address_set *other) {
|
||||
while (set->hash_set.size < other->hash_set.size)
|
||||
hash_set_grow(&set->hash_set);
|
||||
hash_set_populate(&set->hash_set, &other->hash_set);
|
||||
}
|
||||
|
||||
struct address_set_for_each_data {
|
||||
void (*f)(uintptr_t, void *);
|
||||
void *data;
|
||||
};
|
||||
static int address_set_do_for_each(uintptr_t v, void *data) {
|
||||
struct address_set_for_each_data *for_each_data = data;
|
||||
for_each_data->f(unhash_address(v), for_each_data->data);
|
||||
return 0;
|
||||
}
|
||||
static inline void address_set_for_each(struct address_set *set,
|
||||
void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
|
||||
static inline void address_set_for_each(struct address_set *set,
|
||||
void (*f)(uintptr_t, void*), void *data) {
|
||||
struct address_set_for_each_data for_each_data = { f, data };
|
||||
hash_set_find(&set->hash_set, address_set_do_for_each, &for_each_data);
|
||||
}
|
||||
|
||||
struct address_set_find_data {
|
||||
int (*f)(uintptr_t, void *);
|
||||
void *data;
|
||||
};
|
||||
static int address_set_do_find(uintptr_t v, void *data) {
|
||||
struct address_set_find_data *find_data = data;
|
||||
return find_data->f(unhash_address(v), find_data->data);
|
||||
}
|
||||
static inline void address_set_find(struct address_set *set,
|
||||
int (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
|
||||
static inline void address_set_find(struct address_set *set,
|
||||
int (*f)(uintptr_t, void*), void *data) {
|
||||
struct address_set_find_data find_data = { f, data };
|
||||
hash_set_find(&set->hash_set, address_set_do_find, &find_data);
|
||||
}
|
||||
|
||||
#endif // ADDRESS_SET_H
|
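The address set above follows the same robin-hood layout as the address map; a hypothetical usage sketch under the same include-path assumption, not part of the commit:

// Insert, query, and remove an address (hypothetical example, not part of
// the commit).
#include <assert.h>
#include <stdint.h>

#include "address-set.h"

int main(void) {
  struct address_set set;
  address_set_init(&set);
  int object;
  address_set_add(&set, (uintptr_t)&object);
  assert(address_set_contains(&set, (uintptr_t)&object));
  address_set_remove(&set, (uintptr_t)&object);
  assert(!address_set_contains(&set, (uintptr_t)&object));
  address_set_destroy(&set);
  return 0;
}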
16
libguile/whippet/src/assert.h
Normal file
16
libguile/whippet/src/assert.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#ifndef ASSERT_H
#define ASSERT_H

#define STATIC_ASSERT_EQ(a, b) _Static_assert((a) == (b), "eq")

#define UNLIKELY(e) __builtin_expect(e, 0)
#define LIKELY(e) __builtin_expect(e, 1)

#ifndef NDEBUG
#define ASSERT(x) do { if (UNLIKELY(!(x))) __builtin_trap(); } while (0)
#else
#define ASSERT(x) do { } while (0)
#endif
#define ASSERT_EQ(a,b) ASSERT((a) == (b))

#endif // ASSERT_H
155
libguile/whippet/src/background-thread.h
Normal file
155
libguile/whippet/src/background-thread.h
Normal file
|
@ -0,0 +1,155 @@
|
|||
#ifndef BACKGROUND_THREAD_H
|
||||
#define BACKGROUND_THREAD_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "debug.h"
|
||||
|
||||
enum {
|
||||
GC_BACKGROUND_TASK_START = 0,
|
||||
GC_BACKGROUND_TASK_MIDDLE = 100,
|
||||
GC_BACKGROUND_TASK_END = 200
|
||||
};
|
||||
|
||||
struct gc_background_task {
|
||||
int id;
|
||||
int priority;
|
||||
void (*run)(void *data);
|
||||
void *data;
|
||||
};
|
||||
|
||||
enum gc_background_thread_state {
|
||||
GC_BACKGROUND_THREAD_STARTING,
|
||||
GC_BACKGROUND_THREAD_RUNNING,
|
||||
GC_BACKGROUND_THREAD_STOPPING
|
||||
};
|
||||
|
||||
struct gc_background_thread {
|
||||
size_t count;
|
||||
size_t capacity;
|
||||
struct gc_background_task *tasks;
|
||||
int next_id;
|
||||
enum gc_background_thread_state state;
|
||||
pthread_t thread;
|
||||
pthread_mutex_t lock;
|
||||
pthread_cond_t cond;
|
||||
};
|
||||
|
||||
static void*
|
||||
gc_background_thread(void *data) {
|
||||
struct gc_background_thread *thread = data;
|
||||
pthread_mutex_lock(&thread->lock);
|
||||
while (thread->state == GC_BACKGROUND_THREAD_STARTING)
|
||||
pthread_cond_wait(&thread->cond, &thread->lock);
|
||||
struct timespec ts;
|
||||
if (clock_gettime(CLOCK_REALTIME, &ts)) {
|
||||
perror("background thread: failed to get time!");
|
||||
return NULL;
|
||||
}
|
||||
while (thread->state == GC_BACKGROUND_THREAD_RUNNING) {
|
||||
ts.tv_sec += 1;
|
||||
pthread_cond_timedwait(&thread->cond, &thread->lock, &ts);
|
||||
if (thread->state == GC_BACKGROUND_THREAD_RUNNING)
|
||||
for (size_t i = 0; i < thread->count; i++)
|
||||
thread->tasks[i].run(thread->tasks[i].data);
|
||||
}
|
||||
pthread_mutex_unlock(&thread->lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct gc_background_thread*
|
||||
gc_make_background_thread(void) {
|
||||
struct gc_background_thread *thread;
|
||||
thread = malloc(sizeof(*thread));
|
||||
if (!thread)
|
||||
GC_CRASH();
|
||||
memset(thread, 0, sizeof(*thread));
|
||||
thread->tasks = NULL;
|
||||
thread->count = 0;
|
||||
thread->capacity = 0;
|
||||
thread->state = GC_BACKGROUND_THREAD_STARTING;
|
||||
pthread_mutex_init(&thread->lock, NULL);
|
||||
pthread_cond_init(&thread->cond, NULL);
|
||||
if (pthread_create(&thread->thread, NULL, gc_background_thread, thread)) {
|
||||
perror("spawning background thread failed");
|
||||
GC_CRASH();
|
||||
}
|
||||
return thread;
|
||||
}
|
||||
|
||||
static void
|
||||
gc_background_thread_start(struct gc_background_thread *thread) {
|
||||
pthread_mutex_lock(&thread->lock);
|
||||
GC_ASSERT_EQ(thread->state, GC_BACKGROUND_THREAD_STARTING);
|
||||
thread->state = GC_BACKGROUND_THREAD_RUNNING;
|
||||
pthread_mutex_unlock(&thread->lock);
|
||||
pthread_cond_signal(&thread->cond);
|
||||
}
|
||||
|
||||
static int
|
||||
gc_background_thread_add_task(struct gc_background_thread *thread,
|
||||
int priority, void (*run)(void *data),
|
||||
void *data) {
|
||||
pthread_mutex_lock(&thread->lock);
|
||||
if (thread->count == thread->capacity) {
|
||||
size_t new_capacity = thread->capacity * 2 + 1;
|
||||
struct gc_background_task *new_tasks =
|
||||
realloc(thread->tasks, sizeof(struct gc_background_task) * new_capacity);
|
||||
if (!new_tasks) {
|
||||
perror("ran out of space for background tasks!");
|
||||
GC_CRASH();
|
||||
}
|
||||
thread->capacity = new_capacity;
|
||||
thread->tasks = new_tasks;
|
||||
}
|
||||
size_t insert = 0;
|
||||
for (; insert < thread->count; insert++) {
|
||||
if (priority < thread->tasks[insert].priority)
|
||||
break;
|
||||
}
|
||||
size_t bytes_to_move =
|
||||
(thread->count - insert) * sizeof(struct gc_background_task);
|
||||
memmove(&thread->tasks[insert + 1], &thread->tasks[insert], bytes_to_move);
|
||||
int id = thread->next_id++;
|
||||
thread->tasks[insert].id = id;
|
||||
thread->tasks[insert].priority = priority;
|
||||
thread->tasks[insert].run = run;
|
||||
thread->tasks[insert].data = data;
|
||||
thread->count++;
|
||||
pthread_mutex_unlock(&thread->lock);
|
||||
return id;
|
||||
}
|
||||
|
||||
static void
|
||||
gc_background_thread_remove_task(struct gc_background_thread *thread,
|
||||
int id) {
|
||||
pthread_mutex_lock(&thread->lock);
|
||||
size_t remove = 0;
|
||||
for (; remove < thread->count; remove++) {
|
||||
if (thread->tasks[remove].id == id)
|
||||
break;
|
||||
}
|
||||
if (remove == thread->count)
|
||||
GC_CRASH();
|
||||
size_t bytes_to_move =
|
||||
(thread->count - (remove + 1)) * sizeof(struct gc_background_task);
|
||||
memmove(&thread->tasks[remove], &thread->tasks[remove + 1], bytes_to_move);
|
||||
pthread_mutex_unlock(&thread->lock);
|
||||
}
|
||||
|
||||
static void
|
||||
gc_destroy_background_thread(struct gc_background_thread *thread) {
|
||||
pthread_mutex_lock(&thread->lock);
|
||||
GC_ASSERT(thread->state == GC_BACKGROUND_THREAD_RUNNING);
|
||||
thread->state = GC_BACKGROUND_THREAD_STOPPING;
|
||||
pthread_mutex_unlock(&thread->lock);
|
||||
pthread_cond_signal(&thread->cond);
|
||||
pthread_join(thread->thread, NULL);
|
||||
free(thread->tasks);
|
||||
free(thread);
|
||||
}
|
||||
|
||||
#endif // BACKGROUND_THREAD_H
|
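A hypothetical sketch of how the background thread above is driven, not part of the commit; it assumes the GC_ASSERT/GC_CRASH macros that background-thread.h relies on are visible, as they are when the header is included from the collector sources:

// Register a task that runs roughly once per second between start and
// destroy (hypothetical example, not part of the commit).
#include <stdio.h>
#include <unistd.h>

#include "background-thread.h"

static void heartbeat(void *data) {
  printf("tick: %s\n", (const char *)data);
}

int main(void) {
  struct gc_background_thread *thread = gc_make_background_thread();
  int id = gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_MIDDLE,
                                         heartbeat, "heap maintenance");
  gc_background_thread_start(thread);
  sleep(3);                       // let the task fire a few times
  gc_background_thread_remove_task(thread, id);
  gc_destroy_background_thread(thread);
  return 0;
}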
647
libguile/whippet/src/bdw.c
Normal file
647
libguile/whippet/src/bdw.c
Normal file
|
@ -0,0 +1,647 @@
|
|||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gc-api.h"
|
||||
#include "gc-ephemeron.h"
|
||||
#include "gc-tracepoint.h"
|
||||
|
||||
#define GC_IMPL 1
|
||||
#include "gc-internal.h"
|
||||
|
||||
#include "bdw-attrs.h"
|
||||
|
||||
#if GC_PRECISE_ROOTS
|
||||
#error bdw-gc is a conservative collector
|
||||
#endif
|
||||
|
||||
#if !GC_CONSERVATIVE_ROOTS
|
||||
#error bdw-gc is a conservative collector
|
||||
#endif
|
||||
|
||||
#if !GC_CONSERVATIVE_TRACE
|
||||
#error bdw-gc is a conservative collector
|
||||
#endif
|
||||
|
||||
// When pthreads are used, let `libgc' know about it and redirect
|
||||
// allocation calls such as `GC_MALLOC ()' to (contention-free, faster)
|
||||
// thread-local allocation.
|
||||
|
||||
#define GC_THREADS 1
|
||||
#define GC_REDIRECT_TO_LOCAL 1
|
||||
|
||||
// Don't #define pthread routines to their GC_pthread counterparts.
|
||||
// Instead we will be careful inside the benchmarks to use API to
|
||||
// register threads with libgc.
|
||||
#define GC_NO_THREAD_REDIRECTS 1
|
||||
|
||||
#include <gc/gc.h>
|
||||
#include <gc/gc_inline.h> /* GC_generic_malloc_many */
|
||||
#include <gc/gc_mark.h> /* GC_generic_malloc */
|
||||
|
||||
#define GC_INLINE_GRANULE_WORDS 2
|
||||
#define GC_INLINE_GRANULE_BYTES (sizeof(void *) * GC_INLINE_GRANULE_WORDS)
|
||||
|
||||
/* A freelist set contains GC_INLINE_FREELIST_COUNT pointers to singly
|
||||
linked lists of objects of different sizes, the ith one containing
|
||||
objects i + 1 granules in size. This setting of
|
||||
GC_INLINE_FREELIST_COUNT will hold freelists for allocations of
|
||||
up to 256 bytes. */
|
||||
#define GC_INLINE_FREELIST_COUNT (256U / GC_INLINE_GRANULE_BYTES)
|
||||
|
||||
struct gc_heap {
|
||||
struct gc_heap *freelist; // see mark_heap
|
||||
pthread_mutex_t lock;
|
||||
struct gc_heap_roots *roots;
|
||||
struct gc_mutator *mutators;
|
||||
struct gc_event_listener event_listener;
|
||||
struct gc_finalizer_state *finalizer_state;
|
||||
gc_finalizer_callback have_finalizers;
|
||||
void *event_listener_data;
|
||||
};
|
||||
|
||||
struct gc_mutator {
|
||||
void *freelists[GC_INLINE_FREELIST_COUNT];
|
||||
void *pointerless_freelists[GC_INLINE_FREELIST_COUNT];
|
||||
struct gc_heap *heap;
|
||||
struct gc_mutator_roots *roots;
|
||||
struct gc_mutator *next; // with heap lock
|
||||
struct gc_mutator **prev; // with heap lock
|
||||
void *event_listener_data;
|
||||
};
|
||||
|
||||
struct gc_heap *__the_bdw_gc_heap;
|
||||
#define HEAP_EVENT(event, ...) do { \
|
||||
__the_bdw_gc_heap->event_listener.event(__the_bdw_gc_heap->event_listener_data, \
|
||||
##__VA_ARGS__); \
|
||||
GC_TRACEPOINT(event, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
#define MUTATOR_EVENT(mut, event, ...) do { \
|
||||
__the_bdw_gc_heap->event_listener.event(mut->event_listener_data, \
|
||||
##__VA_ARGS__); \
|
||||
GC_TRACEPOINT(event, ##__VA_ARGS__); \
|
||||
} while (0)
|
||||
static inline size_t gc_inline_bytes_to_freelist_index(size_t bytes) {
|
||||
return (bytes - 1U) / GC_INLINE_GRANULE_BYTES;
|
||||
}
|
||||
static inline size_t gc_inline_freelist_object_size(size_t idx) {
|
||||
return (idx + 1U) * GC_INLINE_GRANULE_BYTES;
|
||||
}
|
||||
|
||||
struct gc_heap* gc_mutator_heap(struct gc_mutator *mutator) {
|
||||
return __the_bdw_gc_heap;
|
||||
}
|
||||
uintptr_t gc_small_object_nursery_low_address(struct gc_heap *heap) {
|
||||
GC_CRASH();
|
||||
}
|
||||
uintptr_t gc_small_object_nursery_high_address(struct gc_heap *heap) {
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
// The values of these must match the internal POINTERLESS and NORMAL
|
||||
// definitions in libgc, for which unfortunately there are no external
|
||||
// definitions. Alack.
|
||||
enum gc_inline_kind {
|
||||
GC_INLINE_KIND_POINTERLESS,
|
||||
GC_INLINE_KIND_NORMAL
|
||||
};
|
||||
|
||||
static inline void *
|
||||
allocate_small(void **freelist, size_t idx, enum gc_inline_kind kind) {
|
||||
void *head = *freelist;
|
||||
|
||||
if (!head) {
|
||||
size_t bytes = gc_inline_freelist_object_size(idx);
|
||||
GC_generic_malloc_many(bytes, kind, freelist);
|
||||
head = *freelist;
|
||||
if (GC_UNLIKELY (!head)) {
|
||||
fprintf(stderr, "ran out of space, heap size %zu\n",
|
||||
GC_get_heap_size());
|
||||
GC_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
*freelist = *(void **)(head);
|
||||
|
||||
if (kind == GC_INLINE_KIND_POINTERLESS)
|
||||
memset(head, 0, gc_inline_freelist_object_size(idx));
|
||||
|
||||
return head;
|
||||
}
|
||||
|
||||
void* gc_allocate_slow(struct gc_mutator *mut, size_t size,
|
||||
enum gc_allocation_kind kind) {
|
||||
GC_ASSERT(size != 0);
|
||||
if (size <= gc_allocator_large_threshold()) {
|
||||
size_t idx = gc_inline_bytes_to_freelist_index(size);
|
||||
void **freelists;
|
||||
enum gc_inline_kind freelist_kind;
|
||||
switch (kind) {
|
||||
case GC_ALLOCATION_TAGGED:
|
||||
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
|
||||
return allocate_small(&mut->freelists[idx], idx, GC_INLINE_KIND_NORMAL);
|
||||
case GC_ALLOCATION_TAGGED_POINTERLESS:
|
||||
case GC_ALLOCATION_UNTAGGED_POINTERLESS:
|
||||
return allocate_small(&mut->pointerless_freelists[idx], idx,
|
||||
GC_INLINE_KIND_POINTERLESS);
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
} else {
|
||||
switch (kind) {
|
||||
case GC_ALLOCATION_TAGGED:
|
||||
case GC_ALLOCATION_UNTAGGED_CONSERVATIVE:
|
||||
return GC_malloc(size);
|
||||
case GC_ALLOCATION_TAGGED_POINTERLESS:
|
||||
case GC_ALLOCATION_UNTAGGED_POINTERLESS: {
|
||||
void *ret = GC_malloc_atomic(size);
|
||||
memset(ret, 0, size);
|
||||
return ret;
|
||||
}
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gc_pin_object(struct gc_mutator *mut, struct gc_ref ref) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
void gc_collect(struct gc_mutator *mut,
|
||||
enum gc_collection_kind requested_kind) {
|
||||
switch (requested_kind) {
|
||||
case GC_COLLECTION_MINOR:
|
||||
GC_collect_a_little();
|
||||
break;
|
||||
case GC_COLLECTION_ANY:
|
||||
case GC_COLLECTION_MAJOR:
|
||||
GC_gcollect();
|
||||
break;
|
||||
case GC_COLLECTION_COMPACTING:
|
||||
GC_gcollect_and_unmap();
|
||||
break;
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
int gc_object_is_old_generation_slow(struct gc_mutator *mut,
|
||||
struct gc_ref obj) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
void gc_write_barrier_slow(struct gc_mutator *mut, struct gc_ref obj,
|
||||
size_t obj_size, struct gc_edge edge,
|
||||
struct gc_ref new_val) {
|
||||
}
|
||||
|
||||
int* gc_safepoint_flag_loc(struct gc_mutator *mut) { GC_CRASH(); }
|
||||
void gc_safepoint_slow(struct gc_mutator *mut) { GC_CRASH(); }
|
||||
|
||||
struct bdw_mark_state {
|
||||
struct GC_ms_entry *mark_stack_ptr;
|
||||
struct GC_ms_entry *mark_stack_limit;
|
||||
};
|
||||
|
||||
static void bdw_mark_edge(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
struct bdw_mark_state *state = visit_data;
|
||||
uintptr_t addr = gc_ref_value(gc_edge_ref(edge));
|
||||
state->mark_stack_ptr = GC_MARK_AND_PUSH ((void *) addr,
|
||||
state->mark_stack_ptr,
|
||||
state->mark_stack_limit,
|
||||
NULL);
|
||||
}
|
||||
|
||||
static int heap_gc_kind;
|
||||
static int mutator_gc_kind;
|
||||
static int ephemeron_gc_kind;
|
||||
static int finalizer_gc_kind;
|
||||
|
||||
// In BDW-GC, we can't hook into the mark phase to call
|
||||
// gc_trace_ephemerons_for_object, so the advertised ephemeron strategy
|
||||
// doesn't really work. The primitives that we have are mark functions,
|
||||
// which run during GC and can't allocate; finalizers, which run after
|
||||
// GC and can allocate but can't add to the connectivity graph; and
|
||||
// disappearing links, which are cleared at the end of marking, in the
|
||||
// stop-the-world phase. It does not appear to be possible to implement
|
||||
// ephemerons using these primitives. Instead fall back to weak-key
|
||||
// tables.
|
||||
|
||||
struct gc_ephemeron* gc_allocate_ephemeron(struct gc_mutator *mut) {
|
||||
return GC_generic_malloc(gc_ephemeron_size(), ephemeron_gc_kind);
|
||||
}
|
||||
|
||||
unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap) {
|
||||
return GC_get_gc_no();
|
||||
}
|
||||
|
||||
void gc_ephemeron_init(struct gc_mutator *mut, struct gc_ephemeron *ephemeron,
|
||||
struct gc_ref key, struct gc_ref value) {
|
||||
gc_ephemeron_init_internal(mut->heap, ephemeron, key, value);
|
||||
if (GC_base((void*)gc_ref_value(key))) {
|
||||
struct gc_ref *loc = gc_edge_loc(gc_ephemeron_key_edge(ephemeron));
|
||||
GC_register_disappearing_link((void**)loc);
|
||||
}
|
||||
}
|
||||
|
||||
int gc_visit_ephemeron_key(struct gc_edge edge, struct gc_heap *heap) {
|
||||
// Pretend the key is traced, to avoid adding this ephemeron to the
|
||||
// global table.
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct gc_finalizer* gc_allocate_finalizer(struct gc_mutator *mut) {
|
||||
return GC_generic_malloc(gc_finalizer_size(), finalizer_gc_kind);
|
||||
}
|
||||
|
||||
static void finalize_object(void *obj, void *data) {
|
||||
struct gc_finalizer *f = data;
|
||||
gc_finalizer_externally_fired(__the_bdw_gc_heap->finalizer_state, f);
|
||||
}
|
||||
|
||||
void gc_finalizer_attach(struct gc_mutator *mut, struct gc_finalizer *finalizer,
|
||||
unsigned priority, struct gc_ref object,
|
||||
struct gc_ref closure) {
|
||||
// Don't bother much about the actual finalizer; just delegate to BDW-GC.
|
||||
GC_finalization_proc prev = NULL;
|
||||
void *prev_data = NULL;
|
||||
gc_finalizer_init_internal(finalizer, object, closure);
|
||||
gc_finalizer_externally_activated(finalizer);
|
||||
GC_register_finalizer_no_order(gc_ref_heap_object(object), finalize_object,
|
||||
finalizer, &prev, &prev_data);
|
||||
// FIXME: Allow multiple finalizers per object.
|
||||
GC_ASSERT(prev == NULL);
|
||||
GC_ASSERT(prev_data == NULL);
|
||||
}
|
||||
|
||||
struct gc_finalizer* gc_pop_finalizable(struct gc_mutator *mut) {
|
||||
GC_invoke_finalizers();
|
||||
return gc_finalizer_state_pop(mut->heap->finalizer_state);
|
||||
}
|
||||
|
||||
void gc_set_finalizer_callback(struct gc_heap *heap,
|
||||
gc_finalizer_callback callback) {
|
||||
heap->have_finalizers = callback;
|
||||
}
|
||||
|
||||
static void have_finalizers(void) {
|
||||
struct gc_heap *heap = __the_bdw_gc_heap;
|
||||
if (heap->have_finalizers)
|
||||
heap->have_finalizers(heap, 1);
|
||||
}
|
||||
|
||||
static struct GC_ms_entry *
|
||||
mark_ephemeron(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
|
||||
struct GC_ms_entry *mark_stack_limit, GC_word env) {
|
||||
|
||||
struct bdw_mark_state state = {
|
||||
mark_stack_ptr,
|
||||
mark_stack_limit,
|
||||
};
|
||||
|
||||
struct gc_ephemeron *ephemeron = (struct gc_ephemeron*) addr;
|
||||
|
||||
// If this ephemeron is on a freelist, its first word will be a
|
||||
// freelist link and everything else will be NULL.
|
||||
if (!gc_ref_value(gc_edge_ref(gc_ephemeron_value_edge(ephemeron)))) {
|
||||
bdw_mark_edge(gc_edge(addr), NULL, &state);
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
if (!gc_ref_value(gc_edge_ref(gc_ephemeron_key_edge(ephemeron)))) {
|
||||
// If the key died in a previous collection, the disappearing link
|
||||
// will have been cleared. Mark the ephemeron as dead.
|
||||
gc_ephemeron_mark_dead(ephemeron);
|
||||
}
|
||||
|
||||
gc_trace_ephemeron(ephemeron, bdw_mark_edge, NULL, &state);
|
||||
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
static struct GC_ms_entry *
|
||||
mark_finalizer(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
|
||||
struct GC_ms_entry *mark_stack_limit, GC_word env) {
|
||||
|
||||
struct bdw_mark_state state = {
|
||||
mark_stack_ptr,
|
||||
mark_stack_limit,
|
||||
};
|
||||
|
||||
struct gc_finalizer *finalizer = (struct gc_finalizer*) addr;
|
||||
|
||||
// If this ephemeron is on a freelist, its first word will be a
|
||||
// freelist link and everything else will be NULL.
|
||||
if (!gc_ref_value(gc_finalizer_object(finalizer))) {
|
||||
bdw_mark_edge(gc_edge(addr), NULL, &state);
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
gc_trace_finalizer(finalizer, bdw_mark_edge, NULL, &state);
|
||||
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
static struct GC_ms_entry *
|
||||
mark_heap(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
|
||||
struct GC_ms_entry *mark_stack_limit, GC_word env) {
|
||||
struct bdw_mark_state state = {
|
||||
mark_stack_ptr,
|
||||
mark_stack_limit,
|
||||
};
|
||||
|
||||
struct gc_heap *heap = (struct gc_heap*) addr;
|
||||
|
||||
// If this heap is on a freelist... well probably we are screwed, BDW
|
||||
// isn't really made to do multiple heaps in a process. But still, in
|
||||
// this case, the first word is the freelist and the rest are null.
|
||||
if (heap->freelist) {
|
||||
bdw_mark_edge(gc_edge(addr), NULL, &state);
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
if (heap->roots)
|
||||
gc_trace_heap_roots(heap->roots, bdw_mark_edge, heap, &state);
|
||||
|
||||
gc_visit_finalizer_roots(heap->finalizer_state, bdw_mark_edge, heap, &state);
|
||||
|
||||
state.mark_stack_ptr = GC_MARK_AND_PUSH (heap->mutators,
|
||||
state.mark_stack_ptr,
|
||||
state.mark_stack_limit,
|
||||
NULL);
|
||||
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
static struct GC_ms_entry *
|
||||
mark_mutator(GC_word *addr, struct GC_ms_entry *mark_stack_ptr,
|
||||
struct GC_ms_entry *mark_stack_limit, GC_word env) {
|
||||
struct bdw_mark_state state = {
|
||||
mark_stack_ptr,
|
||||
mark_stack_limit,
|
||||
};
|
||||
|
||||
struct gc_mutator *mut = (struct gc_mutator*) addr;
|
||||
|
||||
// If this mutator is on a freelist, its first word will be a
|
||||
// freelist link and everything else will be NULL.
|
||||
if (!mut->heap) {
|
||||
bdw_mark_edge(gc_edge(addr), NULL, &state);
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
for (int i = 0; i < GC_INLINE_FREELIST_COUNT; i++)
|
||||
state.mark_stack_ptr = GC_MARK_AND_PUSH (mut->freelists[i],
|
||||
state.mark_stack_ptr,
|
||||
state.mark_stack_limit,
|
||||
NULL);
|
||||
|
||||
for (int i = 0; i < GC_INLINE_FREELIST_COUNT; i++)
|
||||
for (void *head = mut->pointerless_freelists[i]; head; head = *(void**)head)
|
||||
state.mark_stack_ptr = GC_MARK_AND_PUSH (head,
|
||||
state.mark_stack_ptr,
|
||||
state.mark_stack_limit,
|
||||
NULL);
|
||||
|
||||
if (mut->roots)
|
||||
gc_trace_mutator_roots(mut->roots, bdw_mark_edge, mut->heap, &state);
|
||||
|
||||
state.mark_stack_ptr = GC_MARK_AND_PUSH (mut->next,
|
||||
state.mark_stack_ptr,
|
||||
state.mark_stack_limit,
|
||||
NULL);
|
||||
|
||||
return state.mark_stack_ptr;
|
||||
}
|
||||
|
||||
static inline struct gc_mutator *add_mutator(struct gc_heap *heap) {
|
||||
struct gc_mutator *ret =
|
||||
GC_generic_malloc(sizeof(struct gc_mutator), mutator_gc_kind);
|
||||
ret->heap = heap;
|
||||
ret->event_listener_data =
|
||||
heap->event_listener.mutator_added(heap->event_listener_data);
|
||||
|
||||
pthread_mutex_lock(&heap->lock);
|
||||
ret->next = heap->mutators;
|
||||
ret->prev = &heap->mutators;
|
||||
if (ret->next)
|
||||
ret->next->prev = &ret->next;
|
||||
heap->mutators = ret;
|
||||
pthread_mutex_unlock(&heap->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct gc_options {
|
||||
struct gc_common_options common;
|
||||
};
|
||||
int gc_option_from_string(const char *str) {
|
||||
return gc_common_option_from_string(str);
|
||||
}
|
||||
struct gc_options* gc_allocate_options(void) {
|
||||
struct gc_options *ret = malloc(sizeof(struct gc_options));
|
||||
gc_init_common_options(&ret->common);
|
||||
return ret;
|
||||
}
|
||||
int gc_options_set_int(struct gc_options *options, int option, int value) {
|
||||
return gc_common_options_set_int(&options->common, option, value);
|
||||
}
|
||||
int gc_options_set_size(struct gc_options *options, int option,
|
||||
size_t value) {
|
||||
return gc_common_options_set_size(&options->common, option, value);
|
||||
}
|
||||
int gc_options_set_double(struct gc_options *options, int option,
|
||||
double value) {
|
||||
return gc_common_options_set_double(&options->common, option, value);
|
||||
}
|
||||
int gc_options_parse_and_set(struct gc_options *options, int option,
|
||||
const char *value) {
|
||||
return gc_common_options_parse_and_set(&options->common, option, value);
|
||||
}
|
||||
|
||||
struct gc_pending_ephemerons *
|
||||
gc_heap_pending_ephemerons(struct gc_heap *heap) {
|
||||
GC_CRASH();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void on_collection_event(GC_EventType event) {
|
||||
switch (event) {
|
||||
case GC_EVENT_START: {
|
||||
HEAP_EVENT(requesting_stop);
|
||||
HEAP_EVENT(waiting_for_stop);
|
||||
break;
|
||||
}
|
||||
case GC_EVENT_MARK_START:
|
||||
HEAP_EVENT(mutators_stopped);
|
||||
HEAP_EVENT(prepare_gc, GC_COLLECTION_MAJOR);
|
||||
break;
|
||||
case GC_EVENT_MARK_END:
|
||||
HEAP_EVENT(roots_traced);
|
||||
HEAP_EVENT(heap_traced);
|
||||
break;
|
||||
case GC_EVENT_RECLAIM_START:
|
||||
break;
|
||||
case GC_EVENT_RECLAIM_END:
|
||||
// Sloppily attribute finalizers and eager reclamation to
|
||||
// ephemerons.
|
||||
HEAP_EVENT(ephemerons_traced);
|
||||
HEAP_EVENT(live_data_size, GC_get_heap_size() - GC_get_free_bytes());
|
||||
break;
|
||||
case GC_EVENT_END:
|
||||
HEAP_EVENT(restarting_mutators);
|
||||
break;
|
||||
case GC_EVENT_PRE_START_WORLD:
|
||||
case GC_EVENT_POST_STOP_WORLD:
|
||||
// Can't rely on these, as they are only fired when threads are
|
||||
// enabled.
|
||||
break;
|
||||
case GC_EVENT_THREAD_SUSPENDED:
|
||||
case GC_EVENT_THREAD_UNSUSPENDED:
|
||||
// No nice way to map back to the mutator.
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void on_heap_resize(GC_word size) {
|
||||
HEAP_EVENT(heap_resized, size);
|
||||
}
|
||||
|
||||
uint64_t gc_allocation_counter(struct gc_heap *heap) {
|
||||
return GC_get_total_bytes();
|
||||
}
|
||||
|
||||
int gc_init(const struct gc_options *options, struct gc_stack_addr *stack_base,
|
||||
struct gc_heap **heap, struct gc_mutator **mutator,
|
||||
struct gc_event_listener event_listener,
|
||||
void *event_listener_data) {
|
||||
// Root the heap, which will also cause all mutators to be marked.
|
||||
GC_ASSERT_EQ(gc_allocator_small_granule_size(), GC_INLINE_GRANULE_BYTES);
|
||||
GC_ASSERT_EQ(gc_allocator_large_threshold(),
|
||||
GC_INLINE_FREELIST_COUNT * GC_INLINE_GRANULE_BYTES);
|
||||
|
||||
GC_ASSERT_EQ(__the_bdw_gc_heap, NULL);
|
||||
|
||||
if (!options) options = gc_allocate_options();
|
||||
|
||||
// Ignore stack base for main thread.
|
||||
|
||||
switch (options->common.heap_size_policy) {
|
||||
case GC_HEAP_SIZE_FIXED:
|
||||
GC_set_max_heap_size(options->common.heap_size);
|
||||
break;
|
||||
case GC_HEAP_SIZE_GROWABLE: {
|
||||
if (options->common.maximum_heap_size)
|
||||
GC_set_max_heap_size(options->common.maximum_heap_size);
|
||||
// BDW uses a pretty weird heap-sizing heuristic:
|
||||
//
|
||||
// heap-size = live-data * (1 + (2 / GC_free_space_divisor))
|
||||
// heap-size-multiplier = heap-size/live-data = 1 + 2/GC_free_space_divisor
|
||||
// GC_free_space_divisor = 2/(heap-size-multiplier-1)
|
||||
//
|
||||
// (Assumption: your heap is mostly "composite", i.e. not
|
||||
// "atomic". See bdw's alloc.c:min_bytes_allocd.)
|
||||
double fsd = 2.0/(options->common.heap_size_multiplier - 1);
|
||||
// But, the divisor is an integer. WTF. This caps the effective
|
||||
// maximum heap multiplier at 3. Oh well.
|
||||
GC_set_free_space_divisor(fsd + 0.51);
|
||||
break;
|
||||
}
|
||||
case GC_HEAP_SIZE_ADAPTIVE:
|
||||
default:
|
||||
fprintf(stderr, "adaptive heap sizing unsupported by bdw-gc\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
GC_set_all_interior_pointers (0);
|
||||
GC_set_finalize_on_demand (1);
|
||||
GC_set_finalizer_notifier(have_finalizers);
|
||||
|
||||
// Not part of 7.3, sigh. Have to set an env var.
|
||||
// GC_set_markers_count(options->common.parallelism);
|
||||
char markers[21] = {0,}; // 21 bytes enough for 2**64 in decimal + NUL.
|
||||
snprintf(markers, sizeof(markers), "%d", options->common.parallelism);
|
||||
setenv("GC_MARKERS", markers, 1);
|
||||
GC_init();
|
||||
size_t current_heap_size = GC_get_heap_size();
|
||||
if (options->common.heap_size > current_heap_size)
|
||||
GC_expand_hp(options->common.heap_size - current_heap_size);
|
||||
GC_allow_register_threads();
|
||||
|
||||
{
|
||||
int add_size_to_descriptor = 0;
|
||||
int clear_memory = 1;
|
||||
|
||||
heap_gc_kind = GC_new_kind(GC_new_free_list(),
|
||||
GC_MAKE_PROC(GC_new_proc(mark_heap), 0),
|
||||
add_size_to_descriptor, clear_memory);
|
||||
mutator_gc_kind = GC_new_kind(GC_new_free_list(),
|
||||
GC_MAKE_PROC(GC_new_proc(mark_mutator), 0),
|
||||
add_size_to_descriptor, clear_memory);
|
||||
ephemeron_gc_kind = GC_new_kind(GC_new_free_list(),
|
||||
GC_MAKE_PROC(GC_new_proc(mark_ephemeron), 0),
|
||||
add_size_to_descriptor, clear_memory);
|
||||
finalizer_gc_kind = GC_new_kind(GC_new_free_list(),
|
||||
GC_MAKE_PROC(GC_new_proc(mark_finalizer), 0),
|
||||
add_size_to_descriptor, clear_memory);
|
||||
}
|
||||
|
||||
*heap = GC_generic_malloc(sizeof(struct gc_heap), heap_gc_kind);
|
||||
pthread_mutex_init(&(*heap)->lock, NULL);
|
||||
|
||||
(*heap)->event_listener = event_listener;
|
||||
(*heap)->event_listener_data = event_listener_data;
|
||||
(*heap)->finalizer_state = gc_make_finalizer_state();
|
||||
|
||||
__the_bdw_gc_heap = *heap;
|
||||
HEAP_EVENT(init, GC_get_heap_size());
|
||||
GC_set_on_collection_event(on_collection_event);
|
||||
GC_set_on_heap_resize(on_heap_resize);
|
||||
|
||||
*mutator = add_mutator(*heap);
|
||||
|
||||
// Sanity check.
|
||||
if (!GC_is_visible (&__the_bdw_gc_heap))
|
||||
abort ();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct gc_mutator* gc_init_for_thread(struct gc_stack_addr *stack_base,
|
||||
struct gc_heap *heap) {
|
||||
struct GC_stack_base base = { stack_base };
|
||||
GC_register_my_thread(&base);
|
||||
return add_mutator(heap);
|
||||
}
|
||||
void gc_finish_for_thread(struct gc_mutator *mut) {
|
||||
pthread_mutex_lock(&mut->heap->lock);
|
||||
MUTATOR_EVENT(mut, mutator_removed);
|
||||
*mut->prev = mut->next;
|
||||
if (mut->next)
|
||||
mut->next->prev = mut->prev;
|
||||
pthread_mutex_unlock(&mut->heap->lock);
|
||||
|
||||
GC_unregister_my_thread();
|
||||
}
|
||||
|
||||
void* gc_call_without_gc(struct gc_mutator *mut,
|
||||
void* (*f)(void*),
|
||||
void *data) {
|
||||
return GC_do_blocking(f, data);
|
||||
}
|
||||
|
||||
void gc_mutator_set_roots(struct gc_mutator *mut,
|
||||
struct gc_mutator_roots *roots) {
|
||||
mut->roots = roots;
|
||||
}
|
||||
void gc_heap_set_roots(struct gc_heap *heap, struct gc_heap_roots *roots) {
|
||||
heap->roots = roots;
|
||||
}
|
||||
void gc_heap_set_extern_space(struct gc_heap *heap,
|
||||
struct gc_extern_space *space) {
|
||||
}
|
979
libguile/whippet/src/copy-space.h
Normal file
979
libguile/whippet/src/copy-space.h
Normal file
|
@ -0,0 +1,979 @@
|
|||
#ifndef COPY_SPACE_H
|
||||
#define COPY_SPACE_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
#include "gc-api.h"
|
||||
|
||||
#define GC_IMPL 1
|
||||
#include "gc-internal.h"
|
||||
|
||||
#include "assert.h"
|
||||
#include "background-thread.h"
|
||||
#include "debug.h"
|
||||
#include "extents.h"
|
||||
#include "gc-align.h"
|
||||
#include "gc-attrs.h"
|
||||
#include "gc-inline.h"
|
||||
#include "gc-lock.h"
|
||||
#include "gc-platform.h"
|
||||
#include "spin.h"
|
||||
|
||||
// A copy space: a block-structured space that traces via evacuation.
|
||||
|
||||
#define COPY_SPACE_SLAB_SIZE (64 * 1024 * 1024)
|
||||
#define COPY_SPACE_REGION_SIZE (64 * 1024)
|
||||
#define COPY_SPACE_BLOCK_SIZE (2 * COPY_SPACE_REGION_SIZE)
|
||||
#define COPY_SPACE_BLOCKS_PER_SLAB \
|
||||
(COPY_SPACE_SLAB_SIZE / COPY_SPACE_BLOCK_SIZE)
|
||||
#define COPY_SPACE_HEADER_BYTES_PER_BLOCK \
|
||||
(COPY_SPACE_BLOCK_SIZE / COPY_SPACE_BLOCKS_PER_SLAB)
|
||||
#define COPY_SPACE_HEADER_BLOCKS_PER_SLAB 1
|
||||
#define COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB \
|
||||
(COPY_SPACE_BLOCKS_PER_SLAB - COPY_SPACE_HEADER_BLOCKS_PER_SLAB)
|
||||
#define COPY_SPACE_HEADER_BYTES_PER_SLAB \
|
||||
(COPY_SPACE_HEADER_BYTES_PER_BLOCK * COPY_SPACE_HEADER_BLOCKS_PER_SLAB)
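// For concreteness, with the constants above: a block is 2 * 64 kB =
// 128 kB, a 64 MB slab holds 512 of them, and each per-block header gets
// 128 kB / 512 = 256 bytes.  One block per slab is reserved for metadata:
// the 256-byte slab header plus the 511 remaining block headers fill
// exactly 512 * 256 bytes = 128 kB, which the STATIC_ASSERT_EQs below
// check.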
|
||||
|
||||
struct copy_space_slab;
|
||||
|
||||
struct copy_space_slab_header {
|
||||
union {
|
||||
struct {
|
||||
struct copy_space_slab *next;
|
||||
struct copy_space_slab *prev;
|
||||
unsigned incore_block_count;
|
||||
};
|
||||
uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_SLAB];
|
||||
};
|
||||
};
|
||||
STATIC_ASSERT_EQ(sizeof(struct copy_space_slab_header),
|
||||
COPY_SPACE_HEADER_BYTES_PER_SLAB);
|
||||
|
||||
// Really just the block header.
|
||||
struct copy_space_block {
|
||||
union {
|
||||
struct {
|
||||
struct copy_space_block *next;
|
||||
uint8_t in_core;
|
||||
uint8_t all_zeroes[2];
|
||||
uint8_t is_survivor[2];
|
||||
size_t allocated; // For partly-empty blocks.
|
||||
};
|
||||
uint8_t padding[COPY_SPACE_HEADER_BYTES_PER_BLOCK];
|
||||
};
|
||||
};
|
||||
STATIC_ASSERT_EQ(sizeof(struct copy_space_block),
|
||||
COPY_SPACE_HEADER_BYTES_PER_BLOCK);
|
||||
|
||||
struct copy_space_region {
|
||||
char data[COPY_SPACE_REGION_SIZE];
|
||||
};
|
||||
|
||||
struct copy_space_block_payload {
|
||||
struct copy_space_region regions[2];
|
||||
};
|
||||
|
||||
struct copy_space_slab {
|
||||
struct copy_space_slab_header header;
|
||||
struct copy_space_block headers[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB];
|
||||
struct copy_space_block_payload blocks[COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB];
|
||||
};
|
||||
STATIC_ASSERT_EQ(sizeof(struct copy_space_slab), COPY_SPACE_SLAB_SIZE);
|
||||
|
||||
static inline struct copy_space_block*
|
||||
copy_space_block_for_addr(uintptr_t addr) {
|
||||
uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
|
||||
struct copy_space_slab *slab = (struct copy_space_slab*) base;
|
||||
uintptr_t block_idx =
|
||||
(addr / COPY_SPACE_BLOCK_SIZE) % COPY_SPACE_BLOCKS_PER_SLAB;
|
||||
return &slab->headers[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB];
|
||||
}
|
||||
|
||||
static inline struct copy_space_block*
|
||||
copy_space_block_header(struct copy_space_block_payload *payload) {
|
||||
return copy_space_block_for_addr((uintptr_t) payload);
|
||||
}
|
||||
|
||||
static inline struct copy_space_block_payload*
|
||||
copy_space_block_payload(struct copy_space_block *block) {
|
||||
uintptr_t addr = (uintptr_t) block;
|
||||
uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
|
||||
struct copy_space_slab *slab = (struct copy_space_slab*) base;
|
||||
uintptr_t block_idx =
|
||||
(addr / COPY_SPACE_HEADER_BYTES_PER_BLOCK) % COPY_SPACE_BLOCKS_PER_SLAB;
|
||||
return &slab->blocks[block_idx - COPY_SPACE_HEADER_BLOCKS_PER_SLAB];
|
||||
}
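// A minimal sanity-check sketch of how the two mappings above compose;
// the helper name is illustrative, not part of this header.
static inline void
copy_space_check_block_mapping(struct copy_space_block *block) {
  struct copy_space_block_payload *payload = copy_space_block_payload(block);
  // Header -> payload -> header is the identity...
  GC_ASSERT(copy_space_block_header(payload) == block);
  // ...and any address inside either region maps back to the same header.
  GC_ASSERT(copy_space_block_for_addr((uintptr_t)&payload->regions[1])
            == block);
}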
|
||||
|
||||
static uint8_t
|
||||
copy_space_object_region(struct gc_ref obj) {
|
||||
return (gc_ref_value(obj) / COPY_SPACE_REGION_SIZE) & 1;
|
||||
}
|
||||
|
||||
#define COPY_SPACE_PAGE_OUT_QUEUE_SIZE 4
|
||||
|
||||
struct copy_space_block_list {
|
||||
struct copy_space_block *head;
|
||||
};
|
||||
|
||||
struct copy_space_block_stack {
|
||||
struct copy_space_block_list list;
|
||||
};
|
||||
|
||||
enum copy_space_flags {
|
||||
COPY_SPACE_ATOMIC_FORWARDING = 1,
|
||||
COPY_SPACE_ALIGNED = 2,
|
||||
COPY_SPACE_HAS_FIELD_LOGGING_BITS = 4,
|
||||
};
|
||||
|
||||
struct copy_space {
|
||||
pthread_mutex_t lock;
|
||||
struct copy_space_block_stack empty;
|
||||
struct copy_space_block_stack partly_full;
|
||||
struct copy_space_block_list full ALIGNED_TO_AVOID_FALSE_SHARING;
|
||||
size_t allocated_bytes;
|
||||
size_t fragmentation;
|
||||
struct copy_space_block_stack paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE]
|
||||
ALIGNED_TO_AVOID_FALSE_SHARING;
|
||||
ssize_t bytes_to_page_out ALIGNED_TO_AVOID_FALSE_SHARING;
|
||||
// The rest of these members are only changed rarely and with the heap
|
||||
// lock.
|
||||
uint8_t active_region ALIGNED_TO_AVOID_FALSE_SHARING;
|
||||
uint8_t atomic_forward;
|
||||
uint8_t in_gc;
|
||||
uint32_t flags;
|
||||
size_t allocated_bytes_at_last_gc;
|
||||
size_t fragmentation_at_last_gc;
|
||||
struct extents *extents;
|
||||
struct copy_space_slab **slabs;
|
||||
size_t nslabs;
|
||||
};
|
||||
|
||||
enum copy_space_forward_result {
|
||||
// We went to forward an edge, but the target was already forwarded, so we
|
||||
// just updated the edge.
|
||||
COPY_SPACE_FORWARD_UPDATED,
|
||||
// We went to forward an edge and evacuated the referent to a new location.
|
||||
COPY_SPACE_FORWARD_EVACUATED,
|
||||
// We went to forward an edge but failed to acquire memory for its new
|
||||
// location.
|
||||
COPY_SPACE_FORWARD_FAILED,
|
||||
};
|
||||
|
||||
struct copy_space_allocator {
|
||||
uintptr_t hp;
|
||||
uintptr_t limit;
|
||||
struct copy_space_block *block;
|
||||
};
|
||||
|
||||
static struct gc_lock
|
||||
copy_space_lock(struct copy_space *space) {
|
||||
return gc_lock_acquire(&space->lock);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_block_list_push(struct copy_space_block_list *list,
|
||||
struct copy_space_block *block) {
|
||||
struct copy_space_block *next =
|
||||
atomic_load_explicit(&list->head, memory_order_acquire);
|
||||
do {
|
||||
block->next = next;
|
||||
} while (!atomic_compare_exchange_weak(&list->head, &next, block));
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_block_list_pop(struct copy_space_block_list *list) {
|
||||
struct copy_space_block *head =
|
||||
atomic_load_explicit(&list->head, memory_order_acquire);
|
||||
struct copy_space_block *next;
|
||||
do {
|
||||
if (!head)
|
||||
return NULL;
|
||||
} while (!atomic_compare_exchange_weak(&list->head, &head, head->next));
|
||||
head->next = NULL;
|
||||
return head;
|
||||
}
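// Usage sketch (illustrative): any number of threads may push and pop
// concurrently, for example when returning evacuated blocks in parallel.
//
//   struct copy_space_block_list scratch = { NULL };
//   copy_space_block_list_push(&scratch, block);   // lock-free push
//   struct copy_space_block *b =
//     copy_space_block_list_pop(&scratch);         // NULL when empty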
|
||||
|
||||
static void
|
||||
copy_space_block_stack_push(struct copy_space_block_stack *stack,
|
||||
struct copy_space_block *block,
|
||||
const struct gc_lock *lock) {
|
||||
struct copy_space_block *next = stack->list.head;
|
||||
block->next = next;
|
||||
stack->list.head = block;
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_block_stack_pop(struct copy_space_block_stack *stack,
|
||||
const struct gc_lock *lock) {
|
||||
struct copy_space_block *head = stack->list.head;
|
||||
if (head) {
|
||||
stack->list.head = head->next;
|
||||
head->next = NULL;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_pop_empty_block(struct copy_space *space,
|
||||
const struct gc_lock *lock) {
|
||||
struct copy_space_block *ret = copy_space_block_stack_pop(&space->empty,
|
||||
lock);
|
||||
if (ret) {
|
||||
ret->allocated = 0;
|
||||
ret->is_survivor[space->active_region] = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_push_empty_block(struct copy_space *space,
|
||||
struct copy_space_block *block,
|
||||
const struct gc_lock *lock) {
|
||||
copy_space_block_stack_push(&space->empty, block, lock);
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_pop_full_block(struct copy_space *space) {
|
||||
return copy_space_block_list_pop(&space->full);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_push_full_block(struct copy_space *space,
|
||||
struct copy_space_block *block) {
|
||||
if (space->in_gc)
|
||||
block->is_survivor[space->active_region] = 1;
|
||||
copy_space_block_list_push(&space->full, block);
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_pop_partly_full_block(struct copy_space *space,
|
||||
const struct gc_lock *lock) {
|
||||
return copy_space_block_stack_pop(&space->partly_full, lock);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_push_partly_full_block(struct copy_space *space,
|
||||
struct copy_space_block *block,
|
||||
const struct gc_lock *lock) {
|
||||
copy_space_block_stack_push(&space->partly_full, block, lock);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_page_out_block(struct copy_space *space,
|
||||
struct copy_space_block *block,
|
||||
const struct gc_lock *lock) {
|
||||
copy_space_block_stack_push
|
||||
(block->in_core
|
||||
? &space->paged_out[0]
|
||||
: &space->paged_out[COPY_SPACE_PAGE_OUT_QUEUE_SIZE-1],
|
||||
block,
|
||||
lock);
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_page_in_block(struct copy_space *space,
|
||||
const struct gc_lock *lock) {
|
||||
for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++) {
|
||||
struct copy_space_block *block =
|
||||
copy_space_block_stack_pop(&space->paged_out[age], lock);
|
||||
if (block) return block;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
copy_space_request_release_memory(struct copy_space *space, size_t bytes) {
|
||||
return atomic_fetch_add(&space->bytes_to_page_out, bytes) + bytes;
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_page_out_blocks_until_memory_released(struct copy_space *space) {
|
||||
ssize_t pending = atomic_load(&space->bytes_to_page_out);
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
while (pending > 0) {
|
||||
struct copy_space_block *block = copy_space_pop_empty_block(space, &lock);
|
||||
if (!block) break;
|
||||
copy_space_page_out_block(space, block, &lock);
|
||||
pending = (atomic_fetch_sub(&space->bytes_to_page_out, COPY_SPACE_BLOCK_SIZE)
|
||||
- COPY_SPACE_BLOCK_SIZE);
|
||||
}
|
||||
gc_lock_release(&lock);
|
||||
return pending <= 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
copy_space_maybe_reacquire_memory(struct copy_space *space, size_t bytes) {
|
||||
ssize_t pending =
|
||||
atomic_fetch_sub(&space->bytes_to_page_out, bytes) - bytes;
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
while (pending + COPY_SPACE_BLOCK_SIZE <= 0) {
|
||||
struct copy_space_block *block = copy_space_page_in_block(space, &lock);
|
||||
if (!block) break;
|
||||
copy_space_push_empty_block(space, block, &lock);
|
||||
pending = (atomic_fetch_add(&space->bytes_to_page_out,
|
||||
COPY_SPACE_BLOCK_SIZE)
|
||||
+ COPY_SPACE_BLOCK_SIZE);
|
||||
}
|
||||
gc_lock_release(&lock);
|
||||
return pending;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_reacquire_memory(struct copy_space *space, size_t bytes) {
|
||||
ssize_t pending = copy_space_maybe_reacquire_memory(space, bytes);
|
||||
GC_ASSERT(pending + COPY_SPACE_BLOCK_SIZE > 0);
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_contains_address(struct copy_space *space, uintptr_t addr) {
|
||||
return extents_contain_addr(space->extents, addr);
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_contains(struct copy_space *space, struct gc_ref ref) {
|
||||
return copy_space_contains_address(space, gc_ref_value(ref));
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_has_field_logging_bits(struct copy_space *space) {
|
||||
return space->flags & COPY_SPACE_HAS_FIELD_LOGGING_BITS;
|
||||
}
|
||||
|
||||
static size_t
|
||||
copy_space_field_logging_blocks(struct copy_space *space) {
|
||||
if (!copy_space_has_field_logging_bits(space))
|
||||
return 0;
|
||||
size_t bytes = COPY_SPACE_SLAB_SIZE / sizeof (uintptr_t) / 8;
|
||||
size_t blocks =
|
||||
align_up(bytes, COPY_SPACE_BLOCK_SIZE) / COPY_SPACE_BLOCK_SIZE;
|
||||
return blocks;
|
||||
}
|
||||
|
||||
static uint8_t*
|
||||
copy_space_field_logged_byte(struct gc_edge edge) {
|
||||
uintptr_t addr = gc_edge_address(edge);
|
||||
uintptr_t base = align_down(addr, COPY_SPACE_SLAB_SIZE);
|
||||
base += offsetof(struct copy_space_slab, blocks);
|
||||
uintptr_t field = (addr & (COPY_SPACE_SLAB_SIZE - 1)) / sizeof(uintptr_t);
|
||||
uintptr_t byte = field / 8;
|
||||
return (uint8_t*) (base + byte);
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
copy_space_field_logged_bit(struct gc_edge edge) {
|
||||
// Each byte has 8 bits, covering 8 fields.
|
||||
size_t field = gc_edge_address(edge) / sizeof(uintptr_t);
|
||||
return 1 << (field % 8);
|
||||
}
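// A worked example of the logging-bitmap math above, assuming a 64-bit
// target (sizeof(uintptr_t) == 8) and an edge 1 MB + 64 bytes into its
// slab: field = 1048640 / 8 = 131080, so the flag lives in byte
// 131080 / 8 = 16385 of the bitmap that starts at
// offsetof(struct copy_space_slab, blocks), at bit 1 << (131080 % 8),
// i.e. bit 0.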
|
||||
|
||||
static void
|
||||
copy_space_clear_field_logged_bits_for_region(struct copy_space *space,
|
||||
void *region_base) {
|
||||
uintptr_t addr = (uintptr_t)region_base;
|
||||
GC_ASSERT_EQ(addr, align_down(addr, COPY_SPACE_REGION_SIZE));
|
||||
GC_ASSERT(copy_space_contains_address(space, addr));
|
||||
if (copy_space_has_field_logging_bits(space))
|
||||
memset(copy_space_field_logged_byte(gc_edge(region_base)),
|
||||
0,
|
||||
COPY_SPACE_REGION_SIZE / sizeof(uintptr_t) / 8);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_clear_field_logged_bits_for_block(struct copy_space *space,
|
||||
struct copy_space_block *block) {
|
||||
struct copy_space_block_payload *payload = copy_space_block_payload(block);
|
||||
copy_space_clear_field_logged_bits_for_region(space, &payload->regions[0]);
|
||||
copy_space_clear_field_logged_bits_for_region(space, &payload->regions[1]);
|
||||
}
|
||||
|
||||
static inline void
|
||||
copy_space_allocator_set_block(struct copy_space_allocator *alloc,
|
||||
struct copy_space_block *block,
|
||||
int active_region) {
|
||||
struct copy_space_block_payload *payload = copy_space_block_payload(block);
|
||||
struct copy_space_region *region = &payload->regions[active_region];
|
||||
alloc->block = block;
|
||||
alloc->hp = (uintptr_t)&region[0];
|
||||
alloc->limit = (uintptr_t)&region[1];
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_allocator_acquire_block(struct copy_space_allocator *alloc,
|
||||
struct copy_space_block *block,
|
||||
int active_region) {
|
||||
if (block) {
|
||||
copy_space_allocator_set_block(alloc, block, active_region);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_allocator_acquire_empty_block(struct copy_space_allocator *alloc,
|
||||
struct copy_space *space) {
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
struct copy_space_block *block = copy_space_pop_empty_block(space, &lock);
|
||||
gc_lock_release(&lock);
|
||||
if (copy_space_allocator_acquire_block(alloc, block, space->active_region)) {
|
||||
block->in_core = 1;
|
||||
if (block->all_zeroes[space->active_region]) {
|
||||
block->all_zeroes[space->active_region] = 0;
|
||||
} else {
|
||||
memset((char*)alloc->hp, 0, COPY_SPACE_REGION_SIZE);
|
||||
copy_space_clear_field_logged_bits_for_region(space, (void*)alloc->hp);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_allocator_acquire_partly_full_block(struct copy_space_allocator *alloc,
|
||||
struct copy_space *space) {
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
struct copy_space_block *block = copy_space_pop_partly_full_block(space,
|
||||
&lock);
|
||||
gc_lock_release(&lock);
|
||||
if (copy_space_allocator_acquire_block(alloc, block, space->active_region)) {
|
||||
alloc->hp += block->allocated;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_allocator_release_full_block(struct copy_space_allocator *alloc,
|
||||
struct copy_space *space) {
|
||||
size_t fragmentation = alloc->limit - alloc->hp;
|
||||
size_t allocated = COPY_SPACE_REGION_SIZE - alloc->block->allocated;
|
||||
atomic_fetch_add_explicit(&space->allocated_bytes, allocated,
|
||||
memory_order_relaxed);
|
||||
if (fragmentation)
|
||||
atomic_fetch_add_explicit(&space->fragmentation, fragmentation,
|
||||
memory_order_relaxed);
|
||||
copy_space_push_full_block(space, alloc->block);
|
||||
alloc->hp = alloc->limit = 0;
|
||||
alloc->block = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_allocator_release_partly_full_block(struct copy_space_allocator *alloc,
|
||||
struct copy_space *space) {
|
||||
size_t allocated = alloc->hp & (COPY_SPACE_REGION_SIZE - 1);
|
||||
if (allocated) {
|
||||
atomic_fetch_add_explicit(&space->allocated_bytes,
|
||||
allocated - alloc->block->allocated,
|
||||
memory_order_relaxed);
|
||||
alloc->block->allocated = allocated;
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
copy_space_push_partly_full_block(space, alloc->block, &lock);
|
||||
gc_lock_release(&lock);
|
||||
} else {
|
||||
// In this case, hp was bumped all the way to the limit, in which
|
||||
// case allocated wraps to 0; the block is full.
|
||||
atomic_fetch_add_explicit(&space->allocated_bytes,
|
||||
COPY_SPACE_REGION_SIZE - alloc->block->allocated,
|
||||
memory_order_relaxed);
|
||||
copy_space_push_full_block(space, alloc->block);
|
||||
}
|
||||
alloc->hp = alloc->limit = 0;
|
||||
alloc->block = NULL;
|
||||
}
|
||||
|
||||
static inline struct gc_ref
|
||||
copy_space_allocate(struct copy_space_allocator *alloc,
|
||||
struct copy_space *space,
|
||||
size_t size) {
|
||||
GC_ASSERT(size > 0);
|
||||
GC_ASSERT(size <= gc_allocator_large_threshold());
|
||||
size = align_up(size, gc_allocator_small_granule_size());
|
||||
|
||||
if (alloc->hp + size <= alloc->limit)
|
||||
goto done;
|
||||
|
||||
if (alloc->block)
|
||||
copy_space_allocator_release_full_block(alloc, space);
|
||||
while (copy_space_allocator_acquire_partly_full_block(alloc, space)) {
|
||||
if (alloc->hp + size <= alloc->limit)
|
||||
goto done;
|
||||
copy_space_allocator_release_full_block(alloc, space);
|
||||
}
|
||||
if (!copy_space_allocator_acquire_empty_block(alloc, space))
|
||||
return gc_ref_null();
|
||||
// The newly acquired block is empty and is therefore large enough for
|
||||
// a small allocation.
|
||||
|
||||
done:
|
||||
struct gc_ref ret = gc_ref(alloc->hp);
|
||||
alloc->hp += size;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct copy_space_block*
|
||||
copy_space_append_block_lists(struct copy_space_block *head,
|
||||
struct copy_space_block *tail) {
|
||||
if (!head) return tail;
|
||||
if (tail) {
|
||||
struct copy_space_block *walk = head;
|
||||
while (walk->next)
|
||||
walk = walk->next;
|
||||
walk->next = tail;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_flip(struct copy_space *space) {
|
||||
// Mutators stopped, can access nonatomically.
|
||||
struct copy_space_block* flip = space->full.head;
|
||||
flip = copy_space_append_block_lists(space->partly_full.list.head, flip);
|
||||
flip = copy_space_append_block_lists(space->empty.list.head, flip);
|
||||
space->empty.list.head = flip;
|
||||
space->partly_full.list.head = NULL;
|
||||
space->full.head = NULL;
|
||||
space->allocated_bytes = 0;
|
||||
space->fragmentation = 0;
|
||||
space->active_region ^= 1;
|
||||
space->in_gc = 1;
|
||||
}
|
||||
|
||||
static inline void
|
||||
copy_space_allocator_init(struct copy_space_allocator *alloc) {
|
||||
memset(alloc, 0, sizeof(*alloc));
|
||||
}
|
||||
|
||||
static inline void
|
||||
copy_space_allocator_finish(struct copy_space_allocator *alloc,
|
||||
struct copy_space *space) {
|
||||
if (alloc->block)
|
||||
copy_space_allocator_release_partly_full_block(alloc, space);
|
||||
}
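// A minimal sketch of the allocator lifecycle defined above (illustrative;
// a real mutator keeps the allocator in per-thread state and object sizes
// come from the embedder):
static inline void
copy_space_allocation_example(struct copy_space *space) {
  struct copy_space_allocator alloc;
  copy_space_allocator_init(&alloc);
  struct gc_ref obj = copy_space_allocate(&alloc, space, 32);
  if (!gc_ref_is_null(obj)) {
    // 32 bytes, rounded up to the small granule size, are now reserved at
    // gc_ref_value(obj) in the space's active region.
  }
  // Return any partly-used block so other threads can fill it.
  copy_space_allocator_finish(&alloc, space);
}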
|
||||
|
||||
static void
|
||||
copy_space_finish_gc(struct copy_space *space, int is_minor_gc) {
|
||||
// Mutators stopped, can access nonatomically.
|
||||
if (is_minor_gc) {
|
||||
// Avoid mixing survivors and new objects on the same blocks.
|
||||
struct copy_space_allocator alloc;
|
||||
copy_space_allocator_init(&alloc);
|
||||
while (copy_space_allocator_acquire_partly_full_block(&alloc, space))
|
||||
copy_space_allocator_release_full_block(&alloc, space);
|
||||
copy_space_allocator_finish(&alloc, space);
|
||||
}
|
||||
|
||||
space->allocated_bytes_at_last_gc = space->allocated_bytes;
|
||||
space->fragmentation_at_last_gc = space->fragmentation;
|
||||
space->in_gc = 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
copy_space_can_allocate(struct copy_space *space, size_t bytes) {
|
||||
// With lock!
|
||||
size_t count = 0;
|
||||
for (struct copy_space_block *empties = space->empty.list.head;
|
||||
empties && count < bytes;
|
||||
empties = empties->next) {
|
||||
count += COPY_SPACE_REGION_SIZE;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_add_to_allocation_counter(struct copy_space *space,
|
||||
uint64_t *counter) {
|
||||
*counter += space->allocated_bytes - space->allocated_bytes_at_last_gc;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_gc_during_evacuation(void *data) {
|
||||
// If space is really tight and reordering of objects during
|
||||
// evacuation resulted in more end-of-block fragmentation and thus
|
||||
// block use than before collection started, we can actually run out
|
||||
// of memory while collecting. We should probably attempt to expand
|
||||
// the heap here, at least by a single block; it's better than the
|
||||
// alternatives.
|
||||
fprintf(stderr, "Out of memory\n");
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
static inline enum copy_space_forward_result
|
||||
copy_space_forward_atomic(struct copy_space *space, struct gc_edge edge,
|
||||
struct gc_ref old_ref,
|
||||
struct copy_space_allocator *alloc) {
|
||||
struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref);
|
||||
|
||||
retry:
|
||||
if (fwd.state == GC_FORWARDING_STATE_NOT_FORWARDED)
|
||||
gc_atomic_forward_acquire(&fwd);
|
||||
|
||||
switch (fwd.state) {
|
||||
case GC_FORWARDING_STATE_NOT_FORWARDED:
|
||||
default:
|
||||
// Impossible.
|
||||
GC_CRASH();
|
||||
case GC_FORWARDING_STATE_ACQUIRED: {
|
||||
// We claimed the object successfully; evacuating is up to us.
|
||||
size_t bytes = gc_atomic_forward_object_size(&fwd);
|
||||
struct gc_ref new_ref = copy_space_allocate(alloc, space, bytes);
|
||||
if (gc_ref_is_null(new_ref)) {
|
||||
gc_atomic_forward_abort(&fwd);
|
||||
return COPY_SPACE_FORWARD_FAILED;
|
||||
}
|
||||
// Copy object contents before committing, as we don't know what
|
||||
// part of the object (if any) will be overwritten by the
|
||||
// commit.
|
||||
memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), bytes);
|
||||
gc_atomic_forward_commit(&fwd, new_ref);
|
||||
gc_edge_update(edge, new_ref);
|
||||
return COPY_SPACE_FORWARD_EVACUATED;
|
||||
}
|
||||
case GC_FORWARDING_STATE_BUSY:
|
||||
// Someone else claimed this object first. Spin until new address
|
||||
// known, or evacuation aborts.
|
||||
for (size_t spin_count = 0;; spin_count++) {
|
||||
if (gc_atomic_forward_retry_busy(&fwd))
|
||||
goto retry;
|
||||
yield_for_spin(spin_count);
|
||||
}
|
||||
GC_CRASH(); // Unreachable.
|
||||
case GC_FORWARDING_STATE_FORWARDED:
|
||||
// The object has been evacuated already. Update the edge;
|
||||
// whoever forwarded the object will make sure it's eventually
|
||||
// traced.
|
||||
gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd)));
|
||||
return COPY_SPACE_FORWARD_UPDATED;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_forward_if_traced_atomic(struct copy_space *space,
|
||||
struct gc_edge edge,
|
||||
struct gc_ref old_ref) {
|
||||
struct gc_atomic_forward fwd = gc_atomic_forward_begin(old_ref);
|
||||
retry:
|
||||
switch (fwd.state) {
|
||||
case GC_FORWARDING_STATE_NOT_FORWARDED:
|
||||
return 0;
|
||||
case GC_FORWARDING_STATE_BUSY:
|
||||
// Someone else claimed this object first. Spin until new address
|
||||
// known.
|
||||
for (size_t spin_count = 0;; spin_count++) {
|
||||
if (gc_atomic_forward_retry_busy(&fwd))
|
||||
goto retry;
|
||||
yield_for_spin(spin_count);
|
||||
}
|
||||
GC_CRASH(); // Unreachable.
|
||||
case GC_FORWARDING_STATE_FORWARDED:
|
||||
gc_edge_update(edge, gc_ref(gc_atomic_forward_address(&fwd)));
|
||||
return 1;
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
static inline enum copy_space_forward_result
|
||||
copy_space_forward_nonatomic(struct copy_space *space, struct gc_edge edge,
|
||||
struct gc_ref old_ref,
|
||||
struct copy_space_allocator *alloc) {
|
||||
uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
|
||||
if (forwarded) {
|
||||
gc_edge_update(edge, gc_ref(forwarded));
|
||||
return COPY_SPACE_FORWARD_UPDATED;
|
||||
} else {
|
||||
size_t size;
|
||||
gc_trace_object(old_ref, NULL, NULL, NULL, &size);
|
||||
struct gc_ref new_ref = copy_space_allocate(alloc, space, size);
|
||||
if (gc_ref_is_null(new_ref))
|
||||
return COPY_SPACE_FORWARD_FAILED;
|
||||
memcpy(gc_ref_heap_object(new_ref), gc_ref_heap_object(old_ref), size);
|
||||
gc_object_forward_nonatomic(old_ref, new_ref);
|
||||
gc_edge_update(edge, new_ref);
|
||||
return COPY_SPACE_FORWARD_EVACUATED;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_forward_if_traced_nonatomic(struct copy_space *space,
|
||||
struct gc_edge edge,
|
||||
struct gc_ref old_ref) {
|
||||
uintptr_t forwarded = gc_object_forwarded_nonatomic(old_ref);
|
||||
if (forwarded) {
|
||||
gc_edge_update(edge, gc_ref(forwarded));
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline enum copy_space_forward_result
|
||||
copy_space_forward(struct copy_space *src_space, struct copy_space *dst_space,
|
||||
struct gc_edge edge,
|
||||
struct gc_ref old_ref,
|
||||
struct copy_space_allocator *dst_alloc) {
|
||||
GC_ASSERT(copy_space_contains(src_space, old_ref));
|
||||
GC_ASSERT(src_space != dst_space
|
||||
|| copy_space_object_region(old_ref) != src_space->active_region);
|
||||
if (GC_PARALLEL && src_space->atomic_forward)
|
||||
return copy_space_forward_atomic(dst_space, edge, old_ref, dst_alloc);
|
||||
return copy_space_forward_nonatomic(dst_space, edge, old_ref, dst_alloc);
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_forward_if_traced(struct copy_space *space, struct gc_edge edge,
|
||||
struct gc_ref old_ref) {
|
||||
GC_ASSERT(copy_space_contains(space, old_ref));
|
||||
GC_ASSERT(copy_space_object_region(old_ref) != space->active_region);
|
||||
if (GC_PARALLEL && space->atomic_forward)
|
||||
return copy_space_forward_if_traced_atomic(space, edge, old_ref);
|
||||
return copy_space_forward_if_traced_nonatomic(space, edge, old_ref);
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_is_aligned(struct copy_space *space) {
|
||||
return space->flags & COPY_SPACE_ALIGNED;
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_fixed_size(struct copy_space *space) {
|
||||
// If the extent is aligned, it is fixed.
|
||||
return copy_space_is_aligned(space);
|
||||
}
|
||||
|
||||
static inline uintptr_t
|
||||
copy_space_low_aligned_address(struct copy_space *space) {
|
||||
GC_ASSERT(copy_space_is_aligned(space));
|
||||
GC_ASSERT_EQ(space->extents->size, 1);
|
||||
return space->extents->ranges[0].lo_addr;
|
||||
}
|
||||
|
||||
static inline uintptr_t
|
||||
copy_space_high_aligned_address(struct copy_space *space) {
|
||||
GC_ASSERT(copy_space_is_aligned(space));
|
||||
GC_ASSERT_EQ(space->extents->size, 1);
|
||||
return space->extents->ranges[0].hi_addr;
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_contains_address_aligned(struct copy_space *space, uintptr_t addr) {
|
||||
uintptr_t low_addr = copy_space_low_aligned_address(space);
|
||||
uintptr_t high_addr = copy_space_high_aligned_address(space);
|
||||
uintptr_t size = high_addr - low_addr;
|
||||
return (addr - low_addr) < size;
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_contains_edge_aligned(struct copy_space *space,
|
||||
struct gc_edge edge) {
|
||||
return copy_space_contains_address_aligned(space, gc_edge_address(edge));
|
||||
}
|
||||
|
||||
static inline int
|
||||
copy_space_should_promote(struct copy_space *space, struct gc_ref ref) {
|
||||
GC_ASSERT(copy_space_contains(space, ref));
|
||||
uintptr_t addr = gc_ref_value(ref);
|
||||
struct copy_space_block *block = copy_space_block_for_addr(gc_ref_value(ref));
|
||||
GC_ASSERT_EQ(copy_space_object_region(ref), space->active_region ^ 1);
|
||||
return block->is_survivor[space->active_region ^ 1];
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_contains_edge(struct copy_space *space, struct gc_edge edge) {
|
||||
return copy_space_contains_address(space, gc_edge_address(edge));
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_remember_edge(struct copy_space *space, struct gc_edge edge) {
|
||||
GC_ASSERT(copy_space_contains_edge(space, edge));
|
||||
uint8_t* loc = copy_space_field_logged_byte(edge);
|
||||
uint8_t bit = copy_space_field_logged_bit(edge);
|
||||
uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
|
||||
do {
|
||||
if (byte & bit) return 0;
|
||||
} while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte|bit,
|
||||
memory_order_acq_rel,
|
||||
memory_order_acquire));
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_forget_edge(struct copy_space *space, struct gc_edge edge) {
|
||||
GC_ASSERT(copy_space_contains_edge(space, edge));
|
||||
uint8_t* loc = copy_space_field_logged_byte(edge);
|
||||
uint8_t bit = copy_space_field_logged_bit(edge);
|
||||
uint8_t byte = atomic_load_explicit(loc, memory_order_acquire);
|
||||
do {
|
||||
if (!(byte & bit)) return 0;
|
||||
} while (!atomic_compare_exchange_weak_explicit(loc, &byte, byte&~bit,
|
||||
memory_order_acq_rel,
|
||||
memory_order_acquire));
|
||||
return 1;
|
||||
}
|
||||
|
||||
static size_t copy_space_is_power_of_two(size_t n) {
|
||||
GC_ASSERT(n != 0);
|
||||
return (n & (n - 1)) == 0;
|
||||
}
|
||||
|
||||
static size_t copy_space_round_up_power_of_two(size_t n) {
|
||||
if (copy_space_is_power_of_two(n))
|
||||
return n;
|
||||
|
||||
return 1ULL << (sizeof(size_t) * 8 - __builtin_clzll(n));
|
||||
}
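// For example: copy_space_round_up_power_of_two(1) == 1,
// copy_space_round_up_power_of_two(48 * 1024 * 1024) == 64 * 1024 * 1024,
// and any power of two is returned unchanged rather than doubled.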
|
||||
|
||||
static struct copy_space_slab*
|
||||
copy_space_allocate_slabs(size_t nslabs, uint32_t flags) {
|
||||
size_t size = nslabs * COPY_SPACE_SLAB_SIZE;
|
||||
size_t alignment = COPY_SPACE_SLAB_SIZE;
|
||||
if (flags & COPY_SPACE_ALIGNED) {
|
||||
GC_ASSERT(copy_space_is_power_of_two(size));
|
||||
alignment = size;
|
||||
}
|
||||
return gc_platform_acquire_memory(size, alignment);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_add_slabs(struct copy_space *space, struct copy_space_slab *slabs,
|
||||
size_t nslabs) {
|
||||
size_t old_size = space->nslabs * sizeof(struct copy_space_slab*);
|
||||
size_t additional_size = nslabs * sizeof(struct copy_space_slab*);
|
||||
space->extents = extents_adjoin(space->extents, slabs,
|
||||
nslabs * sizeof(struct copy_space_slab));
|
||||
space->slabs = realloc(space->slabs, old_size + additional_size);
|
||||
if (!space->slabs)
|
||||
GC_CRASH();
|
||||
while (nslabs--)
|
||||
space->slabs[space->nslabs++] = slabs++;
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_shrink(struct copy_space *space, size_t bytes) {
|
||||
ssize_t pending = copy_space_request_release_memory(space, bytes);
|
||||
copy_space_page_out_blocks_until_memory_released(space);
|
||||
|
||||
// It may still be the case that we need to page out more blocks.  Only collection
|
||||
// can help us then!
|
||||
}
|
||||
|
||||
static size_t
|
||||
copy_space_first_payload_block(struct copy_space *space) {
|
||||
return copy_space_field_logging_blocks(space);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_expand(struct copy_space *space, size_t bytes) {
|
||||
GC_ASSERT(!copy_space_fixed_size(space));
|
||||
ssize_t to_acquire = -copy_space_maybe_reacquire_memory(space, bytes);
|
||||
if (to_acquire <= 0) return;
|
||||
size_t reserved = align_up(to_acquire, COPY_SPACE_SLAB_SIZE);
|
||||
size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE;
|
||||
struct copy_space_slab *slabs =
|
||||
copy_space_allocate_slabs(nslabs, space->flags);
|
||||
copy_space_add_slabs(space, slabs, nslabs);
|
||||
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
for (size_t slab = 0; slab < nslabs; slab++) {
|
||||
for (size_t idx = copy_space_first_payload_block(space);
|
||||
idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB;
|
||||
idx++) {
|
||||
struct copy_space_block *block = &slabs[slab].headers[idx];
|
||||
block->all_zeroes[0] = block->all_zeroes[1] = 1;
|
||||
block->in_core = 0;
|
||||
copy_space_page_out_block(space, block, &lock);
|
||||
reserved -= COPY_SPACE_BLOCK_SIZE;
|
||||
}
|
||||
}
|
||||
gc_lock_release(&lock);
|
||||
copy_space_reacquire_memory(space, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_advance_page_out_queue(void *data) {
|
||||
struct copy_space *space = data;
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
for (int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 3; age >= 0; age--) {
|
||||
while (1) {
|
||||
struct copy_space_block *block =
|
||||
copy_space_block_stack_pop(&space->paged_out[age], &lock);
|
||||
if (!block) break;
|
||||
copy_space_block_stack_push(&space->paged_out[age + 1], block, &lock);
|
||||
}
|
||||
}
|
||||
gc_lock_release(&lock);
|
||||
}
|
||||
|
||||
static void
|
||||
copy_space_page_out_blocks(void *data) {
|
||||
struct copy_space *space = data;
|
||||
int age = COPY_SPACE_PAGE_OUT_QUEUE_SIZE - 2;
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
while (1) {
|
||||
struct copy_space_block *block =
|
||||
copy_space_block_stack_pop(&space->paged_out[age], &lock);
|
||||
if (!block) break;
|
||||
block->in_core = 0;
|
||||
block->all_zeroes[0] = block->all_zeroes[1] = 1;
|
||||
gc_platform_discard_memory(copy_space_block_payload(block),
|
||||
COPY_SPACE_BLOCK_SIZE);
|
||||
copy_space_clear_field_logged_bits_for_block(space, block);
|
||||
copy_space_block_stack_push(&space->paged_out[age + 1], block, &lock);
|
||||
}
|
||||
gc_lock_release(&lock);
|
||||
}
|
||||
|
||||
static int
|
||||
copy_space_init(struct copy_space *space, size_t size, uint32_t flags,
|
||||
struct gc_background_thread *thread) {
|
||||
size = align_up(size, COPY_SPACE_BLOCK_SIZE);
|
||||
size_t reserved = align_up(size, COPY_SPACE_SLAB_SIZE);
|
||||
if (flags & COPY_SPACE_ALIGNED)
|
||||
reserved = copy_space_round_up_power_of_two(reserved);
|
||||
size_t nslabs = reserved / COPY_SPACE_SLAB_SIZE;
|
||||
struct copy_space_slab *slabs = copy_space_allocate_slabs(nslabs, flags);
|
||||
if (!slabs)
|
||||
return 0;
|
||||
|
||||
pthread_mutex_init(&space->lock, NULL);
|
||||
space->empty.list.head = NULL;
|
||||
space->partly_full.list.head = NULL;
|
||||
space->full.head = NULL;
|
||||
for (int age = 0; age < COPY_SPACE_PAGE_OUT_QUEUE_SIZE; age++)
|
||||
space->paged_out[age].list.head = NULL;
|
||||
space->allocated_bytes = 0;
|
||||
space->fragmentation = 0;
|
||||
space->bytes_to_page_out = 0;
|
||||
space->active_region = 0;
|
||||
space->atomic_forward = flags & COPY_SPACE_ATOMIC_FORWARDING;
|
||||
space->flags = flags;
|
||||
space->allocated_bytes_at_last_gc = 0;
|
||||
space->fragmentation_at_last_gc = 0;
|
||||
space->extents = extents_allocate((flags & COPY_SPACE_ALIGNED) ? 1 : 10);
|
||||
copy_space_add_slabs(space, slabs, nslabs);
|
||||
struct gc_lock lock = copy_space_lock(space);
|
||||
for (size_t slab = 0; slab < nslabs; slab++) {
|
||||
for (size_t idx = copy_space_first_payload_block(space);
|
||||
idx < COPY_SPACE_NONHEADER_BLOCKS_PER_SLAB;
|
||||
idx++) {
|
||||
struct copy_space_block *block = &slabs[slab].headers[idx];
|
||||
block->all_zeroes[0] = block->all_zeroes[1] = 1;
|
||||
block->in_core = 0;
|
||||
block->is_survivor[0] = block->is_survivor[1] = 0;
|
||||
if (reserved > size) {
|
||||
copy_space_page_out_block(space, block, &lock);
|
||||
reserved -= COPY_SPACE_BLOCK_SIZE;
|
||||
} else {
|
||||
copy_space_push_empty_block(space, block, &lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
gc_lock_release(&lock);
|
||||
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START,
|
||||
copy_space_advance_page_out_queue,
|
||||
space);
|
||||
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_END,
|
||||
copy_space_page_out_blocks,
|
||||
space);
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // COPY_SPACE_H
|
10
libguile/whippet/src/debug.h
Normal file
10
libguile/whippet/src/debug.h
Normal file
|
@ -0,0 +1,10 @@
|
|||
#ifndef DEBUG_H
|
||||
#define DEBUG_H
|
||||
|
||||
#ifndef NDEBUG
|
||||
#define DEBUG(...) fprintf (stderr, "DEBUG: " __VA_ARGS__)
|
||||
#else
|
||||
#define DEBUG(...) do { } while (0)
|
||||
#endif
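// For example, DEBUG("paging out %zu blocks\n", count) prints to stderr
// when NDEBUG is not defined and expands to a no-op statement otherwise;
// the including file is expected to provide <stdio.h> for fprintf.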
|
||||
|
||||
#endif // DEBUG_H
|
88
libguile/whippet/src/extents.h
Normal file
88
libguile/whippet/src/extents.h
Normal file
|
@ -0,0 +1,88 @@
|
|||
#ifndef EXTENTS_H
|
||||
#define EXTENTS_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
#include "gc-assert.h"
|
||||
|
||||
struct extent_range {
|
||||
uintptr_t lo_addr;
|
||||
uintptr_t hi_addr;
|
||||
};
|
||||
|
||||
struct extents {
|
||||
size_t size;
|
||||
size_t capacity;
|
||||
struct extent_range ranges[];
|
||||
};
|
||||
|
||||
static inline int
|
||||
extents_contain_addr(struct extents *extents, uintptr_t addr) {
|
||||
size_t lo = 0;
|
||||
size_t hi = extents->size;
|
||||
while (lo != hi) {
|
||||
size_t mid = (lo + hi) / 2;
|
||||
struct extent_range range = extents->ranges[mid];
|
||||
if (addr < range.lo_addr) {
|
||||
hi = mid;
|
||||
} else if (addr < range.hi_addr) {
|
||||
return 1;
|
||||
} else {
|
||||
lo = mid + 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct extents*
|
||||
extents_allocate(size_t capacity) {
|
||||
size_t byte_size =
|
||||
sizeof(struct extents) + sizeof(struct extent_range) * capacity;
|
||||
struct extents *ret = malloc(byte_size);
|
||||
if (!ret) __builtin_trap();
|
||||
memset(ret, 0, byte_size);
|
||||
ret->capacity = capacity;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct extents*
|
||||
extents_insert(struct extents *old, size_t idx, struct extent_range range) {
|
||||
if (old->size < old->capacity) {
|
||||
size_t bytes_to_move = sizeof(struct extent_range) * (old->size - idx);
|
||||
memmove(&old->ranges[idx + 1], &old->ranges[idx], bytes_to_move);
|
||||
old->ranges[idx] = range;
|
||||
old->size++;
|
||||
return old;
|
||||
} else {
|
||||
struct extents *new_ = extents_allocate(old->capacity * 2 + 1);
|
||||
memcpy(&new_->ranges[0], &old->ranges[0],
|
||||
sizeof(struct extent_range) * idx);
|
||||
memcpy(&new_->ranges[idx + 1], &old->ranges[idx],
|
||||
sizeof(struct extent_range) * (old->size - idx));
|
||||
new_->ranges[idx] = range;
|
||||
new_->size = old->size + 1;
|
||||
free(old);
|
||||
return new_;
|
||||
}
|
||||
}
|
||||
|
||||
static struct extents*
|
||||
extents_adjoin(struct extents *extents, void *lo_addr, size_t size) {
|
||||
size_t i;
|
||||
struct extent_range range = { (uintptr_t)lo_addr, (uintptr_t)lo_addr + size };
|
||||
for (i = 0; i < extents->size; i++) {
|
||||
if (range.hi_addr < extents->ranges[i].lo_addr) {
|
||||
break;
|
||||
} else if (range.hi_addr == extents->ranges[i].lo_addr) {
|
||||
extents->ranges[i].lo_addr = range.lo_addr;
|
||||
return extents;
|
||||
} else if (range.lo_addr == extents->ranges[i].hi_addr) {
|
||||
extents->ranges[i].hi_addr = range.hi_addr;
|
||||
return extents;
|
||||
}
|
||||
}
|
||||
return extents_insert(extents, i, range);
|
||||
}
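// A usage sketch (addresses illustrative): ranges are half-open and
// adjacent ranges coalesce rather than inserting a new entry.
//
//   struct extents *e = extents_allocate(2);
//   e = extents_adjoin(e, (void*)0x10000, 0x4000);  // [0x10000, 0x14000)
//   e = extents_adjoin(e, (void*)0x14000, 0x4000);  // extends to 0x18000
//   extents_contain_addr(e, 0x13ff8);               // => 1
//   extents_contain_addr(e, 0x18000);               // => 0; hi is exclusive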
|
||||
|
||||
#endif // EXTENTS_H
|
229
libguile/whippet/src/field-set.h
Normal file
229
libguile/whippet/src/field-set.h
Normal file
|
@ -0,0 +1,229 @@
|
|||
#ifndef FIELD_SET_H
|
||||
#define FIELD_SET_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdlib.h>
#include <string.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "gc-edge.h"
|
||||
#include "gc-lock.h"
|
||||
#include "tracer.h"
|
||||
|
||||
#define GC_EDGE_BUFFER_CAPACITY 510
|
||||
|
||||
struct gc_edge_buffer {
|
||||
struct gc_edge_buffer *next;
|
||||
size_t size;
|
||||
struct gc_edge edges[GC_EDGE_BUFFER_CAPACITY];
|
||||
};
|
||||
|
||||
// Lock-free.
|
||||
struct gc_edge_buffer_list {
|
||||
struct gc_edge_buffer *head;
|
||||
};
|
||||
|
||||
// With a lock.
|
||||
struct gc_edge_buffer_stack {
|
||||
struct gc_edge_buffer_list list;
|
||||
};
|
||||
|
||||
struct gc_field_set {
|
||||
struct gc_edge_buffer_list full;
|
||||
struct gc_edge_buffer_stack partly_full;
|
||||
struct gc_edge_buffer_list empty;
|
||||
size_t count;
|
||||
pthread_mutex_t lock;
|
||||
};
|
||||
|
||||
struct gc_field_set_writer {
|
||||
struct gc_edge_buffer *buf;
|
||||
struct gc_field_set *set;
|
||||
};
|
||||
|
||||
static void
|
||||
gc_edge_buffer_list_push(struct gc_edge_buffer_list *list,
|
||||
struct gc_edge_buffer *buf) {
|
||||
GC_ASSERT(!buf->next);
|
||||
struct gc_edge_buffer *next =
|
||||
atomic_load_explicit(&list->head, memory_order_relaxed);
|
||||
do {
|
||||
buf->next = next;
|
||||
} while (!atomic_compare_exchange_weak_explicit(&list->head, &next, buf,
|
||||
memory_order_acq_rel,
|
||||
memory_order_acquire));
|
||||
}
|
||||
|
||||
static struct gc_edge_buffer*
|
||||
gc_edge_buffer_list_pop(struct gc_edge_buffer_list *list) {
|
||||
struct gc_edge_buffer *head =
|
||||
atomic_load_explicit(&list->head, memory_order_acquire);
|
||||
struct gc_edge_buffer *next;
|
||||
do {
|
||||
if (!head) return NULL;
|
||||
next = head->next;
|
||||
} while (!atomic_compare_exchange_weak_explicit(&list->head, &head, next,
|
||||
memory_order_acq_rel,
|
||||
memory_order_acquire));
|
||||
head->next = NULL;
|
||||
return head;
|
||||
}
|
||||
|
||||
static void
|
||||
gc_edge_buffer_stack_push(struct gc_edge_buffer_stack *stack,
|
||||
struct gc_edge_buffer *buf,
|
||||
const struct gc_lock *lock) {
|
||||
GC_ASSERT(!buf->next);
|
||||
buf->next = stack->list.head;
|
||||
stack->list.head = buf;
|
||||
}
|
||||
|
||||
static struct gc_edge_buffer*
|
||||
gc_edge_buffer_stack_pop(struct gc_edge_buffer_stack *stack,
|
||||
const struct gc_lock *lock) {
|
||||
struct gc_edge_buffer *head = stack->list.head;
|
||||
if (head) {
|
||||
stack->list.head = head->next;
|
||||
head->next = NULL;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_init(struct gc_field_set *set) {
|
||||
memset(set, 0, sizeof(*set));
|
||||
pthread_mutex_init(&set->lock, NULL);
|
||||
}
|
||||
|
||||
static struct gc_edge_buffer*
|
||||
gc_field_set_acquire_buffer(struct gc_field_set *set) {
|
||||
struct gc_edge_buffer *ret;
|
||||
|
||||
ret = gc_edge_buffer_list_pop(&set->empty);
|
||||
if (ret) return ret;
|
||||
|
||||
struct gc_lock lock = gc_lock_acquire(&set->lock);
|
||||
ret = gc_edge_buffer_stack_pop(&set->partly_full, &lock);
|
||||
gc_lock_release(&lock);
|
||||
if (ret) return ret;
|
||||
|
||||
// TODO: bump set->count atomically to account for the newly allocated buffer.
|
||||
ret = malloc(sizeof(*ret));
|
||||
if (!ret) {
|
||||
perror("Failed to allocate remembered set");
|
||||
GC_CRASH();
|
||||
}
|
||||
memset(ret, 0, sizeof(*ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_release_buffer(struct gc_field_set *set,
|
||||
struct gc_edge_buffer *buf) {
|
||||
if (buf->size == GC_EDGE_BUFFER_CAPACITY) {
|
||||
gc_edge_buffer_list_push(&set->full, buf);
|
||||
} else {
|
||||
struct gc_lock lock = gc_lock_acquire(&set->lock);
|
||||
gc_edge_buffer_stack_push(&set->partly_full, buf, &lock);
|
||||
gc_lock_release(&lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_add_roots(struct gc_field_set *set, struct gc_tracer *tracer) {
|
||||
struct gc_edge_buffer *buf;
|
||||
struct gc_lock lock = gc_lock_acquire(&set->lock);
|
||||
while ((buf = gc_edge_buffer_stack_pop(&set->partly_full, &lock)))
|
||||
gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
|
||||
while ((buf = gc_edge_buffer_list_pop(&set->full)))
|
||||
gc_tracer_add_root(tracer, gc_root_edge_buffer(buf));
|
||||
gc_lock_release(&lock);
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_clear(struct gc_field_set *set,
|
||||
void (*forget_edge)(struct gc_edge, struct gc_heap*),
|
||||
struct gc_heap *heap) {
|
||||
struct gc_edge_buffer *partly_full = set->partly_full.list.head;
|
||||
struct gc_edge_buffer *full = set->full.head;
|
||||
// Clear the full and partly full sets now so that if a collector
|
||||
// wanted to it could re-add an edge to the remembered set.
|
||||
set->partly_full.list.head = NULL;
|
||||
set->full.head = NULL;
|
||||
struct gc_edge_buffer *buf, *next;
|
||||
for (buf = partly_full; buf; buf = next) {
|
||||
next = buf->next;
|
||||
buf->next = NULL;
|
||||
if (forget_edge)
|
||||
for (size_t i = 0; i < buf->size; i++)
|
||||
forget_edge(buf->edges[i], heap);
|
||||
buf->size = 0;
|
||||
gc_edge_buffer_list_push(&set->empty, buf);
|
||||
}
|
||||
for (buf = full; buf; buf = next) {
|
||||
next = buf->next;
|
||||
buf->next = NULL;
|
||||
if (forget_edge)
|
||||
for (size_t i = 0; i < buf->size; i++)
|
||||
forget_edge(buf->edges[i], heap);
|
||||
buf->size = 0;
|
||||
gc_edge_buffer_list_push(&set->empty, buf);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_field_set_visit_edge_buffer(struct gc_field_set *set,
|
||||
struct gc_edge_buffer *buf,
|
||||
int (*visit)(struct gc_edge,
|
||||
struct gc_heap*,
|
||||
void *data),
|
||||
struct gc_heap *heap,
|
||||
void *data) GC_ALWAYS_INLINE;
|
||||
static inline void
|
||||
gc_field_set_visit_edge_buffer(struct gc_field_set *set,
|
||||
struct gc_edge_buffer *buf,
|
||||
int (*visit)(struct gc_edge,
|
||||
struct gc_heap*,
|
||||
void *data),
|
||||
struct gc_heap *heap,
|
||||
void *data) {
|
||||
size_t i = 0;
|
||||
while (i < buf->size) {
|
||||
if (visit(buf->edges[i], heap, data))
|
||||
i++;
|
||||
else
|
||||
buf->edges[i] = buf->edges[--buf->size];
|
||||
}
|
||||
gc_field_set_release_buffer(set, buf);
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_writer_release_buffer(struct gc_field_set_writer *writer) {
|
||||
if (writer->buf) {
|
||||
gc_field_set_release_buffer(writer->set, writer->buf);
|
||||
writer->buf = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_writer_init(struct gc_field_set_writer *writer,
|
||||
struct gc_field_set *set) {
|
||||
writer->set = set;
|
||||
writer->buf = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
gc_field_set_writer_add_edge(struct gc_field_set_writer *writer,
|
||||
struct gc_edge edge) {
|
||||
struct gc_edge_buffer *buf = writer->buf;
|
||||
if (GC_UNLIKELY(!buf))
|
||||
writer->buf = buf = gc_field_set_acquire_buffer(writer->set);
|
||||
GC_ASSERT(buf->size < GC_EDGE_BUFFER_CAPACITY);
|
||||
buf->edges[buf->size++] = edge;
|
||||
if (GC_UNLIKELY(buf->size == GC_EDGE_BUFFER_CAPACITY)) {
|
||||
gc_edge_buffer_list_push(&writer->set->full, buf);
|
||||
writer->buf = NULL;
|
||||
}
|
||||
}
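// A minimal sketch of the writer API above, as a generational collector's
// write barrier might drive it (illustrative; a real mutator keeps the
// writer in thread-local state rather than on the stack):
static inline void
gc_field_set_writer_example(struct gc_field_set *remembered_set,
                            struct gc_edge edge) {
  struct gc_field_set_writer writer;
  gc_field_set_writer_init(&writer, remembered_set);
  // Buffers are acquired lazily and pushed onto the full list once they
  // reach GC_EDGE_BUFFER_CAPACITY entries.
  gc_field_set_writer_add_edge(&writer, edge);
  // Return any partly-filled buffer to the set before the writer goes away.
  gc_field_set_writer_release_buffer(&writer);
}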
|
||||
|
||||
#endif // FIELD_SET_H
|
31
libguile/whippet/src/freelist.h
Normal file
31
libguile/whippet/src/freelist.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
#ifndef FREELIST_H
|
||||
#define FREELIST_H
|
||||
|
||||
// A size-segregated freelist with linear-log buckets à la
|
||||
// https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/.
|
||||
|
||||
#include "gc-assert.h"
|
||||
#include "gc-histogram.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define DEFINE_FREELIST(name, max_value_bits, precision, node) \
|
||||
struct name { node buckets[((max_value_bits) << (precision)) + 1]; }; \
|
||||
static inline size_t name##_num_size_classes(void) { \
|
||||
return ((max_value_bits) << (precision)) + 1; \
|
||||
} \
|
||||
static inline uint64_t name##_bucket_min_val(size_t idx) { \
|
||||
GC_ASSERT(idx < name##_num_size_classes()); \
|
||||
return gc_histogram_bucket_min_val((precision), idx); \
|
||||
} \
|
||||
static inline void name##_init(struct name *f) { \
|
||||
memset(f, 0, sizeof(*f)); \
|
||||
} \
|
||||
static inline size_t name##_size_class(uint64_t val) { \
|
||||
return gc_histogram_bucket((max_value_bits), (precision), val); \
|
||||
} \
|
||||
static inline node* name##_bucket(struct name *f, uint64_t val) { \
|
||||
return &f->buckets[name##_size_class(val)]; \
|
||||
}
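// A minimal sketch of instantiating the macro above; the `hole` and
// `hole_freelist` names are invented for illustration.
//
//   struct hole { struct hole *next; size_t granules; };
//   DEFINE_FREELIST(hole_freelist, 16, 2, struct hole*);
//
// 16 value bits at precision 2 gives (16 << 2) + 1 = 65 buckets;
// hole_freelist_bucket(&fl, val) returns
// &fl.buckets[hole_freelist_size_class(val)], the bucket whose minimum
// value is at most val.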
|
||||
|
||||
#endif // FREELIST_H
|
22
libguile/whippet/src/gc-align.h
Normal file
22
libguile/whippet/src/gc-align.h
Normal file
|
@ -0,0 +1,22 @@
|
|||
#ifndef GC_ALIGN_H
|
||||
#define GC_ALIGN_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
static inline uintptr_t align_down(uintptr_t addr, size_t align) {
|
||||
return addr & ~(align - 1);
|
||||
}
|
||||
static inline uintptr_t align_up(uintptr_t addr, size_t align) {
|
||||
return align_down(addr + align - 1, align);
|
||||
}
|
||||
|
||||
// Poor man's equivalent of std::hardware_destructive_interference_size.
|
||||
#define AVOID_FALSE_SHARING 128
|
||||
#define ALIGNED_TO_AVOID_FALSE_SHARING \
|
||||
__attribute__((aligned(AVOID_FALSE_SHARING)))
|
||||
|
||||
#endif // GC_ALIGN_H
|
55
libguile/whippet/src/gc-ephemeron-internal.h
Normal file
55
libguile/whippet/src/gc-ephemeron-internal.h
Normal file
|
@ -0,0 +1,55 @@
|
|||
#ifndef GC_EPHEMERON_INTERNAL_H
|
||||
#define GC_EPHEMERON_INTERNAL_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include "gc-ephemeron.h"
|
||||
|
||||
struct gc_pending_ephemerons;
|
||||
|
||||
// API implemented by collector, for use by ephemerons:
|
||||
GC_INTERNAL int gc_visit_ephemeron_key(struct gc_edge edge,
|
||||
struct gc_heap *heap);
|
||||
GC_INTERNAL struct gc_pending_ephemerons*
|
||||
gc_heap_pending_ephemerons(struct gc_heap *heap);
|
||||
GC_INTERNAL unsigned gc_heap_ephemeron_trace_epoch(struct gc_heap *heap);
|
||||
|
||||
// API implemented by ephemerons, for use by collector:
|
||||
GC_INTERNAL struct gc_edge gc_ephemeron_key_edge(struct gc_ephemeron *eph);
|
||||
GC_INTERNAL struct gc_edge gc_ephemeron_value_edge(struct gc_ephemeron *eph);
|
||||
|
||||
GC_INTERNAL struct gc_pending_ephemerons*
|
||||
gc_prepare_pending_ephemerons(struct gc_pending_ephemerons *state,
|
||||
size_t target_size, double slop);
|
||||
|
||||
GC_INTERNAL void
|
||||
gc_resolve_pending_ephemerons(struct gc_ref obj, struct gc_heap *heap);
|
||||
|
||||
GC_INTERNAL void
|
||||
gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
|
||||
struct gc_heap *heap, size_t shard,
|
||||
size_t nshards);
|
||||
|
||||
GC_INTERNAL struct gc_ephemeron*
|
||||
gc_pop_resolved_ephemerons(struct gc_heap *heap);
|
||||
|
||||
GC_INTERNAL void
|
||||
gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data);
|
||||
|
||||
GC_INTERNAL void
|
||||
gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state,
|
||||
size_t shard, size_t nshards);
|
||||
|
||||
GC_INTERNAL void gc_ephemeron_init_internal(struct gc_heap *heap,
|
||||
struct gc_ephemeron *ephemeron,
|
||||
struct gc_ref key,
|
||||
struct gc_ref value);
|
||||
|
||||
#endif // GC_EPHEMERON_INTERNAL_H
|
583
libguile/whippet/src/gc-ephemeron.c
Normal file
583
libguile/whippet/src/gc-ephemeron.c
Normal file
|
@ -0,0 +1,583 @@
|
|||
#include <math.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define GC_IMPL 1
|
||||
|
||||
#include "address-hash.h"
|
||||
#include "debug.h"
|
||||
#include "gc-embedder-api.h"
|
||||
#include "gc-ephemeron-internal.h"
|
||||
|
||||
// # Overview
|
||||
//
|
||||
// An ephemeron is a conjunction consisting of the ephemeron object
|
||||
// itself, a "key" object, and a "value" object. If the ephemeron and
|
||||
// the key are live, then the value is kept live and can be looked up
|
||||
// given the ephemeron object.
|
||||
//
|
||||
// Sometimes we write this as E×K⇒V, indicating that you need both E and
|
||||
// K to get V. We'll use this notation in these comments sometimes.
|
||||
//
|
||||
// The key and the value of an ephemeron are never modified, except
|
||||
// possibly via forwarding during GC.
|
||||
//
|
||||
// If the key of an ephemeron ever becomes unreachable, the ephemeron
|
||||
// object will be marked as dead by the collector, and neither key nor
|
||||
// value will be accessible. Users can also explicitly mark an
|
||||
// ephemeron as dead.
|
||||
//
|
||||
// Users can build collections of ephemerons by chaining them together.
|
||||
// If an ephemeron ever becomes dead, the ephemeron will be removed from
|
||||
// the chain by the garbage collector.
|
||||
//
|
||||
// # Tracing algorithm
|
||||
//
|
||||
// Tracing ephemerons is somewhat complicated. Tracing the live objects
|
||||
// in a heap is usually a parallelizable fan-out kind of operation,
|
||||
// requiring minimal synchronization between tracing worker threads.
|
||||
// However with ephemerons, each worker thread may need to check if
|
||||
// there is a pending ephemeron E for an object K, marking the
|
||||
// associated V for later traversal by the tracer. Doing this without
|
||||
// introducing excessive global serialization points is the motivation
|
||||
// for the complications that follow.
|
||||
//
|
||||
// From the viewpoint of the garbage collector, an ephemeron E×K⇒V has 4
|
||||
// possible states:
|
||||
//
|
||||
// - Traced: An E that was already fully traced as of a given GC epoch.
|
||||
//
|
||||
// - Claimed: GC discovers E for the first time in a GC epoch
|
||||
//
|
||||
// - Pending: K's liveness is unknown
|
||||
//
|
||||
// - Resolved: K is live; V needs tracing
|
||||
//
|
||||
// The ephemeron state is kept in an atomic variable. The pending and
|
||||
// resolved states also have associated atomic list link fields as well;
|
||||
// it doesn't appear possible to coalesce them into a single field
|
||||
// without introducing serialization. Finally, there is a bit to
|
||||
// indicate whether a "traced" ephemeron is live or dead, and a field to
|
||||
// indicate the epoch at which it was last traced.
|
||||
//
|
||||
// Here is a diagram of the state transitions:
|
||||
//
|
||||
//       ,----->Traced<-----.
//      ,        | |         .
//     ,         v /          .
//     |       Claimed         |
//     |  ,-----/   \---.      |
//     |  v              v     |
//     Pending--------->Resolved
|
||||
//
|
||||
// Ephemerons are born in the traced state, for the current GC epoch.
|
||||
//
|
||||
// When the tracer sees an ephemeron E in the traced state it checks the
|
||||
// epoch. If the epoch is up to date, E stays in the traced state and
|
||||
// we are done.
|
||||
//
|
||||
// Otherwise, E transitions from traced to claimed. The thread that
|
||||
// claims E is then responsible for resetting E's pending and resolved
|
||||
// links, updating E's epoch, and tracing E's user-controlled chain
|
||||
// link.
|
||||
//
|
||||
// If the claiming thread sees that E was already marked dead by a
|
||||
// previous GC, or explicitly by the user, the ephemeron then
|
||||
// transitions from back to traced, ready for the next epoch.
|
||||
//
|
||||
// If the claiming thread sees K to already be known to be live, then E
|
||||
// is added to the global resolved set and E's state becomes resolved.
|
||||
//
|
||||
// Otherwise the claiming thread publishes K⇒E to the global pending
|
||||
// ephemeron table, via the pending link, and E transitions to pending.
|
||||
//
|
||||
// A pending ephemeron is a link in a buckets-of-chains concurrent hash
|
||||
// table. If its K is ever determined to be live, it becomes resolved,
|
||||
// and is added to a global set of resolved ephemerons. At the end of
|
||||
// GC, any ephemerons still pending are marked dead, transitioning their
|
||||
// states to traced.
|
||||
//
|
||||
// Note that the claiming thread -- the one that publishes K⇒E to the
|
||||
// global pending ephemeron table -- needs to re-check that K is still
|
||||
// untraced after adding K⇒E to the pending table, and move to resolved
|
||||
// if so.
|
||||
//
|
||||
// A resolved ephemeron needs its V to be traced. Incidentally its K
|
||||
// also needs tracing, to relocate any forwarding pointer. The thread
|
||||
// that pops an ephemeron from the resolved set is responsible for
|
||||
// tracing and for moving E's state to traced.
|
||||
//
|
||||
// # Concurrency
|
||||
//
|
||||
// All operations on ephemerons are wait-free. Sometimes only one
|
||||
// thread can make progress (for example for an ephemeron in the claimed
|
||||
// state), but no thread will be stalled waiting on other threads to
|
||||
// proceed.
|
||||
//
|
||||
// There is one interesting (from a concurrency point of view) data
|
||||
// structure used by the implementation of ephemerons, the singly-linked
|
||||
// list. Actually there are three of these; one is used as a stack and
// the other two are used as sets.
|
||||
//
|
||||
// The resolved set is implemented via a global `struct gc_ephemeron
|
||||
// *resolved` variable. Resolving an ephemeron does an atomic push to
|
||||
// this stack, via compare-and-swap (CAS); popping from the stack (also
|
||||
// via CAS) yields an ephemeron for tracing. Ephemerons are added to
|
||||
// the resolved set at most once per GC cycle, and the resolved set is
|
||||
// empty outside of GC.
|
||||
//
|
||||
// The operations that are supported on atomic stacks are:
|
||||
//
|
||||
// push(LOC, E, OFFSET) -> void
|
||||
//
|
||||
// The user-visible chain link and the link for the pending ephemeron
|
||||
// table are used to build atomic sets. In these you can add an
|
||||
// ephemeron to the beginning of the list, traverse the list link by
|
||||
// link to the end (indicated by NULL), and remove any list item.
|
||||
// Removing a list node proceeds in two phases: one, you mark the node
|
||||
// for removal, by changing the ephemeron's state; then, possibly on a
|
||||
// subsequent traversal, any predecessor may forward its link past
|
||||
// removed nodes. Because node values never change and nodes only go
|
||||
// from live to dead, the live list tail can always be reached by any
|
||||
// node, even from dead nodes.
|
||||
//
|
||||
// The operations that are supported on these atomic lists:
|
||||
//
|
||||
// push(LOC, E, OFFSET) -> void
|
||||
// pop(LOC, OFFSET) -> ephemeron or null
|
||||
// follow(LOC, OFFSET, STATE_OFFSET, LIVE_STATE) -> ephemeron or null
|
||||
//
|
||||
// These operations are all wait-free. The "push" operation is shared
|
||||
// between stack and set use cases. "pop" is for stack-like use cases.
|
||||
// The "follow" operation traverses a list, opportunistically eliding
|
||||
// nodes that have been marked dead, atomically updating the location
|
||||
// storing the next item.
|
||||
//
|
||||
// There are also accessors on ephemerons to their fields:
|
||||
//
|
||||
// key(E) -> value or null
|
||||
// value(E) -> value or null
|
||||
//
|
||||
// These operations retrieve the key and value, respectively, provided
|
||||
// that the ephemeron is not marked dead.
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Concurrent operations on ephemeron lists
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static void
|
||||
ephemeron_list_push(struct gc_ephemeron **loc,
|
||||
struct gc_ephemeron *head,
|
||||
struct gc_ephemeron** (*get_next)(struct gc_ephemeron*)) {
|
||||
struct gc_ephemeron *tail = atomic_load_explicit(loc, memory_order_acquire);
|
||||
while (1) {
|
||||
// There must be no concurrent readers of HEAD, a precondition that
|
||||
// we ensure by only publishing HEAD to LOC at most once per cycle.
|
||||
// Therefore we can use a normal store for the tail pointer.
|
||||
*get_next(head) = tail;
|
||||
if (atomic_compare_exchange_weak(loc, &tail, head))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static struct gc_ephemeron*
|
||||
ephemeron_list_pop(struct gc_ephemeron **loc,
|
||||
struct gc_ephemeron** (*get_next)(struct gc_ephemeron*)) {
|
||||
struct gc_ephemeron *head = atomic_load_explicit(loc, memory_order_acquire);
|
||||
while (head) {
|
||||
// Precondition: the result of get_next on an ephemeron is never
|
||||
// updated concurrently; OK to load non-atomically.
|
||||
struct gc_ephemeron *tail = *get_next(head);
|
||||
if (atomic_compare_exchange_weak(loc, &head, tail))
|
||||
break;
|
||||
}
|
||||
return head;
|
||||
}
|
||||
|
||||
static struct gc_ephemeron*
|
||||
ephemeron_list_follow(struct gc_ephemeron **loc,
|
||||
struct gc_ephemeron** (*get_next)(struct gc_ephemeron*),
|
||||
int (*is_live)(struct gc_ephemeron*)) {
|
||||
struct gc_ephemeron *head = atomic_load_explicit(loc, memory_order_acquire);
|
||||
if (!head) return NULL;
|
||||
|
||||
while (1) {
|
||||
struct gc_ephemeron *new_head = head;
|
||||
|
||||
// Skip past any dead nodes.
|
||||
while (new_head && !is_live(new_head))
|
||||
new_head = atomic_load_explicit(get_next(new_head), memory_order_acquire);
|
||||
|
||||
if (// If we didn't have to advance past any dead nodes, no need to
|
||||
// update LOC.
|
||||
(head == new_head)
|
||||
// Otherwise if we succeed in updating LOC, we're done.
|
||||
|| atomic_compare_exchange_strong(loc, &head, new_head)
|
||||
// Someone else managed to advance LOC; that's fine too.
|
||||
|| (head == new_head))
|
||||
return new_head;
|
||||
|
||||
// Otherwise we lost a race; loop and retry.
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// The ephemeron object type
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef GC_EMBEDDER_EPHEMERON_HEADER
|
||||
#error Embedder should define GC_EMBEDDER_EPHEMERON_HEADER
|
||||
#endif
|
||||
|
||||
enum {
|
||||
EPHEMERON_STATE_TRACED,
|
||||
EPHEMERON_STATE_CLAIMED,
|
||||
EPHEMERON_STATE_PENDING,
|
||||
EPHEMERON_STATE_RESOLVED,
|
||||
};
|
||||
|
||||
struct gc_ephemeron {
|
||||
GC_EMBEDDER_EPHEMERON_HEADER
|
||||
uint8_t state;
|
||||
unsigned epoch;
|
||||
struct gc_ephemeron *chain;
|
||||
struct gc_ephemeron *pending;
|
||||
struct gc_ephemeron *resolved;
|
||||
struct gc_ref key;
|
||||
struct gc_ref value;
|
||||
};
|
||||
|
||||
size_t gc_ephemeron_size(void) { return sizeof(struct gc_ephemeron); }
|
||||
|
||||
struct gc_edge gc_ephemeron_key_edge(struct gc_ephemeron *e) {
|
||||
return gc_edge(&e->key);
|
||||
}
|
||||
struct gc_edge gc_ephemeron_value_edge(struct gc_ephemeron *e) {
|
||||
return gc_edge(&e->value);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Operations on the user-controlled chain field
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static struct gc_ephemeron** ephemeron_chain(struct gc_ephemeron *e) {
|
||||
return &e->chain;
|
||||
}
|
||||
static int ephemeron_is_dead(struct gc_ephemeron *e) {
|
||||
return !atomic_load_explicit(&e->key.value, memory_order_acquire);
|
||||
}
|
||||
static int ephemeron_is_not_dead(struct gc_ephemeron *e) {
|
||||
return !ephemeron_is_dead(e);
|
||||
}
|
||||
|
||||
void gc_ephemeron_chain_push(struct gc_ephemeron **loc,
|
||||
struct gc_ephemeron *e) {
|
||||
ephemeron_list_push(loc, e, ephemeron_chain);
|
||||
}
|
||||
static struct gc_ephemeron* follow_chain(struct gc_ephemeron **loc) {
|
||||
return ephemeron_list_follow(loc, ephemeron_chain, ephemeron_is_not_dead);
|
||||
}
|
||||
struct gc_ephemeron* gc_ephemeron_chain_head(struct gc_ephemeron **loc) {
|
||||
return follow_chain(loc);
|
||||
}
|
||||
struct gc_ephemeron* gc_ephemeron_chain_next(struct gc_ephemeron *e) {
|
||||
return follow_chain(ephemeron_chain(e));
|
||||
}
|
||||
void gc_ephemeron_mark_dead(struct gc_ephemeron *e) {
|
||||
atomic_store_explicit(&e->key.value, 0, memory_order_release);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Operations on the GC-managed pending link
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static struct gc_ephemeron** ephemeron_pending(struct gc_ephemeron *e) {
|
||||
return &e->pending;
|
||||
}
|
||||
static uint8_t ephemeron_state(struct gc_ephemeron *e) {
|
||||
return atomic_load_explicit(&e->state, memory_order_acquire);
|
||||
}
|
||||
static int ephemeron_is_pending(struct gc_ephemeron *e) {
|
||||
return ephemeron_state(e) == EPHEMERON_STATE_PENDING;
|
||||
}
|
||||
|
||||
static void push_pending(struct gc_ephemeron **loc, struct gc_ephemeron *e) {
|
||||
ephemeron_list_push(loc, e, ephemeron_pending);
|
||||
}
|
||||
static struct gc_ephemeron* follow_pending(struct gc_ephemeron **loc) {
|
||||
return ephemeron_list_follow(loc, ephemeron_pending, ephemeron_is_pending);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Operations on the GC-managed resolved link
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static struct gc_ephemeron** ephemeron_resolved(struct gc_ephemeron *e) {
|
||||
return &e->resolved;
|
||||
}
|
||||
static void push_resolved(struct gc_ephemeron **loc, struct gc_ephemeron *e) {
|
||||
ephemeron_list_push(loc, e, ephemeron_resolved);
|
||||
}
|
||||
static struct gc_ephemeron* pop_resolved(struct gc_ephemeron **loc) {
|
||||
return ephemeron_list_pop(loc, ephemeron_resolved);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Access to the association
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct gc_ref gc_ephemeron_key(struct gc_ephemeron *e) {
|
||||
return gc_ref(atomic_load_explicit(&e->key.value, memory_order_acquire));
|
||||
}
|
||||
|
||||
struct gc_ref gc_ephemeron_value(struct gc_ephemeron *e) {
|
||||
return ephemeron_is_dead(e) ? gc_ref_null() : e->value;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Tracing ephemerons
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct gc_pending_ephemerons {
|
||||
struct gc_ephemeron* resolved;
|
||||
size_t nbuckets;
|
||||
double scale;
|
||||
struct gc_ephemeron* buckets[0];
|
||||
};
|
||||
|
||||
static const size_t MIN_PENDING_EPHEMERONS_SIZE = 32;
|
||||
|
||||
static size_t pending_ephemerons_byte_size(size_t nbuckets) {
|
||||
return sizeof(struct gc_pending_ephemerons) +
|
||||
sizeof(struct gc_ephemeron*) * nbuckets;
|
||||
}
|
||||
|
||||
static struct gc_pending_ephemerons*
|
||||
gc_make_pending_ephemerons(size_t byte_size) {
|
||||
size_t nbuckets = byte_size / sizeof(struct gc_ephemeron*);
|
||||
if (nbuckets < MIN_PENDING_EPHEMERONS_SIZE)
|
||||
nbuckets = MIN_PENDING_EPHEMERONS_SIZE;
|
||||
|
||||
struct gc_pending_ephemerons *ret =
|
||||
malloc(pending_ephemerons_byte_size(nbuckets));
|
||||
if (!ret)
|
||||
return NULL;
|
||||
|
||||
ret->resolved = NULL;
|
||||
ret->nbuckets = nbuckets;
|
||||
ret->scale = nbuckets / pow(2.0, sizeof(uintptr_t) * 8);
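  // The scale set above maps a full-range uintptr_t hash onto [0, nbuckets):
  // pending_ephemeron_bucket below computes idx = hash * nbuckets / 2^(bits
  // per uintptr_t), so each bucket covers an equal slice of the hash space.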
|
||||
for (size_t i = 0; i < nbuckets; i++)
|
||||
ret->buckets[i] = NULL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct gc_pending_ephemerons*
|
||||
gc_prepare_pending_ephemerons(struct gc_pending_ephemerons *state,
|
||||
size_t target_byte_size, double slop) {
|
||||
size_t existing =
|
||||
state ? pending_ephemerons_byte_size(state->nbuckets) : 0;
|
||||
slop += 1.0;
|
||||
if (existing * slop > target_byte_size && existing < target_byte_size * slop)
|
||||
return state;
|
||||
|
||||
struct gc_pending_ephemerons *new_state =
|
||||
gc_make_pending_ephemerons(target_byte_size);
|
||||
|
||||
if (!new_state)
|
||||
return state;
|
||||
|
||||
free(state);
|
||||
return new_state;
|
||||
}
|
||||
|
||||
static struct gc_ephemeron**
|
||||
pending_ephemeron_bucket(struct gc_pending_ephemerons *state,
|
||||
struct gc_ref ref) {
|
||||
uintptr_t hash = hash_address(gc_ref_value(ref));
|
||||
size_t idx = hash * state->scale;
|
||||
GC_ASSERT(idx < state->nbuckets);
|
||||
return &state->buckets[idx];
|
||||
}
|
||||
|
||||
static void
|
||||
add_pending_ephemeron(struct gc_pending_ephemerons *state,
|
||||
struct gc_ephemeron *e) {
|
||||
struct gc_ephemeron **bucket = pending_ephemeron_bucket(state, e->key);
|
||||
atomic_store_explicit(&e->state, EPHEMERON_STATE_PENDING,
|
||||
memory_order_release);
|
||||
push_pending(bucket, e);
|
||||
}
|
||||
|
||||
static void maybe_resolve_ephemeron(struct gc_pending_ephemerons *state,
|
||||
struct gc_ephemeron *e) {
|
||||
uint8_t expected = EPHEMERON_STATE_PENDING;
|
||||
if (atomic_compare_exchange_strong(&e->state, &expected,
|
||||
EPHEMERON_STATE_RESOLVED))
|
||||
push_resolved(&state->resolved, e);
|
||||
}
|
||||
|
||||
// Precondition: OBJ has already been copied to tospace, but OBJ is a
|
||||
// fromspace ref.
|
||||
void gc_resolve_pending_ephemerons(struct gc_ref obj, struct gc_heap *heap) {
|
||||
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
|
||||
struct gc_ephemeron **bucket = pending_ephemeron_bucket(state, obj);
|
||||
for (struct gc_ephemeron *link = follow_pending(bucket);
|
||||
link;
|
||||
link = follow_pending(&link->pending)) {
|
||||
if (gc_ref_value(obj) == gc_ref_value(link->key)) {
|
||||
gc_visit_ephemeron_key(gc_ephemeron_key_edge(link), heap);
|
||||
// PENDING -> RESOLVED, if it was pending.
|
||||
maybe_resolve_ephemeron(state, link);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gc_trace_ephemeron(struct gc_ephemeron *e,
|
||||
void (*visit)(struct gc_edge edge, struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data) {
|
||||
unsigned epoch = gc_heap_ephemeron_trace_epoch(heap);
|
||||
uint8_t expected = EPHEMERON_STATE_TRACED;
|
||||
// TRACED[_] -> CLAIMED[_].
|
||||
if (!atomic_compare_exchange_strong(&e->state, &expected,
|
||||
EPHEMERON_STATE_CLAIMED))
|
||||
return;
|
||||
|
||||
|
||||
if (e->epoch == epoch) {
|
||||
// CLAIMED[epoch] -> TRACED[epoch].
|
||||
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
|
||||
memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
// CLAIMED[!epoch] -> CLAIMED[epoch].
|
||||
e->epoch = epoch;
|
||||
e->pending = NULL;
|
||||
e->resolved = NULL;
|
||||
|
||||
// Trace chain successors, eliding any intermediate dead links. Note
|
||||
// that there is a race between trace-time evacuation of the next link
|
||||
// in the chain and any mutation of that link pointer by the mutator
|
||||
// (which can only be to advance the chain forward past dead links).
|
||||
// Collectors using this API have to eliminate this race, for example
|
||||
// by not evacuating while the mutator is running.
|
||||
follow_chain(&e->chain);
|
||||
visit(gc_edge(&e->chain), heap, trace_data);
|
||||
|
||||
// Similarly there is a race between the mutator marking an ephemeron
|
||||
// as dead and here; the consequence would be that we treat an
|
||||
// ephemeron as live when it's not, but only for this cycle. No big
|
||||
// deal.
|
||||
if (ephemeron_is_dead(e)) {
|
||||
// CLAIMED[epoch] -> TRACED[epoch].
|
||||
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
|
||||
memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
// If K is live, trace V and we are done.
|
||||
if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap)) {
|
||||
visit(gc_ephemeron_value_edge(e), heap, trace_data);
|
||||
// CLAIMED[epoch] -> TRACED[epoch].
|
||||
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
|
||||
memory_order_release);
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise K is not yet traced, so we don't know if it is live.
|
||||
// Publish the ephemeron to a global table.
|
||||
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
|
||||
// CLAIMED[epoch] -> PENDING.
|
||||
add_pending_ephemeron(state, e);
|
||||
|
||||
// Given an ephemeron E×K⇒V, there is a race between marking K and E.
|
||||
// One thread could go to mark E and see that K is unmarked, so we get
|
||||
// here. Meanwhile another thread could go to mark K and not see E in
|
||||
// the global table yet. Therefore after publishing E, we have to
|
||||
// check the mark on K again.
|
||||
if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap))
|
||||
// K visited by another thread while we published E; PENDING ->
|
||||
// RESOLVED, if still PENDING.
|
||||
maybe_resolve_ephemeron(state, e);
|
||||
}
|
||||
|
||||
void
|
||||
gc_scan_pending_ephemerons(struct gc_pending_ephemerons *state,
|
||||
struct gc_heap *heap, size_t shard,
|
||||
size_t nshards) {
|
||||
GC_ASSERT(shard < nshards);
|
||||
size_t start = state->nbuckets * 1.0 * shard / nshards;
|
||||
size_t end = state->nbuckets * 1.0 * (shard + 1) / nshards;
|
||||
for (size_t idx = start; idx < end; idx++) {
|
||||
for (struct gc_ephemeron *e = follow_pending(&state->buckets[idx]);
|
||||
e;
|
||||
e = follow_pending(&e->pending)) {
|
||||
if (gc_visit_ephemeron_key(gc_ephemeron_key_edge(e), heap))
|
||||
// PENDING -> RESOLVED, if PENDING.
|
||||
maybe_resolve_ephemeron(state, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct gc_ephemeron*
|
||||
gc_pop_resolved_ephemerons(struct gc_heap *heap) {
|
||||
struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
|
||||
return atomic_exchange(&state->resolved, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
gc_trace_resolved_ephemerons(struct gc_ephemeron *resolved,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data) {
|
||||
for (; resolved; resolved = resolved->resolved) {
|
||||
visit(gc_ephemeron_value_edge(resolved), heap, trace_data);
|
||||
// RESOLVED -> TRACED.
|
||||
atomic_store_explicit(&resolved->state, EPHEMERON_STATE_TRACED,
|
||||
memory_order_release);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gc_sweep_pending_ephemerons(struct gc_pending_ephemerons *state,
|
||||
size_t shard, size_t nshards) {
|
||||
GC_ASSERT(shard < nshards);
|
||||
size_t start = state->nbuckets * 1.0 * shard / nshards;
|
||||
size_t end = state->nbuckets * 1.0 * (shard + 1) / nshards;
|
||||
for (size_t idx = start; idx < end; idx++) {
|
||||
struct gc_ephemeron **bucket = &state->buckets[idx];
|
||||
for (struct gc_ephemeron *e = follow_pending(bucket);
|
||||
e;
|
||||
e = follow_pending(&e->pending)) {
|
||||
// PENDING -> TRACED, but dead.
|
||||
atomic_store_explicit(&e->key.value, 0, memory_order_release);
|
||||
atomic_store_explicit(&e->state, EPHEMERON_STATE_TRACED,
|
||||
memory_order_release);
|
||||
}
|
||||
atomic_store_explicit(bucket, NULL, memory_order_release);
|
||||
}
|
||||
}
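
// A hedged sketch, not part of the upstream file: one way a collector might
// drive the machinery above at the end of a trace.  It assumes a single
// shard, and takes the collector-specific "trace until the grey worklist is
// empty" step as a callback, since that part lives outside this file.
static void
trace_ephemerons_to_fixpoint_example(struct gc_heap *heap,
                                     void (*visit)(struct gc_edge edge,
                                                   struct gc_heap *heap,
                                                   void *visit_data),
                                     void (*trace_until_empty)(struct gc_heap *heap),
                                     void *trace_data) {
  struct gc_pending_ephemerons *state = gc_heap_pending_ephemerons(heap);
  for (;;) {
    // Check pending ephemerons against the current mark state; any whose
    // key has become live move to the resolved set.
    gc_scan_pending_ephemerons(state, heap, 0, 1);
    struct gc_ephemeron *resolved = gc_pop_resolved_ephemerons(heap);
    if (!resolved)
      break;
    // Trace the values of resolved ephemerons, then drain the resulting
    // grey objects; that may in turn resolve more pending ephemerons.
    gc_trace_resolved_ephemerons(resolved, visit, heap, trace_data);
    trace_until_empty(heap);
  }
  // Anything still pending has a dead key: mark it dead and clear the
  // table, ready for the next epoch.
  gc_sweep_pending_ephemerons(state, 0, 1);
}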
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Allocation & initialization
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void gc_ephemeron_init_internal(struct gc_heap *heap,
|
||||
struct gc_ephemeron *ephemeron,
|
||||
struct gc_ref key, struct gc_ref value) {
|
||||
// Caller responsible for any write barrier, though really the
|
||||
// assumption is that the ephemeron is younger than the key and the
|
||||
// value.
|
||||
ephemeron->state = EPHEMERON_STATE_TRACED;
|
||||
ephemeron->epoch = gc_heap_ephemeron_trace_epoch(heap) - 1;
|
||||
ephemeron->chain = NULL;
|
||||
ephemeron->pending = NULL;
|
||||
ephemeron->resolved = NULL;
|
||||
ephemeron->key = key;
|
||||
ephemeron->value = value;
|
||||
}
|
65
libguile/whippet/src/gc-finalizer-internal.h
Normal file
@ -0,0 +1,65 @@
#ifndef GC_FINALIZER_INTERNAL_H
|
||||
#define GC_FINALIZER_INTERNAL_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include "gc-finalizer.h"
|
||||
#include "root.h"
|
||||
|
||||
struct gc_finalizer_state;
|
||||
|
||||
GC_INTERNAL
|
||||
struct gc_finalizer_state* gc_make_finalizer_state(void);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_finalizer_init_internal(struct gc_finalizer *f,
|
||||
struct gc_ref object,
|
||||
struct gc_ref closure);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_finalizer_attach_internal(struct gc_finalizer_state *state,
|
||||
struct gc_finalizer *f,
|
||||
unsigned priority);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_finalizer_externally_activated(struct gc_finalizer *f);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_finalizer_externally_fired(struct gc_finalizer_state *state,
|
||||
struct gc_finalizer *finalizer);
|
||||
|
||||
GC_INTERNAL
|
||||
struct gc_finalizer* gc_finalizer_state_pop(struct gc_finalizer_state *state);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_finalizer_fire(struct gc_finalizer **fired_list_loc,
|
||||
struct gc_finalizer *finalizer);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_finalizer_state_set_callback(struct gc_finalizer_state *state,
|
||||
gc_finalizer_callback callback);
|
||||
|
||||
GC_INTERNAL
|
||||
size_t gc_visit_finalizer_roots(struct gc_finalizer_state *state,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data);
|
||||
|
||||
GC_INTERNAL
|
||||
size_t gc_resolve_finalizers(struct gc_finalizer_state *state,
|
||||
size_t priority,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data);
|
||||
|
||||
GC_INTERNAL
|
||||
void gc_notify_finalizers(struct gc_finalizer_state *state,
|
||||
struct gc_heap *heap);
|
||||
|
||||
#endif // GC_FINALIZER_INTERNAL_H
|
307
libguile/whippet/src/gc-finalizer.c
Normal file
@ -0,0 +1,307 @@
#include <math.h>
|
||||
#include <stdatomic.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define GC_IMPL 1
|
||||
|
||||
#include "debug.h"
|
||||
#include "gc-embedder-api.h"
|
||||
#include "gc-ephemeron-internal.h" // for gc_visit_ephemeron_key
|
||||
#include "gc-finalizer-internal.h"
|
||||
|
||||
// # Overview
|
||||
//
|
||||
// See gc-finalizer.h for an overview of finalizers from the user and
|
||||
// embedder point of view.
|
||||
//
|
||||
// ## Tracing
|
||||
//
|
||||
// From the perspective of the collector implementation, finalizers are
|
||||
// GC-managed objects, allowing their size to be accounted for within
|
||||
// the heap size. They get traced during collection, allowing for
|
||||
// relocation of their object references, and allowing the finalizer
|
||||
// object itself to be evacuated if appropriate.
|
||||
//
|
||||
// The collector holds on to outstanding finalizers in a *finalizer
|
||||
// state*, which holds one *finalizer table* for each priority. We
|
||||
// don't need to look up finalizers by object, so we could just hold
|
||||
// them in a big list, but to facilitate parallelism we slice them
|
||||
// across some number of shards, where the "next" pointer is part of the
|
||||
// finalizer object.
|
||||
//
|
||||
// There are a number of ways you could imagine integrating finalizers
|
||||
// into a system. The way Whippet does it goes like this. See
|
||||
// https://wingolog.org/archives/2022/10/31/ephemerons-and-finalizers
|
||||
// and
|
||||
// https://wingolog.org/archives/2024/07/22/finalizers-guardians-phantom-references-et-cetera
|
||||
// for some further discussion.
|
||||
//
|
||||
// 1. The collector should begin a cycle by adding all shards from all
|
||||
// priorities to the root set. When the embedder comes across a
|
||||
// finalizer (as it will, because we added them to the root set),
|
||||
// it traces it via gc_trace_finalizer(), which will visit the
|
||||
// finalizer's closure and its "next" pointer.
|
||||
//
|
||||
// 2. After the full trace, and then the fix-point on pending
|
||||
// ephemerons, for each priority from 0 upwards:
|
||||
//
|
||||
// i. Visit each finalizable object in the table. If the object
|
||||
// was as-yet unvisited, then it is unreachable and thus
|
||||
// finalizable; the finalizer is added to the global "fired"
|
||||
// list, and changes state from "attached" to "fired".
|
||||
// Otherwise it is re-added to the finalizer table.
|
||||
//
|
||||
// ii. If any finalizer was added to the fired list, then those
|
||||
// objects were also added to the grey worklist; run tracing
|
||||
// again until the grey set is empty, including ephemerons.
|
||||
//
|
||||
// 3. Finally, call the finalizer callback if the list of fired finalizers is
|
||||
// nonempty.
|
||||
//
|
||||
// ## Concurrency
|
||||
//
|
||||
// The finalizer table is wait-free. It keeps a count of active finalizers, and
|
||||
// chooses a bucket based on the count modulo the number of buckets. Adding a
|
||||
// finalizer to the table is an atomic push on a linked list. The table is
|
||||
// completely rebuilt during the GC pause, redistributing survivor entries
|
||||
// across the buckets, and pushing all finalizable entries onto the single
|
||||
// "fired" linked list.
|
||||
//
|
||||
// The fired list is also wait-free. As noted above, it is built
|
||||
// during the pause, and mutators pop items off of it atomically.
|
||||
//
|
||||
// ## Generations
|
||||
//
|
||||
// It would be ideal if a young generation had its own finalizer table.
|
||||
// Promoting an object would require promoting its finalizer to the old
|
||||
// finalizer table. Not yet implemented (but would be nice).
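//
// As an illustration (a hedged sketch, not part of the upstream file), steps
// 2 and 3 above might look like this from the collector's side during the
// pause, with the collector-specific "trace until the grey worklist is
// empty" step taken as a callback:

static void
resolve_and_notify_finalizers_example(struct gc_finalizer_state *state,
                                      struct gc_heap *heap,
                                      void (*visit)(struct gc_edge edge,
                                                    struct gc_heap *heap,
                                                    void *visit_data),
                                      void (*trace_until_empty)(struct gc_heap *heap),
                                      void *visit_data) {
  // Step 2: for each priority, fire finalizers whose objects went unvisited.
  for (size_t prio = 0; prio < gc_finalizer_priority_count(); prio++) {
    if (gc_resolve_finalizers(state, prio, visit, heap, visit_data))
      // Fired finalizers shaded their objects grey; re-run tracing
      // (including the ephemeron fix-point) before the next priority.
      trace_until_empty(heap);
  }
  // Step 3: if anything fired, notify the embedder so mutators can pop
  // fired finalizers via gc_finalizer_state_pop() and run them.
  gc_notify_finalizers(state, heap);
}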
|
||||
|
||||
#ifndef GC_EMBEDDER_FINALIZER_HEADER
|
||||
#error Embedder should define GC_EMBEDDER_FINALIZER_HEADER
|
||||
#endif
|
||||
|
||||
enum finalizer_state {
|
||||
FINALIZER_STATE_INIT = 0, // Finalizer is newborn.
|
||||
FINALIZER_STATE_ACTIVE, // Finalizer is ours and in the finalizer table.
|
||||
FINALIZER_STATE_FIRED, // Finalizer is handed back to mutator.
|
||||
};
|
||||
|
||||
struct gc_finalizer {
|
||||
GC_EMBEDDER_FINALIZER_HEADER
|
||||
enum finalizer_state state;
|
||||
struct gc_ref object;
|
||||
struct gc_ref closure;
|
||||
struct gc_finalizer *next;
|
||||
};
|
||||
|
||||
// Enough buckets to parallelize closure marking. No need to look up a
|
||||
// finalizer for a given object.
|
||||
#define BUCKET_COUNT 32
|
||||
|
||||
struct gc_finalizer_table {
|
||||
size_t finalizer_count;
|
||||
struct gc_finalizer* buckets[BUCKET_COUNT];
|
||||
};
|
||||
|
||||
struct gc_finalizer_state {
|
||||
gc_finalizer_callback have_finalizers;
|
||||
struct gc_finalizer *fired;
|
||||
size_t fired_this_cycle;
|
||||
size_t table_count;
|
||||
struct gc_finalizer_table tables[0];
|
||||
};
|
||||
|
||||
// public
|
||||
size_t gc_finalizer_size(void) { return sizeof(struct gc_finalizer); }
|
||||
struct gc_ref gc_finalizer_object(struct gc_finalizer *f) { return f->object; }
|
||||
struct gc_ref gc_finalizer_closure(struct gc_finalizer *f) { return f->closure; }
|
||||
|
||||
// internal
|
||||
struct gc_finalizer_state* gc_make_finalizer_state(void) {
|
||||
size_t ntables = gc_finalizer_priority_count();
|
||||
size_t size = (sizeof(struct gc_finalizer_state) +
|
||||
sizeof(struct gc_finalizer_table) * ntables);
|
||||
struct gc_finalizer_state *ret = malloc(size);
|
||||
if (!ret)
|
||||
return NULL;
|
||||
memset(ret, 0, size);
|
||||
ret->table_count = ntables;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void finalizer_list_push(struct gc_finalizer **loc,
|
||||
struct gc_finalizer *head) {
|
||||
struct gc_finalizer *tail = atomic_load_explicit(loc, memory_order_acquire);
|
||||
do {
|
||||
head->next = tail;
|
||||
} while (!atomic_compare_exchange_weak(loc, &tail, head));
|
||||
}
|
||||
|
||||
static struct gc_finalizer* finalizer_list_pop(struct gc_finalizer **loc) {
|
||||
struct gc_finalizer *head = atomic_load_explicit(loc, memory_order_acquire);
|
||||
do {
|
||||
if (!head) return NULL;
|
||||
} while (!atomic_compare_exchange_weak(loc, &head, head->next));
|
||||
head->next = NULL;
|
||||
return head;
|
||||
}
|
||||
|
||||
static void add_finalizer_to_table(struct gc_finalizer_table *table,
|
||||
struct gc_finalizer *f) {
|
||||
size_t count = atomic_fetch_add_explicit(&table->finalizer_count, 1,
|
||||
memory_order_relaxed);
|
||||
struct gc_finalizer **loc = &table->buckets[count % BUCKET_COUNT];
|
||||
finalizer_list_push(loc, f);
|
||||
}
|
||||
|
||||
// internal
|
||||
void gc_finalizer_init_internal(struct gc_finalizer *f,
|
||||
struct gc_ref object,
|
||||
struct gc_ref closure) {
|
||||
// Caller responsible for any write barrier, though really the
|
||||
// assumption is that the finalizer is younger than the key and the
|
||||
// value.
|
||||
if (f->state != FINALIZER_STATE_INIT)
|
||||
GC_CRASH();
|
||||
GC_ASSERT(gc_ref_is_null(f->object));
|
||||
f->object = object;
|
||||
f->closure = closure;
|
||||
}
|
||||
|
||||
// internal
|
||||
void gc_finalizer_attach_internal(struct gc_finalizer_state *state,
|
||||
struct gc_finalizer *f,
|
||||
unsigned priority) {
|
||||
// Caller responsible for any write barrier, though really the
|
||||
// assumption is that the finalizer is younger than the key and the
|
||||
// value.
|
||||
if (f->state != FINALIZER_STATE_INIT)
|
||||
GC_CRASH();
|
||||
if (gc_ref_is_null(f->object))
|
||||
GC_CRASH();
|
||||
|
||||
f->state = FINALIZER_STATE_ACTIVE;
|
||||
|
||||
GC_ASSERT(priority < state->table_count);
|
||||
add_finalizer_to_table(&state->tables[priority], f);
|
||||
}
|
||||
|
||||
// internal
|
||||
struct gc_finalizer* gc_finalizer_state_pop(struct gc_finalizer_state *state) {
|
||||
return finalizer_list_pop(&state->fired);
|
||||
}
|
||||
|
||||
static void
|
||||
add_fired_finalizer(struct gc_finalizer_state *state,
|
||||
struct gc_finalizer *f) {
|
||||
if (f->state != FINALIZER_STATE_ACTIVE)
|
||||
GC_CRASH();
|
||||
f->state = FINALIZER_STATE_FIRED;
|
||||
finalizer_list_push(&state->fired, f);
|
||||
}
|
||||
|
||||
// internal
|
||||
void
|
||||
gc_finalizer_externally_activated(struct gc_finalizer *f) {
|
||||
if (f->state != FINALIZER_STATE_INIT)
|
||||
GC_CRASH();
|
||||
f->state = FINALIZER_STATE_ACTIVE;
|
||||
}
|
||||
|
||||
// internal
|
||||
void
|
||||
gc_finalizer_externally_fired(struct gc_finalizer_state *state,
|
||||
struct gc_finalizer *f) {
|
||||
add_fired_finalizer(state, f);
|
||||
}
|
||||
|
||||
// internal
|
||||
size_t gc_visit_finalizer_roots(struct gc_finalizer_state *state,
|
||||
void (*visit)(struct gc_edge,
|
||||
struct gc_heap*,
|
||||
void *),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
size_t count = 0;
|
||||
for (size_t tidx = 0; tidx < state->table_count; tidx++) {
|
||||
struct gc_finalizer_table *table = &state->tables[tidx];
|
||||
if (table->finalizer_count) {
|
||||
count += table->finalizer_count;
|
||||
for (size_t bidx = 0; bidx < BUCKET_COUNT; bidx++)
|
||||
visit(gc_edge(&table->buckets[bidx]), heap, visit_data);
|
||||
}
|
||||
}
|
||||
visit(gc_edge(&state->fired), heap, visit_data);
|
||||
return count;
|
||||
}
|
||||
|
||||
// public
|
||||
void gc_trace_finalizer(struct gc_finalizer *f,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *trace_data) {
|
||||
if (f->state != FINALIZER_STATE_ACTIVE)
|
||||
visit(gc_edge(&f->object), heap, trace_data);
|
||||
visit(gc_edge(&f->closure), heap, trace_data);
|
||||
visit(gc_edge(&f->next), heap, trace_data);
|
||||
}
|
||||
|
||||
// Sweeping is currently serial. It could run in parallel but we want to
|
||||
// resolve all finalizers before shading any additional node. Perhaps we should
|
||||
// relax this restriction though; if the user attaches two finalizers to the
|
||||
// same object, it's probably OK to only have one finalizer fire per cycle.
|
||||
|
||||
// internal
|
||||
size_t gc_resolve_finalizers(struct gc_finalizer_state *state,
|
||||
size_t priority,
|
||||
void (*visit)(struct gc_edge edge,
|
||||
struct gc_heap *heap,
|
||||
void *visit_data),
|
||||
struct gc_heap *heap,
|
||||
void *visit_data) {
|
||||
GC_ASSERT(priority < state->table_count);
|
||||
struct gc_finalizer_table *table = &state->tables[priority];
|
||||
size_t finalizers_fired = 0;
|
||||
// Visit each finalizer in the table. If its object was already visited,
|
||||
// re-add the finalizer to the table. Otherwise enqueue its object edge for
|
||||
// tracing and mark the finalizer as fired.
|
||||
if (table->finalizer_count) {
|
||||
struct gc_finalizer_table scratch = { 0, };
|
||||
for (size_t bidx = 0; bidx < BUCKET_COUNT; bidx++) {
|
||||
struct gc_finalizer *next;
|
||||
for (struct gc_finalizer *f = table->buckets[bidx]; f; f = next) {
|
||||
next = f->next;
|
||||
f->next = NULL;
|
||||
struct gc_edge edge = gc_edge(&f->object);
|
||||
if (gc_visit_ephemeron_key(edge, heap)) {
|
||||
add_finalizer_to_table(&scratch, f);
|
||||
} else {
|
||||
finalizers_fired++;
|
||||
visit(edge, heap, visit_data);
|
||||
add_fired_finalizer(state, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
memcpy(table, &scratch, sizeof(*table));
|
||||
}
|
||||
state->fired_this_cycle += finalizers_fired;
|
||||
return finalizers_fired;
|
||||
}
|
||||
|
||||
// internal
|
||||
void gc_notify_finalizers(struct gc_finalizer_state *state,
|
||||
struct gc_heap *heap) {
|
||||
if (state->fired_this_cycle && state->have_finalizers) {
|
||||
state->have_finalizers(heap, state->fired_this_cycle);
|
||||
state->fired_this_cycle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// internal
|
||||
void gc_finalizer_state_set_callback(struct gc_finalizer_state *state,
|
||||
gc_finalizer_callback callback) {
|
||||
state->have_finalizers = callback;
|
||||
}
|
16
libguile/whippet/src/gc-internal.h
Normal file
@ -0,0 +1,16 @@
#ifndef GC_INTERNAL_H
|
||||
#define GC_INTERNAL_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include "gc-ephemeron-internal.h"
|
||||
#include "gc-finalizer-internal.h"
|
||||
#include "gc-options-internal.h"
|
||||
|
||||
uint64_t gc_heap_total_bytes_allocated(struct gc_heap *heap);
|
||||
void gc_mutator_adjust_heap_size(struct gc_mutator *mut, uint64_t new_size);
|
||||
|
||||
|
||||
#endif // GC_INTERNAL_H
|
24
libguile/whippet/src/gc-lock.h
Normal file
@ -0,0 +1,24 @@
#ifndef GC_LOCK_H
|
||||
#define GC_LOCK_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include "gc-assert.h"
|
||||
|
||||
struct gc_lock {
|
||||
pthread_mutex_t *lock;
|
||||
};
|
||||
|
||||
static struct gc_lock
|
||||
gc_lock_acquire(pthread_mutex_t *lock) {
|
||||
pthread_mutex_lock(lock);
|
||||
return (struct gc_lock){ lock };
|
||||
}
|
||||
|
||||
static void
|
||||
gc_lock_release(struct gc_lock *lock) {
|
||||
GC_ASSERT(lock->lock);
|
||||
pthread_mutex_unlock(lock->lock);
|
||||
lock->lock = NULL;
|
||||
}
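
// Usage sketch (MUTEX stands for any pthread_mutex_t owned by the caller):
//
//   struct gc_lock lock = gc_lock_acquire(&mutex);
//   /* ... critical section ... */
//   gc_lock_release(&lock);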
|
||||
|
||||
#endif // GC_LOCK_H
|
32
libguile/whippet/src/gc-options-internal.h
Normal file
@ -0,0 +1,32 @@
#ifndef GC_OPTIONS_INTERNAL_H
|
||||
#define GC_OPTIONS_INTERNAL_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include "gc-options.h"
|
||||
|
||||
struct gc_common_options {
|
||||
enum gc_heap_size_policy heap_size_policy;
|
||||
size_t heap_size;
|
||||
size_t maximum_heap_size;
|
||||
double heap_size_multiplier;
|
||||
double heap_expansiveness;
|
||||
int parallelism;
|
||||
};
|
||||
|
||||
GC_INTERNAL void gc_init_common_options(struct gc_common_options *options);
|
||||
|
||||
GC_INTERNAL int gc_common_option_from_string(const char *str);
|
||||
|
||||
GC_INTERNAL int gc_common_options_set_int(struct gc_common_options *options,
|
||||
int option, int value);
|
||||
GC_INTERNAL int gc_common_options_set_size(struct gc_common_options *options,
|
||||
int option, size_t value);
|
||||
GC_INTERNAL int gc_common_options_set_double(struct gc_common_options *options,
|
||||
int option, double value);
|
||||
GC_INTERNAL int gc_common_options_parse_and_set(struct gc_common_options *options,
|
||||
int option, const char *value);
|
||||
|
||||
#endif // GC_OPTIONS_INTERNAL_H
|
198
libguile/whippet/src/gc-options.c
Normal file
@ -0,0 +1,198 @@
#include <limits.h>
|
||||
#include <malloc.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define GC_IMPL 1
|
||||
|
||||
#include "gc-options-internal.h"
|
||||
#include "gc-platform.h"
|
||||
|
||||
// M(UPPER, lower, repr, type, parser, default, min, max)
|
||||
#define FOR_EACH_INT_GC_OPTION(M) \
|
||||
M(HEAP_SIZE_POLICY, heap_size_policy, "heap-size-policy", \
|
||||
int, heap_size_policy, GC_HEAP_SIZE_FIXED, GC_HEAP_SIZE_FIXED, \
|
||||
GC_HEAP_SIZE_ADAPTIVE) \
|
||||
M(PARALLELISM, parallelism, "parallelism", \
|
||||
int, int, default_parallelism(), 1, 64)
|
||||
|
||||
#define FOR_EACH_SIZE_GC_OPTION(M) \
|
||||
M(HEAP_SIZE, heap_size, "heap-size", \
|
||||
size, size, 6 * 1024 * 1024, 0, -1) \
|
||||
M(MAXIMUM_HEAP_SIZE, maximum_heap_size, "maximum-heap-size", \
|
||||
size, size, 0, 0, -1)
|
||||
|
||||
#define FOR_EACH_DOUBLE_GC_OPTION(M) \
|
||||
M(HEAP_SIZE_MULTIPLIER, heap_size_multiplier, "heap-size-multiplier", \
|
||||
double, double, 1.75, 1.0, 1e6) \
|
||||
M(HEAP_EXPANSIVENESS, heap_expansiveness, "heap-expansiveness", \
|
||||
double, double, 1.0, 0.0, 50.0)
|
||||
|
||||
typedef int gc_option_int;
|
||||
typedef size_t gc_option_size;
|
||||
typedef double gc_option_double;
|
||||
|
||||
#define FOR_EACH_COMMON_GC_OPTION(M) \
|
||||
FOR_EACH_INT_GC_OPTION(M) \
|
||||
FOR_EACH_SIZE_GC_OPTION(M) \
|
||||
FOR_EACH_DOUBLE_GC_OPTION(M)
|
||||
|
||||
static int clamp_int(int n, int lo, int hi) {
|
||||
return n < lo ? lo : n > hi ? hi : n;
|
||||
}
|
||||
static size_t clamp_size(size_t n, size_t lo, size_t hi) {
|
||||
return n < lo ? lo : n > hi ? hi : n;
|
||||
}
|
||||
static double clamp_double(double n, double lo, double hi) {
|
||||
return n < lo ? lo : n > hi ? hi : n;
|
||||
}
|
||||
|
||||
static int default_parallelism(void) {
|
||||
return clamp_int(gc_platform_processor_count(), 1, 8);
|
||||
}
|
||||
|
||||
void gc_init_common_options(struct gc_common_options *options) {
|
||||
#define INIT(UPPER, lower, repr, type, parser, default, min, max) \
|
||||
options->lower = default;
|
||||
FOR_EACH_COMMON_GC_OPTION(INIT)
|
||||
#undef INIT
|
||||
}
|
||||
|
||||
int gc_common_option_from_string(const char *str) {
|
||||
#define GET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \
|
||||
if (strcmp(str, repr) == 0) return GC_OPTION_##UPPER;
|
||||
FOR_EACH_COMMON_GC_OPTION(GET_OPTION)
|
||||
#undef GET_OPTION
|
||||
return -1;
|
||||
}
|
||||
|
||||
#define SET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \
|
||||
case GC_OPTION_##UPPER: \
|
||||
if (value != clamp_##type(value, min, max)) return 0; \
|
||||
options->lower = value; \
|
||||
return 1;
|
||||
#define DEFINE_SETTER(STEM, stem, type) \
|
||||
int gc_common_options_set_##stem(struct gc_common_options *options, \
|
||||
int option, type value) { \
|
||||
switch (option) { \
|
||||
FOR_EACH_##STEM##_GC_OPTION(SET_OPTION) \
|
||||
default: return 0; \
|
||||
} \
|
||||
}
|
||||
DEFINE_SETTER(INT, int, int)
|
||||
DEFINE_SETTER(SIZE, size, size_t)
|
||||
DEFINE_SETTER(DOUBLE, double, double)
|
||||
#undef SET_OPTION
|
||||
#undef DEFINE_SETTER
|
||||
|
||||
static int parse_size(const char *arg, size_t *val) {
|
||||
char *end;
|
||||
long i = strtol(arg, &end, 0);
|
||||
if (i < 0 || i == LONG_MAX) return 0;
|
||||
if (end == arg) return 0;
|
||||
char delim = *end;
|
||||
if (delim == 'k' || delim == 'K')
|
||||
++end, i *= 1024L;
|
||||
else if (delim == 'm' || delim == 'M')
|
||||
++end, i *= 1024L * 1024L;
|
||||
else if (delim == 'g' || delim == 'G')
|
||||
++end, i *= 1024L * 1024L * 1024L;
|
||||
else if (delim == 't' || delim == 'T')
|
||||
++end, i *= 1024L * 1024L * 1024L * 1024L;
|
||||
|
||||
if (*end != '\0') return 0;
|
||||
*val = i;
|
||||
return 1;
|
||||
}
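
// For example, parse_size("64m", &v) sets v to 64 * 1024 * 1024 and returns 1;
// any trailing characters after the optional k/m/g/t suffix cause a parse
// failure (return 0).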
|
||||
|
||||
static int parse_int(const char *arg, int *val) {
|
||||
char *end;
|
||||
long i = strtol(arg, &end, 0);
|
||||
if (i == LONG_MIN || i == LONG_MAX || end == arg || *end)
|
||||
return 0;
|
||||
*val = i;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int parse_heap_size_policy(const char *arg, int *val) {
|
||||
if (strcmp(arg, "fixed") == 0) {
|
||||
*val = GC_HEAP_SIZE_FIXED;
|
||||
return 1;
|
||||
}
|
||||
if (strcmp(arg, "growable") == 0) {
|
||||
*val = GC_HEAP_SIZE_GROWABLE;
|
||||
return 1;
|
||||
}
|
||||
if (strcmp(arg, "adaptive") == 0) {
|
||||
*val = GC_HEAP_SIZE_ADAPTIVE;
|
||||
return 1;
|
||||
}
|
||||
return parse_int(arg, val);
|
||||
}
|
||||
|
||||
static int parse_double(const char *arg, double *val) {
|
||||
char *end;
|
||||
double d = strtod(arg, &end);
|
||||
if (end == arg || *end)
|
||||
return 0;
|
||||
*val = d;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int gc_common_options_parse_and_set(struct gc_common_options *options,
|
||||
int option, const char *value) {
|
||||
switch (option) {
|
||||
#define SET_OPTION(UPPER, lower, repr, type, parser, default, min, max) \
|
||||
case GC_OPTION_##UPPER: { \
|
||||
gc_option_##type v; \
|
||||
if (!parse_##parser(value, &v)) return 0; \
|
||||
return gc_common_options_set_##type(options, option, v); \
|
||||
}
|
||||
FOR_EACH_COMMON_GC_OPTION(SET_OPTION)
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int is_lower(char c) { return 'a' <= c && c <= 'z'; }
|
||||
static int is_digit(char c) { return '0' <= c && c <= '9'; }
|
||||
static int is_option(char c) { return is_lower(c) || c == '-'; }
|
||||
static int is_option_end(char c) { return c == '='; }
|
||||
static int is_value(char c) {
|
||||
return is_lower(c) || is_digit(c) || c == '-' || c == '+' || c == '.';
|
||||
}
|
||||
static int is_value_end(char c) { return c == '\0' || c == ','; }
|
||||
static char* read_token(char *p, int (*is_tok)(char c), int (*is_end)(char c),
|
||||
char *delim) {
|
||||
char c;
|
||||
for (c = *p; is_tok(c); c = *++p);
|
||||
if (!is_end(c)) return NULL;
|
||||
*delim = c;
|
||||
*p = '\0';
|
||||
return p + 1;
|
||||
}
|
||||
int gc_options_parse_and_set_many(struct gc_options *options,
|
||||
const char *str) {
|
||||
if (!*str) return 1;
|
||||
char *copy = strdup(str);
|
||||
char *cur = copy;
|
||||
int ret = 0;
|
||||
while (1) {
|
||||
char delim;
|
||||
char *next = read_token(cur, is_option, is_option_end, &delim);
|
||||
if (!next) break;
|
||||
int option = gc_option_from_string(cur);
|
||||
if (option < 0) break;
|
||||
|
||||
cur = next;
|
||||
next = read_token(cur, is_value, is_value_end, &delim);
|
||||
if (!next) break;
|
||||
if (!gc_options_parse_and_set(options, option, cur)) break;
|
||||
cur = next;
|
||||
if (delim == '\0') {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
free(copy);
|
||||
return ret;
|
||||
}
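
// Usage sketch (OPTIONS and handle_bad_options are hypothetical; OPTIONS
// would come from the public gc-options.h allocation API):
//
//   if (!gc_options_parse_and_set_many(options, "heap-size=2g,parallelism=8"))
//     handle_bad_options();
//
// Parsing stops and returns 0 at the first unknown option or malformed value;
// on success, every option named in the string has been set.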
|
211
libguile/whippet/src/gc-platform-gnu-linux.c
Normal file
@ -0,0 +1,211 @@
// For pthread_getattr_np.
|
||||
#define _GNU_SOURCE
|
||||
#include <errno.h>
|
||||
#include <link.h>
|
||||
#include <pthread.h>
|
||||
#include <sched.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/mman.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define GC_IMPL 1
|
||||
|
||||
#include "debug.h"
|
||||
#include "gc-align.h"
|
||||
#include "gc-assert.h"
|
||||
#include "gc-inline.h"
|
||||
#include "gc-platform.h"
|
||||
|
||||
void gc_platform_init(void) {
|
||||
// Nothing to do.
|
||||
}
|
||||
|
||||
static uintptr_t fallback_current_thread_stack_base(void) GC_NEVER_INLINE;
|
||||
static uintptr_t fallback_current_thread_stack_base(void) {
|
||||
// Sloppily assume that there are very few frames between us and the
|
||||
// thread entry or main function, and that therefore we haven't
|
||||
// consumed more than a page of stack; we can then just round up the
|
||||
// stack pointer to the page boundary.
|
||||
fprintf(stderr,
|
||||
"Using fallback strategy to capture stack base for thread %p.\n",
|
||||
(void*)pthread_self());
|
||||
int local;
|
||||
uintptr_t hot = (uintptr_t)&local;
|
||||
size_t page_size = getpagesize();
|
||||
return (hot + page_size) & ~(page_size - 1);
|
||||
}
|
||||
|
||||
uintptr_t gc_platform_current_thread_stack_base(void) {
|
||||
pthread_t me = pthread_self();
|
||||
pthread_attr_t attr;
|
||||
int err = pthread_getattr_np(me, &attr);
|
||||
if (err) {
|
||||
errno = err;
|
||||
// This case can occur for the main thread when running in a
|
||||
// filesystem without /proc/stat.
|
||||
perror("Failed to capture stack base via pthread_getattr_np");
|
||||
return fallback_current_thread_stack_base();
|
||||
}
|
||||
|
||||
void *stack_low_addr;
|
||||
size_t stack_size;
|
||||
err = pthread_attr_getstack(&attr, &stack_low_addr, &stack_size);
|
||||
pthread_attr_destroy(&attr);
|
||||
if (err) {
|
||||
// Should never occur.
|
||||
errno = err;
|
||||
perror("pthread_attr_getstack");
|
||||
return fallback_current_thread_stack_base();
|
||||
}
|
||||
|
||||
return (uintptr_t)stack_low_addr + stack_size;
|
||||
}
|
||||
|
||||
struct visit_data {
|
||||
void (*f)(uintptr_t start, uintptr_t end, struct gc_heap *heap, void *data);
|
||||
struct gc_heap *heap;
|
||||
void *data;
|
||||
};
|
||||
|
||||
static int visit_roots(struct dl_phdr_info *info, size_t size, void *data) {
|
||||
struct visit_data *visit_data = data;
|
||||
uintptr_t object_addr = info->dlpi_addr;
|
||||
const char *object_name = info->dlpi_name;
|
||||
const ElfW(Phdr) *program_headers = info->dlpi_phdr;
|
||||
size_t program_headers_count = info->dlpi_phnum;
|
||||
|
||||
// From the loader's perspective, an ELF image is broken up into
|
||||
// "segments", each of which is described by a "program header".
|
||||
// Treat all writable data segments as potential edges into the
|
||||
// GC-managed heap.
|
||||
//
|
||||
// Note that there are some RELRO segments which are initially
|
||||
// writable but then remapped read-only. BDW-GC will exclude these,
|
||||
// but we just punt for the time being and treat them as roots.
|
||||
for (size_t i = 0; i < program_headers_count; i++) {
|
||||
const ElfW(Phdr) *p = &program_headers[i];
|
||||
if (p->p_type == PT_LOAD && (p->p_flags & PF_W)) {
|
||||
uintptr_t start = p->p_vaddr + object_addr;
|
||||
uintptr_t end = start + p->p_memsz;
|
||||
DEBUG("found roots for '%s': [%p,%p)\n", object_name,
|
||||
(void*)start, (void*)end);
|
||||
visit_data->f(start, end, visit_data->heap, visit_data->data);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start,
|
||||
uintptr_t end,
|
||||
struct gc_heap*,
|
||||
void *data),
|
||||
struct gc_heap *heap,
|
||||
void *data) {
|
||||
struct visit_data visit_data = { f, heap, data };
|
||||
dl_iterate_phdr(visit_roots, &visit_data);
|
||||
}
|
||||
|
||||
int gc_platform_processor_count(void) {
|
||||
cpu_set_t set;
|
||||
if (sched_getaffinity(0, sizeof (set), &set) != 0)
|
||||
return 1;
|
||||
return CPU_COUNT(&set);
|
||||
}
|
||||
|
||||
uint64_t gc_platform_monotonic_nanoseconds(void) {
|
||||
struct timespec ts;
|
||||
if (clock_gettime(CLOCK_MONOTONIC, &ts))
|
||||
GC_CRASH();
|
||||
uint64_t s = ts.tv_sec;
|
||||
uint64_t ns = ts.tv_nsec;
|
||||
uint64_t ns_per_sec = 1000000000;
|
||||
return s * ns_per_sec + ns;
|
||||
}
|
||||
|
||||
size_t gc_platform_page_size(void) {
|
||||
return getpagesize();
|
||||
}
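
// Reserve SIZE bytes of address space at an ALIGNMENT-aligned base by
// over-reserving SIZE + ALIGNMENT bytes of PROT_NONE memory and unmapping
// the unaligned slop at each end (see below).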
|
||||
|
||||
struct gc_reservation gc_platform_reserve_memory(size_t size,
|
||||
size_t alignment) {
|
||||
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
|
||||
GC_ASSERT_EQ(alignment & (alignment - 1), 0);
|
||||
GC_ASSERT_EQ(alignment, align_down(alignment, getpagesize()));
|
||||
|
||||
size_t extent = size + alignment;
|
||||
void *mem = mmap(NULL, extent, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
if (mem == MAP_FAILED) {
|
||||
perror("failed to reserve address space");
|
||||
GC_CRASH();
|
||||
}
|
||||
|
||||
uintptr_t base = (uintptr_t) mem;
|
||||
uintptr_t end = base + extent;
|
||||
uintptr_t aligned_base = alignment ? align_up(base, alignment) : base;
|
||||
uintptr_t aligned_end = aligned_base + size;
|
||||
|
||||
if (aligned_base - base)
|
||||
munmap((void*)base, aligned_base - base);
|
||||
if (end - aligned_end)
|
||||
munmap((void*)aligned_end, end - aligned_end);
|
||||
|
||||
return (struct gc_reservation){aligned_base, size};
|
||||
}
|
||||
|
||||
void*
|
||||
gc_platform_acquire_memory_from_reservation(struct gc_reservation reservation,
|
||||
size_t offset, size_t size) {
|
||||
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
|
||||
GC_ASSERT(size <= reservation.size);
|
||||
GC_ASSERT(offset <= reservation.size - size);
|
||||
|
||||
void *mem = mmap((void*)(reservation.base + offset), size,
|
||||
PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
|
||||
if (mem == MAP_FAILED) {
|
||||
perror("mmap failed");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
void
|
||||
gc_platform_release_reservation(struct gc_reservation reservation) {
|
||||
if (munmap((void*)reservation.base, reservation.size) != 0)
|
||||
perror("failed to unmap memory");
|
||||
}
|
||||
|
||||
void*
|
||||
gc_platform_acquire_memory(size_t size, size_t alignment) {
|
||||
struct gc_reservation reservation =
|
||||
gc_platform_reserve_memory(size, alignment);
|
||||
return gc_platform_acquire_memory_from_reservation(reservation, 0, size);
|
||||
}
|
||||
|
||||
void gc_platform_release_memory(void *ptr, size_t size) {
|
||||
GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize()));
|
||||
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
|
||||
if (munmap(ptr, size) != 0)
|
||||
perror("failed to unmap memory");
|
||||
}
|
||||
|
||||
int gc_platform_populate_memory(void *ptr, size_t size) {
|
||||
GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize()));
|
||||
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
|
||||
if (madvise(ptr, size, MADV_WILLNEED) == 0)
|
||||
return 1;
|
||||
perror("failed to populate memory");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int gc_platform_discard_memory(void *ptr, size_t size) {
|
||||
GC_ASSERT_EQ((uintptr_t)ptr, align_down((uintptr_t)ptr, getpagesize()));
|
||||
GC_ASSERT_EQ(size, align_down(size, getpagesize()));
|
||||
if (madvise(ptr, size, MADV_DONTNEED) == 0)
|
||||
return 1;
|
||||
perror("failed to discard memory");
|
||||
return 0;
|
||||
}
|
48
libguile/whippet/src/gc-platform.h
Normal file
@ -0,0 +1,48 @@
#ifndef GC_PLATFORM_H
|
||||
#define GC_PLATFORM_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "gc-visibility.h"
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
GC_INTERNAL void gc_platform_init(void);
|
||||
GC_INTERNAL uintptr_t gc_platform_current_thread_stack_base(void);
|
||||
GC_INTERNAL
|
||||
void gc_platform_visit_global_conservative_roots(void (*f)(uintptr_t start,
|
||||
uintptr_t end,
|
||||
struct gc_heap *heap,
|
||||
void *data),
|
||||
struct gc_heap *heap,
|
||||
void *data);
|
||||
GC_INTERNAL int gc_platform_processor_count(void);
|
||||
GC_INTERNAL uint64_t gc_platform_monotonic_nanoseconds(void);
|
||||
|
||||
GC_INTERNAL size_t gc_platform_page_size(void);
|
||||
|
||||
struct gc_reservation {
|
||||
uintptr_t base;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
GC_INTERNAL
|
||||
struct gc_reservation gc_platform_reserve_memory(size_t size, size_t alignment);
|
||||
GC_INTERNAL
|
||||
void*
|
||||
gc_platform_acquire_memory_from_reservation(struct gc_reservation reservation,
|
||||
size_t offset, size_t size);
|
||||
GC_INTERNAL
|
||||
void gc_platform_release_reservation(struct gc_reservation reservation);
|
||||
|
||||
GC_INTERNAL void* gc_platform_acquire_memory(size_t size, size_t alignment);
|
||||
GC_INTERNAL void gc_platform_release_memory(void *base, size_t size);
|
||||
|
||||
GC_INTERNAL int gc_platform_populate_memory(void *addr, size_t size);
|
||||
GC_INTERNAL int gc_platform_discard_memory(void *addr, size_t size);
|
||||
|
||||
#endif // GC_PLATFORM_H
|
92
libguile/whippet/src/gc-stack.c
Normal file
@ -0,0 +1,92 @@
// For pthread_getattr_np.
|
||||
#define _GNU_SOURCE
|
||||
#include <pthread.h>
|
||||
#include <setjmp.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define GC_IMPL 1
|
||||
|
||||
#include "debug.h"
|
||||
#include "gc-align.h"
|
||||
#include "gc-assert.h"
|
||||
#include "gc-inline.h"
|
||||
#include "gc-platform.h"
|
||||
#include "gc-stack.h"
|
||||
|
||||
static uintptr_t current_thread_hot_stack_addr(void) {
|
||||
#ifdef __GNUC__
|
||||
return (uintptr_t)__builtin_frame_address(0);
|
||||
#else
|
||||
uintptr_t local;
|
||||
return (uintptr_t)&local;
|
||||
#endif
|
||||
}
|
||||
|
||||
// FIXME: check platform stack growth direction.
|
||||
#define HOTTER_THAN <=
|
||||
|
||||
static void capture_current_thread_hot_stack_addr(struct gc_stack_addr *addr) {
|
||||
addr->addr = current_thread_hot_stack_addr();
|
||||
}
|
||||
|
||||
static void capture_current_thread_cold_stack_addr(struct gc_stack_addr *addr) {
|
||||
addr->addr = gc_platform_current_thread_stack_base();
|
||||
}
|
||||
|
||||
void gc_stack_init(struct gc_stack *stack, struct gc_stack_addr *base) {
|
||||
if (base)
|
||||
stack->cold = *base;
|
||||
else
|
||||
capture_current_thread_cold_stack_addr(&stack->cold);
|
||||
stack->hot = stack->cold;
|
||||
}
|
||||
|
||||
void gc_stack_capture_hot(struct gc_stack *stack) {
|
||||
capture_current_thread_hot_stack_addr(&stack->hot);
|
||||
setjmp(stack->registers);
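  // The setjmp above spills callee-saved registers (and the stack pointer)
  // into stack->registers, so that gc_stack_visit below can scan them
  // conservatively along with the stack itself.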
|
||||
GC_ASSERT(stack->hot.addr HOTTER_THAN stack->cold.addr);
|
||||
}
|
||||
|
||||
static void* call_with_stack(void* (*)(struct gc_stack_addr*, void*),
|
||||
struct gc_stack_addr*, void*) GC_NEVER_INLINE;
|
||||
static void* call_with_stack(void* (*f)(struct gc_stack_addr *, void *),
|
||||
struct gc_stack_addr *addr, void *arg) {
|
||||
return f(addr, arg);
|
||||
}
|
||||
void* gc_call_with_stack_addr(void* (*f)(struct gc_stack_addr *base,
|
||||
void *arg),
|
||||
void *arg) {
|
||||
struct gc_stack_addr base;
|
||||
capture_current_thread_hot_stack_addr(&base);
|
||||
return call_with_stack(f, &base, arg);
|
||||
}
|
||||
|
||||
void gc_stack_visit(struct gc_stack *stack,
|
||||
void (*visit)(uintptr_t low, uintptr_t high,
|
||||
struct gc_heap *heap, void *data),
|
||||
struct gc_heap *heap,
|
||||
void *data) {
|
||||
{
|
||||
uintptr_t low = (uintptr_t)stack->registers;
|
||||
GC_ASSERT(low == align_down(low, sizeof(uintptr_t)));
|
||||
uintptr_t high = low + sizeof(jmp_buf);
|
||||
DEBUG("found mutator register roots for %p: [%p,%p)\n", stack,
|
||||
(void*)low, (void*)high);
|
||||
visit(low, high, heap, data);
|
||||
}
|
||||
|
||||
if (0 HOTTER_THAN 1) {
|
||||
DEBUG("found mutator stack roots for %p: [%p,%p)\n", stack,
|
||||
(void*)stack->hot.addr, (void*)stack->cold.addr);
|
||||
visit(align_up(stack->hot.addr, sizeof(uintptr_t)),
|
||||
align_down(stack->cold.addr, sizeof(uintptr_t)),
|
||||
heap, data);
|
||||
} else {
|
||||
DEBUG("found mutator stack roots for %p: [%p,%p)\n", stack,
|
||||
(void*)stack->cold.addr, (void*)stack->hot.addr);
|
||||
visit(align_up(stack->cold.addr, sizeof(uintptr_t)),
|
||||
align_down(stack->hot.addr, sizeof(uintptr_t)),
|
||||
heap, data);
|
||||
}
|
||||
}
|
33
libguile/whippet/src/gc-stack.h
Normal file
@ -0,0 +1,33 @@
#ifndef GC_STACK_H
|
||||
#define GC_STACK_H
|
||||
|
||||
#ifndef GC_IMPL
|
||||
#error internal header file, not part of API
|
||||
#endif
|
||||
|
||||
#include "gc-inline.h"
|
||||
#include <setjmp.h>
|
||||
|
||||
struct gc_stack_addr {
|
||||
uintptr_t addr;
|
||||
};
|
||||
|
||||
struct gc_stack {
|
||||
struct gc_stack_addr cold;
|
||||
struct gc_stack_addr hot;
|
||||
jmp_buf registers;
|
||||
};
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
GC_INTERNAL void gc_stack_init(struct gc_stack *stack,
|
||||
struct gc_stack_addr *base);
|
||||
GC_INTERNAL void gc_stack_capture_hot(struct gc_stack *stack);
|
||||
GC_INTERNAL void gc_stack_visit(struct gc_stack *stack,
|
||||
void (*visit)(uintptr_t low, uintptr_t high,
|
||||
struct gc_heap *heap,
|
||||
void *data),
|
||||
struct gc_heap *heap,
|
||||
void *data);
|
||||
|
||||
#endif // GC_STACK_H
|
56
libguile/whippet/src/gc-trace.h
Normal file
@ -0,0 +1,56 @@
#ifndef GC_TRACE_H
#define GC_TRACE_H

#ifndef GC_IMPL
#error internal header file, not part of API
#endif

#include "gc-config.h"
#include "gc-assert.h"
#include "gc-conservative-ref.h"
#include "gc-embedder-api.h"

static inline int gc_has_mutator_conservative_roots(void) {
  return GC_CONSERVATIVE_ROOTS;
}
static inline int gc_mutator_conservative_roots_may_be_interior(void) {
  return 1;
}
static inline int gc_has_global_conservative_roots(void) {
  return GC_CONSERVATIVE_ROOTS;
}
static inline int gc_has_conservative_intraheap_edges(void) {
  return GC_CONSERVATIVE_TRACE;
}

static inline int gc_has_conservative_roots(void) {
  return gc_has_mutator_conservative_roots() ||
    gc_has_global_conservative_roots();
}

enum gc_trace_kind {
  GC_TRACE_PRECISELY,
  GC_TRACE_NONE,
  GC_TRACE_CONSERVATIVELY,
  GC_TRACE_EPHEMERON,
};

struct gc_trace_plan {
  enum gc_trace_kind kind;
  size_t size; // For conservative tracing.
};

static inline int
gc_conservative_ref_might_be_a_heap_object(struct gc_conservative_ref ref,
                                           int possibly_interior) {
  // Assume that the minimum page size is 4096, and that the first page
  // will contain no heap objects.
  if (gc_conservative_ref_value(ref) < 4096)
    return 0;
  if (possibly_interior)
    return 1;
  return gc_is_valid_conservative_ref_displacement
    (gc_conservative_ref_value(ref) & (sizeof(uintptr_t) - 1));
}

#endif // GC_TRACE_H
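The prefilter at the end is intentionally cheap: anything on the first 4096-byte page is rejected, interior pointers are accepted wholesale, and otherwise the word-sized displacement is handed to the embedder. The comment block below traces a few representative inputs; it assumes a 64-bit build and an embedder that accepts only displacement 0, which is a common but not universal choice.

// Assuming sizeof(uintptr_t) == 8 and an embedder for which only
// displacement 0 is a valid conservative-ref displacement:
//   value 0x00000420, possibly_interior=0  -> 0   (below 4096: first page)
//   value 0x7f001238, possibly_interior=1  -> 1   (interior pointers allowed)
//   value 0x7f001238, possibly_interior=0  -> 1   (0x7f001238 & 7 == 0)
//   value 0x7f00123b, possibly_interior=0  -> 0   (displacement 3 rejected)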
6
libguile/whippet/src/gc-tracepoint.c
Normal file
@@ -0,0 +1,6 @@
#include <assert.h>
#ifdef GC_TRACEPOINT_LTTNG
#define LTTNG_UST_TRACEPOINT_DEFINE
#define LTTNG_UST_TRACEPOINT_CREATE_PROBES
#include "gc-lttng.h"
#endif // GC_TRACEPOINT_LTTNG
59
libguile/whippet/src/growable-heap-sizer.h
Normal file
@@ -0,0 +1,59 @@
#ifndef GROWABLE_HEAP_SIZER_H
#define GROWABLE_HEAP_SIZER_H

#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "assert.h"
#include "heap-sizer.h"

// This is a simple heap-sizing algorithm that will grow the heap if it is
// smaller than a given multiplier of the live data size.  It does not shrink
// the heap.

struct gc_growable_heap_sizer {
  struct gc_heap *heap;
  double multiplier;
  pthread_mutex_t lock;
};

static void
gc_growable_heap_sizer_set_multiplier(struct gc_growable_heap_sizer *sizer,
                                      double multiplier) {
  pthread_mutex_lock(&sizer->lock);
  sizer->multiplier = multiplier;
  pthread_mutex_unlock(&sizer->lock);
}

static void
gc_growable_heap_sizer_on_gc(struct gc_growable_heap_sizer *sizer,
                             size_t heap_size, size_t live_bytes,
                             uint64_t pause_ns,
                             void (*set_heap_size)(struct gc_heap*, size_t)) {
  pthread_mutex_lock(&sizer->lock);
  size_t target_size = live_bytes * sizer->multiplier;
  if (target_size > heap_size)
    set_heap_size(sizer->heap, target_size);
  pthread_mutex_unlock(&sizer->lock);
}

static struct gc_growable_heap_sizer*
gc_make_growable_heap_sizer(struct gc_heap *heap, double multiplier) {
  struct gc_growable_heap_sizer *sizer;
  sizer = malloc(sizeof(*sizer));
  if (!sizer)
    GC_CRASH();
  memset(sizer, 0, sizeof(*sizer));
  sizer->heap = heap;
  sizer->multiplier = multiplier;
  pthread_mutex_init(&sizer->lock, NULL);
  return sizer;
}

static void
gc_destroy_growable_heap_sizer(struct gc_growable_heap_sizer *sizer) {
  free(sizer);
}

#endif // GROWABLE_HEAP_SIZER_H
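Concretely, with a multiplier of 1.75, a collection that finds 100 MB of live data grows a 128 MB heap to 175 MB and leaves a 256 MB heap untouched, since this sizer never shrinks. Below is a minimal sketch of how a collector might hook it up; the set_heap_size_cb callback is an assumed placeholder for the collector's real resize hook.

// Placeholder for the collector's resize hook (assumption, not Whippet API).
static void set_heap_size_cb(struct gc_heap *heap, size_t new_size) {
  // The collector would grow its spaces to new_size bytes here.
}

// At heap initialization:
//   struct gc_growable_heap_sizer *sizer =
//     gc_make_growable_heap_sizer(heap, 1.75);
// After each collection:
//   gc_growable_heap_sizer_on_gc(sizer, heap_size, live_bytes, pause_ns,
//                                set_heap_size_cb);
// At heap teardown:
//   gc_destroy_growable_heap_sizer(sizer);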
74
libguile/whippet/src/heap-sizer.h
Normal file
@@ -0,0 +1,74 @@
#ifndef HEAP_SIZER_H
#define HEAP_SIZER_H

#include "gc-api.h"

#include "gc-options-internal.h"
#include "growable-heap-sizer.h"
#include "adaptive-heap-sizer.h"

struct gc_heap_sizer {
  enum gc_heap_size_policy policy;
  union {
    struct gc_growable_heap_sizer* growable;
    struct gc_adaptive_heap_sizer* adaptive;
  };
};

static struct gc_heap_sizer
gc_make_heap_sizer(struct gc_heap *heap,
                   const struct gc_common_options *options,
                   uint64_t (*get_allocation_counter_from_thread)(struct gc_heap*),
                   void (*set_heap_size_from_thread)(struct gc_heap*, size_t),
                   struct gc_background_thread *thread) {
  struct gc_heap_sizer ret = { options->heap_size_policy, };
  switch (options->heap_size_policy) {
    case GC_HEAP_SIZE_FIXED:
      break;

    case GC_HEAP_SIZE_GROWABLE:
      ret.growable =
        gc_make_growable_heap_sizer(heap, options->heap_size_multiplier);
      break;

    case GC_HEAP_SIZE_ADAPTIVE:
      ret.adaptive =
        gc_make_adaptive_heap_sizer (heap, options->heap_expansiveness,
                                     get_allocation_counter_from_thread,
                                     set_heap_size_from_thread,
                                     thread);
      break;

    default:
      GC_CRASH();
  }
  return ret;
}

static void
gc_heap_sizer_on_gc(struct gc_heap_sizer sizer, size_t heap_size,
                    size_t live_bytes, size_t pause_ns,
                    void (*set_heap_size)(struct gc_heap*, size_t)) {
  switch (sizer.policy) {
    case GC_HEAP_SIZE_FIXED:
      break;

    case GC_HEAP_SIZE_GROWABLE:
      gc_growable_heap_sizer_on_gc(sizer.growable, heap_size, live_bytes,
                                   pause_ns, set_heap_size);
      break;

    case GC_HEAP_SIZE_ADAPTIVE:
      if (sizer.adaptive->background_task_id < 0)
        gc_adaptive_heap_sizer_background_task(sizer.adaptive);
      gc_adaptive_heap_sizer_on_gc(sizer.adaptive, live_bytes, pause_ns,
                                   set_heap_size);
      break;

    default:
      GC_CRASH();
  }
}


#endif // HEAP_SIZER_H
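gc_heap_sizer is a small tagged union dispatched on the heap-size policy from the common options: the fixed policy is a no-op, the growable policy delegates to the sizer above, and the adaptive policy additionally relies on a background task. A hedged sketch of the intended call pattern follows; the hook names and the way the options struct is reached are placeholders, not the collectors' actual code.

// Placeholder hooks (assumptions); a collector supplies its own.
static uint64_t get_allocation_counter_cb(struct gc_heap *heap) { return 0; }
static void heap_resize_cb(struct gc_heap *heap, size_t size) { }

// At heap initialization (common_options is a stand-in for however the
// collector stores its parsed options):
//   struct gc_heap_sizer sizer =
//     gc_make_heap_sizer(heap, common_options, get_allocation_counter_cb,
//                        heap_resize_cb, background_thread);
// From the collection epilogue:
//   gc_heap_sizer_on_gc(sizer, heap_size, live_bytes, pause_ns,
//                       heap_resize_cb);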
525
libguile/whippet/src/large-object-space.h
Normal file
@@ -0,0 +1,525 @@
#ifndef LARGE_OBJECT_SPACE_H
|
||||
#define LARGE_OBJECT_SPACE_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include <malloc.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "gc-assert.h"
|
||||
#include "gc-ref.h"
|
||||
#include "gc-conservative-ref.h"
|
||||
#include "gc-trace.h"
|
||||
#include "address-map.h"
|
||||
#include "address-set.h"
|
||||
#include "background-thread.h"
|
||||
#include "freelist.h"
|
||||
|
||||
// A mark-sweep space with generational support.
|
||||
|
||||
struct gc_heap;
|
||||
|
||||
enum large_object_state {
|
||||
LARGE_OBJECT_NURSERY = 0,
|
||||
LARGE_OBJECT_MARKED_BIT = 1,
|
||||
LARGE_OBJECT_MARK_TOGGLE_BIT = 2,
|
||||
LARGE_OBJECT_MARK_0 = LARGE_OBJECT_MARKED_BIT,
|
||||
LARGE_OBJECT_MARK_1 = LARGE_OBJECT_MARKED_BIT | LARGE_OBJECT_MARK_TOGGLE_BIT
|
||||
};
|
||||
|
||||
struct large_object {
|
||||
uintptr_t addr;
|
||||
size_t size;
|
||||
};
|
||||
struct large_object_node;
|
||||
struct large_object_live_data {
|
||||
uint8_t mark;
|
||||
enum gc_trace_kind trace;
|
||||
};
|
||||
struct large_object_dead_data {
|
||||
uint8_t age;
|
||||
struct large_object_node **prev;
|
||||
struct large_object_node *next;
|
||||
};
|
||||
struct large_object_data {
|
||||
uint8_t is_live;
|
||||
union {
|
||||
struct large_object_live_data live;
|
||||
struct large_object_dead_data dead;
|
||||
};
|
||||
};
|
||||
|
||||
#define SPLAY_TREE_PREFIX large_object_
|
||||
typedef struct large_object large_object_key_span;
|
||||
typedef uintptr_t large_object_key;
|
||||
typedef struct large_object_data large_object_value;
|
||||
static inline int
|
||||
large_object_compare(uintptr_t addr, struct large_object obj) {
|
||||
if (addr < obj.addr) return -1;
|
||||
if (addr - obj.addr < obj.size) return 0;
|
||||
return 1;
|
||||
}
|
||||
static inline uintptr_t
|
||||
large_object_span_start(struct large_object obj) {
|
||||
return obj.addr;
|
||||
}
|
||||
#include "splay-tree.h"
|
||||
|
||||
DEFINE_FREELIST(large_object_freelist, sizeof(uintptr_t) * 8 - 1, 2,
|
||||
struct large_object_node*);
|
||||
|
||||
struct large_object_space {
|
||||
// Lock for object_map, quarantine, nursery, and marked.
|
||||
pthread_mutex_t lock;
|
||||
// Lock for object_tree.
|
||||
pthread_mutex_t object_tree_lock;
|
||||
// Lock for remembered_edges.
|
||||
pthread_mutex_t remembered_edges_lock;
|
||||
// Locking order: You must hold the space lock when taking
|
||||
// object_tree_lock. Take no other lock while holding
|
||||
// object_tree_lock. remembered_edges_lock is a leaf; take no locks
|
||||
// when holding it.
|
||||
|
||||
// The value for a large_object_node's "mark" field indicating a
|
||||
// marked object; always nonzero, and alternating between two values
|
||||
// at every major GC.
|
||||
uint8_t marked;
|
||||
|
||||
// Splay tree of objects, keyed by <addr, size> tuple. Useful when
|
||||
// looking up object-for-address.
|
||||
struct large_object_tree object_tree;
|
||||
|
||||
// Hash table of objects, where values are pointers to splay tree
|
||||
// nodes. Useful when you have the object address and just want to
|
||||
// check something about it (for example its size).
|
||||
struct address_map object_map;
|
||||
|
||||
// In generational configurations, we collect all allocations in the
|
||||
// last cycle into the nursery.
|
||||
struct address_map nursery;
|
||||
|
||||
// Size-segregated freelist of dead objects. Allocations are first
|
||||
// served from the quarantine freelist before falling back to the OS
|
||||
// if needed. Collected objects spend a second or two in quarantine
|
||||
// before being returned to the OS. This is an optimization to avoid
|
||||
// mucking about too much with the TLB and so on.
|
||||
struct large_object_freelist quarantine;
|
||||
|
||||
// Set of edges from lospace that may reference young objects,
|
||||
// possibly in other spaces.
|
||||
struct address_set remembered_edges;
|
||||
|
||||
size_t page_size;
|
||||
size_t page_size_log2;
|
||||
size_t total_pages;
|
||||
size_t free_pages;
|
||||
size_t live_pages_at_last_collection;
|
||||
size_t pages_freed_by_last_collection;
|
||||
int synchronous_release;
|
||||
};
|
||||
|
||||
static size_t
|
||||
large_object_space_npages(struct large_object_space *space, size_t bytes) {
|
||||
return (bytes + space->page_size - 1) >> space->page_size_log2;
|
||||
}
|
||||
|
||||
static size_t
|
||||
large_object_space_size_at_last_collection(struct large_object_space *space) {
|
||||
return space->live_pages_at_last_collection << space->page_size_log2;
|
||||
}
|
||||
|
||||
static inline int
|
||||
large_object_space_contains_with_lock(struct large_object_space *space,
|
||||
struct gc_ref ref) {
|
||||
return address_map_contains(&space->object_map, gc_ref_value(ref));
|
||||
}
|
||||
|
||||
static inline int
|
||||
large_object_space_contains(struct large_object_space *space,
|
||||
struct gc_ref ref) {
|
||||
pthread_mutex_lock(&space->lock);
|
||||
int ret = large_object_space_contains_with_lock(space, ref);
|
||||
pthread_mutex_unlock(&space->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct gc_ref
|
||||
large_object_space_object_containing_edge(struct large_object_space *space,
|
||||
struct gc_edge edge) {
|
||||
pthread_mutex_lock(&space->object_tree_lock);
|
||||
struct large_object_node *node =
|
||||
large_object_tree_lookup(&space->object_tree, gc_edge_address(edge));
|
||||
uintptr_t addr = (node && node->value.is_live) ? node->key.addr : 0;
|
||||
pthread_mutex_unlock(&space->object_tree_lock);
|
||||
return gc_ref(addr);
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_start_gc(struct large_object_space *space, int is_minor_gc) {
|
||||
// Take the space lock to prevent
|
||||
// large_object_space_process_quarantine from concurrently mutating
|
||||
// the object map.
|
||||
pthread_mutex_lock(&space->lock);
|
||||
if (!is_minor_gc) {
|
||||
space->marked ^= LARGE_OBJECT_MARK_TOGGLE_BIT;
|
||||
space->live_pages_at_last_collection = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static inline struct gc_trace_plan
|
||||
large_object_space_object_trace_plan(struct large_object_space *space,
|
||||
struct gc_ref ref) {
|
||||
uintptr_t node_bits =
|
||||
address_map_lookup(&space->object_map, gc_ref_value(ref), 0);
|
||||
GC_ASSERT(node_bits);
|
||||
struct large_object_node *node = (struct large_object_node*) node_bits;
|
||||
switch (node->value.live.trace) {
|
||||
case GC_TRACE_PRECISELY:
|
||||
return (struct gc_trace_plan){ GC_TRACE_PRECISELY, };
|
||||
case GC_TRACE_NONE:
|
||||
return (struct gc_trace_plan){ GC_TRACE_NONE, };
|
||||
#if GC_CONSERVATIVE_TRACE
|
||||
case GC_TRACE_CONSERVATIVELY: {
|
||||
return (struct gc_trace_plan){ GC_TRACE_CONSERVATIVELY, node->key.size };
|
||||
}
|
||||
// No large ephemerons.
|
||||
#endif
|
||||
default:
|
||||
GC_CRASH();
|
||||
}
|
||||
}
|
||||
|
||||
static uint8_t*
|
||||
large_object_node_mark_loc(struct large_object_node *node) {
|
||||
GC_ASSERT(node->value.is_live);
|
||||
return &node->value.live.mark;
|
||||
}
|
||||
|
||||
static uint8_t
|
||||
large_object_node_get_mark(struct large_object_node *node) {
|
||||
return atomic_load_explicit(large_object_node_mark_loc(node),
|
||||
memory_order_acquire);
|
||||
}
|
||||
|
||||
static struct large_object_node*
|
||||
large_object_space_lookup(struct large_object_space *space, struct gc_ref ref) {
|
||||
return (struct large_object_node*) address_map_lookup(&space->object_map,
|
||||
gc_ref_value(ref),
|
||||
0);
|
||||
}
|
||||
|
||||
static int
|
||||
large_object_space_mark(struct large_object_space *space, struct gc_ref ref) {
|
||||
struct large_object_node *node = large_object_space_lookup(space, ref);
|
||||
if (!node)
|
||||
return 0;
|
||||
GC_ASSERT(node->value.is_live);
|
||||
|
||||
uint8_t *loc = large_object_node_mark_loc(node);
|
||||
uint8_t mark = atomic_load_explicit(loc, memory_order_relaxed);
|
||||
do {
|
||||
if (mark == space->marked)
|
||||
return 0;
|
||||
} while (!atomic_compare_exchange_weak_explicit(loc, &mark, space->marked,
|
||||
memory_order_acq_rel,
|
||||
memory_order_acquire));
|
||||
|
||||
size_t pages = node->key.size >> space->page_size_log2;
|
||||
atomic_fetch_add(&space->live_pages_at_last_collection, pages);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
large_object_space_is_marked(struct large_object_space *space,
|
||||
struct gc_ref ref) {
|
||||
struct large_object_node *node = large_object_space_lookup(space, ref);
|
||||
if (!node)
|
||||
return 0;
|
||||
GC_ASSERT(node->value.is_live);
|
||||
|
||||
return atomic_load_explicit(large_object_node_mark_loc(node),
|
||||
memory_order_acquire) == space->marked;
|
||||
}
|
||||
|
||||
static int
|
||||
large_object_space_is_survivor(struct large_object_space *space,
|
||||
struct gc_ref ref) {
|
||||
GC_ASSERT(large_object_space_contains(space, ref));
|
||||
pthread_mutex_lock(&space->lock);
|
||||
int old = large_object_space_is_marked(space, ref);
|
||||
pthread_mutex_unlock(&space->lock);
|
||||
return old;
|
||||
}
|
||||
|
||||
static int
|
||||
large_object_space_remember_edge(struct large_object_space *space,
|
||||
struct gc_ref obj,
|
||||
struct gc_edge edge) {
|
||||
GC_ASSERT(large_object_space_contains(space, obj));
|
||||
if (!large_object_space_is_survivor(space, obj))
|
||||
return 0;
|
||||
|
||||
uintptr_t edge_addr = gc_edge_address(edge);
|
||||
int remembered = 0;
|
||||
pthread_mutex_lock(&space->remembered_edges_lock);
|
||||
if (!address_set_contains(&space->remembered_edges, edge_addr)) {
|
||||
address_set_add(&space->remembered_edges, edge_addr);
|
||||
remembered = 1;
|
||||
}
|
||||
pthread_mutex_unlock(&space->remembered_edges_lock);
|
||||
return remembered;
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_forget_edge(struct large_object_space *space,
|
||||
struct gc_edge edge) {
|
||||
uintptr_t edge_addr = gc_edge_address(edge);
|
||||
pthread_mutex_lock(&space->remembered_edges_lock);
|
||||
GC_ASSERT(address_set_contains(&space->remembered_edges, edge_addr));
|
||||
address_set_remove(&space->remembered_edges, edge_addr);
|
||||
pthread_mutex_unlock(&space->remembered_edges_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_clear_remembered_edges(struct large_object_space *space) {
|
||||
address_set_clear(&space->remembered_edges);
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_add_to_freelist(struct large_object_space *space,
|
||||
struct large_object_node *node) {
|
||||
node->value.is_live = 0;
|
||||
struct large_object_dead_data *data = &node->value.dead;
|
||||
memset(data, 0, sizeof(*data));
|
||||
data->age = 0;
|
||||
struct large_object_node **bucket =
|
||||
large_object_freelist_bucket(&space->quarantine, node->key.size);
|
||||
data->next = *bucket;
|
||||
if (data->next)
|
||||
data->next->value.dead.prev = &data->next;
|
||||
data->prev = bucket;
|
||||
*bucket = node;
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_remove_from_freelist(struct large_object_space *space,
|
||||
struct large_object_node *node) {
|
||||
GC_ASSERT(!node->value.is_live);
|
||||
struct large_object_dead_data *dead = &node->value.dead;
|
||||
GC_ASSERT(dead->prev);
|
||||
if (dead->next)
|
||||
dead->next->value.dead.prev = dead->prev;
|
||||
*dead->prev = dead->next;
|
||||
dead->prev = NULL;
|
||||
dead->next = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_sweep_one(uintptr_t addr, uintptr_t node_bits,
|
||||
void *data) {
|
||||
struct large_object_space *space = data;
|
||||
struct large_object_node *node = (struct large_object_node*) node_bits;
|
||||
if (!node->value.is_live)
|
||||
return;
|
||||
GC_ASSERT(node->value.is_live);
|
||||
uint8_t mark = atomic_load_explicit(large_object_node_mark_loc(node),
|
||||
memory_order_acquire);
|
||||
if (mark != space->marked)
|
||||
large_object_space_add_to_freelist(space, node);
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_process_quarantine(void *data) {
|
||||
struct large_object_space *space = data;
|
||||
pthread_mutex_lock(&space->lock);
|
||||
pthread_mutex_lock(&space->object_tree_lock);
|
||||
for (size_t idx = 0; idx < large_object_freelist_num_size_classes(); idx++) {
|
||||
struct large_object_node **link = &space->quarantine.buckets[idx];
|
||||
for (struct large_object_node *node = *link; node; node = *link) {
|
||||
GC_ASSERT(!node->value.is_live);
|
||||
if (++node->value.dead.age < 2) {
|
||||
link = &node->value.dead.next;
|
||||
} else {
|
||||
struct large_object obj = node->key;
|
||||
large_object_space_remove_from_freelist(space, node);
|
||||
address_map_remove(&space->object_map, obj.addr);
|
||||
large_object_tree_remove(&space->object_tree, obj.addr);
|
||||
gc_platform_release_memory((void*)obj.addr, obj.size);
|
||||
}
|
||||
}
|
||||
}
|
||||
pthread_mutex_unlock(&space->object_tree_lock);
|
||||
pthread_mutex_unlock(&space->lock);
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_finish_gc(struct large_object_space *space,
|
||||
int is_minor_gc) {
|
||||
if (GC_GENERATIONAL) {
|
||||
address_map_for_each(is_minor_gc ? &space->nursery : &space->object_map,
|
||||
large_object_space_sweep_one,
|
||||
space);
|
||||
address_map_clear(&space->nursery);
|
||||
} else {
|
||||
address_map_for_each(&space->object_map,
|
||||
large_object_space_sweep_one,
|
||||
space);
|
||||
}
|
||||
size_t free_pages =
|
||||
space->total_pages - space->live_pages_at_last_collection;
|
||||
space->pages_freed_by_last_collection = free_pages - space->free_pages;
|
||||
space->free_pages = free_pages;
|
||||
pthread_mutex_unlock(&space->lock);
|
||||
if (space->synchronous_release)
|
||||
large_object_space_process_quarantine(space);
|
||||
}
|
||||
|
||||
static void
|
||||
large_object_space_add_to_allocation_counter(struct large_object_space *space,
|
||||
uint64_t *counter) {
|
||||
size_t pages = space->total_pages - space->free_pages;
|
||||
pages -= space->live_pages_at_last_collection;
|
||||
*counter += pages << space->page_size_log2;
|
||||
}
|
||||
|
||||
static inline struct gc_ref
|
||||
large_object_space_mark_conservative_ref(struct large_object_space *space,
|
||||
struct gc_conservative_ref ref,
|
||||
int possibly_interior) {
|
||||
uintptr_t addr = gc_conservative_ref_value(ref);
|
||||
|
||||
if (!possibly_interior) {
|
||||
// Addr not aligned on page boundary? Not a large object.
|
||||
// Otherwise strip the displacement to obtain the true base address.
|
||||
uintptr_t displacement = addr & (space->page_size - 1);
|
||||
if (!gc_is_valid_conservative_ref_displacement(displacement))
|
||||
return gc_ref_null();
|
||||
addr -= displacement;
|
||||
}
|
||||
|
||||
struct large_object_node *node;
|
||||
if (possibly_interior) {
|
||||
pthread_mutex_lock(&space->object_tree_lock);
|
||||
node = large_object_tree_lookup(&space->object_tree, addr);
|
||||
pthread_mutex_unlock(&space->object_tree_lock);
|
||||
} else {
|
||||
node = large_object_space_lookup(space, gc_ref(addr));
|
||||
}
|
||||
|
||||
if (node && node->value.is_live &&
|
||||
large_object_space_mark(space, gc_ref(node->key.addr)))
|
||||
return gc_ref(node->key.addr);
|
||||
|
||||
return gc_ref_null();
|
||||
}
|
||||
|
||||
static void*
|
||||
large_object_space_alloc(struct large_object_space *space, size_t npages,
|
||||
enum gc_trace_kind trace) {
|
||||
void *ret = NULL;
|
||||
pthread_mutex_lock(&space->lock);
|
||||
|
||||
size_t size = npages << space->page_size_log2;
|
||||
for (size_t idx = large_object_freelist_size_class(size);
|
||||
idx < large_object_freelist_num_size_classes();
|
||||
idx++) {
|
||||
struct large_object_node *node = space->quarantine.buckets[idx];
|
||||
while (node && node->key.size < size)
|
||||
node = node->value.dead.next;
|
||||
if (node) {
|
||||
// We found a suitable hole in quarantine. Unlink it from the
|
||||
// freelist.
|
||||
large_object_space_remove_from_freelist(space, node);
|
||||
|
||||
// Mark the hole as live.
|
||||
node->value.is_live = 1;
|
||||
memset(&node->value.live, 0, sizeof(node->value.live));
|
||||
node->value.live.mark = LARGE_OBJECT_NURSERY;
|
||||
node->value.live.trace = trace;
|
||||
|
||||
// If the hole is actually too big, trim its tail.
|
||||
if (node->key.size > size) {
|
||||
struct large_object tail = {node->key.addr + size, node->key.size - size};
|
||||
struct large_object_data tail_value = {0,};
|
||||
node->key.size = size;
|
||||
pthread_mutex_lock(&space->object_tree_lock);
|
||||
struct large_object_node *tail_node =
|
||||
large_object_tree_insert(&space->object_tree, tail, tail_value);
|
||||
pthread_mutex_unlock(&space->object_tree_lock);
|
||||
uintptr_t tail_node_bits = (uintptr_t)tail_node;
|
||||
address_map_add(&space->object_map, tail_node->key.addr,
|
||||
tail_node_bits);
|
||||
large_object_space_add_to_freelist(space, tail_node);
|
||||
}
|
||||
|
||||
// Add the object to the nursery.
|
||||
if (GC_GENERATIONAL)
|
||||
address_map_add(&space->nursery, node->key.addr, (uintptr_t)node);
|
||||
|
||||
space->free_pages -= npages;
|
||||
ret = (void*)node->key.addr;
|
||||
memset(ret, 0, size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't find anything in the quarantine, get fresh pages from the OS.
|
||||
if (!ret) {
|
||||
ret = gc_platform_acquire_memory(size, 0);
|
||||
if (ret) {
|
||||
uintptr_t addr = (uintptr_t)ret;
|
||||
struct large_object k = { addr, size };
|
||||
struct large_object_data v = {0,};
|
||||
v.is_live = 1;
|
||||
v.live.mark = LARGE_OBJECT_NURSERY;
|
||||
v.live.trace = trace;
|
||||
|
||||
pthread_mutex_lock(&space->object_tree_lock);
|
||||
struct large_object_node *node =
|
||||
large_object_tree_insert(&space->object_tree, k, v);
|
||||
uintptr_t node_bits = (uintptr_t)node;
|
||||
address_map_add(&space->object_map, addr, node_bits);
|
||||
space->total_pages += npages;
|
||||
pthread_mutex_unlock(&space->object_tree_lock);
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&space->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
large_object_space_init(struct large_object_space *space,
|
||||
struct gc_heap *heap,
|
||||
struct gc_background_thread *thread) {
|
||||
memset(space, 0, sizeof(*space));
|
||||
pthread_mutex_init(&space->lock, NULL);
|
||||
pthread_mutex_init(&space->object_tree_lock, NULL);
|
||||
pthread_mutex_init(&space->remembered_edges_lock, NULL);
|
||||
|
||||
space->page_size = getpagesize();
|
||||
space->page_size_log2 = __builtin_ctz(space->page_size);
|
||||
|
||||
space->marked = LARGE_OBJECT_MARK_0;
|
||||
|
||||
large_object_tree_init(&space->object_tree);
|
||||
address_map_init(&space->object_map);
|
||||
address_map_init(&space->nursery);
|
||||
large_object_freelist_init(&space->quarantine);
|
||||
|
||||
address_set_init(&space->remembered_edges);
|
||||
|
||||
if (thread)
|
||||
gc_background_thread_add_task(thread, GC_BACKGROUND_TASK_START,
|
||||
large_object_space_process_quarantine,
|
||||
space);
|
||||
else
|
||||
space->synchronous_release = 1;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif // LARGE_OBJECT_SPACE_H
|
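The large-object space above keeps three views of its objects: a splay tree keyed by address span (for interior-pointer lookups), an address map from base address to tree node (for exact lookups), and a size-segregated quarantine freelist that recycles freed ranges for a couple of collections before returning them to the OS. The sketch below shows the call sequence a collector would use, restricted to functions defined in this header; the heap argument and the marking loop are stand-ins.

// Sketch; `heap` is a stand-in for the collector's heap.
struct large_object_space lospace;
// large_object_space_init(&lospace, heap, NULL);  // NULL background thread
//                                                 // => synchronous quarantine
// Allocate a 1 MiB object that the collector will trace precisely:
//   size_t npages = large_object_space_npages(&lospace, 1 << 20);
//   void *obj = large_object_space_alloc(&lospace, npages, GC_TRACE_PRECISELY);
// During a major collection:
//   large_object_space_start_gc(&lospace, 0);
//   ... large_object_space_mark(&lospace, ref) for each reachable large ref ...
//   large_object_space_finish_gc(&lospace, 0);    // unmarked objects go to
//                                                 // the quarantine freelist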
59
libguile/whippet/src/local-worklist.h
Normal file
@@ -0,0 +1,59 @@
#ifndef LOCAL_WORKLIST_H
#define LOCAL_WORKLIST_H

#include "assert.h"

#define LOCAL_WORKLIST_SIZE 1024
#define LOCAL_WORKLIST_MASK (LOCAL_WORKLIST_SIZE - 1)
#define LOCAL_WORKLIST_SHARE_AMOUNT (LOCAL_WORKLIST_SIZE * 3 / 4)
struct local_worklist {
  size_t read;
  size_t write;
  struct gc_ref data[LOCAL_WORKLIST_SIZE];
};

static inline void
local_worklist_init(struct local_worklist *q) {
  q->read = q->write = 0;
}
static inline void
local_worklist_poison(struct local_worklist *q) {
  q->read = 0; q->write = LOCAL_WORKLIST_SIZE;
}
static inline size_t
local_worklist_size(struct local_worklist *q) {
  return q->write - q->read;
}
static inline int
local_worklist_empty(struct local_worklist *q) {
  return local_worklist_size(q) == 0;
}
static inline int
local_worklist_full(struct local_worklist *q) {
  return local_worklist_size(q) >= LOCAL_WORKLIST_SIZE;
}
static inline void
local_worklist_push(struct local_worklist *q, struct gc_ref v) {
  ASSERT(!local_worklist_full(q));
  q->data[q->write++ & LOCAL_WORKLIST_MASK] = v;
}
static inline struct gc_ref
local_worklist_pop(struct local_worklist *q) {
  ASSERT(!local_worklist_empty(q));
  return q->data[q->read++ & LOCAL_WORKLIST_MASK];
}

static inline size_t
local_worklist_pop_many(struct local_worklist *q, struct gc_ref **objv,
                        size_t limit) {
  size_t avail = local_worklist_size(q);
  size_t read = q->read & LOCAL_WORKLIST_MASK;
  size_t contig = LOCAL_WORKLIST_SIZE - read;
  if (contig < avail) avail = contig;
  if (limit < avail) avail = limit;
  *objv = q->data + read;
  q->read += avail;
  return avail;
}

#endif // LOCAL_WORKLIST_H
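The local worklist is a fixed 1024-entry ring buffer private to one tracer thread: push and pop only assert on overflow and underflow rather than resizing, and local_worklist_pop_many hands back a pointer to a contiguous run so a batch can be moved to the shared worklist without an extra copy. A small sketch of that spill pattern, with some_ref as a placeholder value:

// Sketch of the spill pattern the parallel tracer uses.
struct local_worklist q;
// local_worklist_init(&q);
// while (!local_worklist_full(&q))
//   local_worklist_push(&q, some_ref);            // placeholder ref
// // Move up to LOCAL_WORKLIST_SHARE_AMOUNT entries out in contiguous runs:
// size_t to_share = LOCAL_WORKLIST_SHARE_AMOUNT;
// while (to_share) {
//   struct gc_ref *objv;
//   size_t n = local_worklist_pop_many(&q, &objv, to_share);
//   // objv points into q.data; hand [objv, objv+n) to the shared worklist.
//   to_share -= n;
// }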
1266
libguile/whippet/src/mmc.c
Normal file
File diff suppressed because it is too large
1944
libguile/whippet/src/nofl-space.h
Normal file
File diff suppressed because it is too large
433
libguile/whippet/src/parallel-tracer.h
Normal file
@@ -0,0 +1,433 @@
#ifndef PARALLEL_TRACER_H
|
||||
#define PARALLEL_TRACER_H
|
||||
|
||||
#include <pthread.h>
|
||||
#include <stdatomic.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "assert.h"
|
||||
#include "debug.h"
|
||||
#include "gc-inline.h"
|
||||
#include "gc-tracepoint.h"
|
||||
#include "local-worklist.h"
|
||||
#include "root-worklist.h"
|
||||
#include "shared-worklist.h"
|
||||
#include "spin.h"
|
||||
#include "tracer.h"
|
||||
|
||||
#ifdef VERBOSE_LOGGING
|
||||
#define LOG(...) fprintf (stderr, "LOG: " __VA_ARGS__)
|
||||
#else
|
||||
#define LOG(...) do { } while (0)
|
||||
#endif
|
||||
|
||||
enum trace_worker_state {
|
||||
TRACE_WORKER_STOPPED,
|
||||
TRACE_WORKER_IDLE,
|
||||
TRACE_WORKER_TRACING,
|
||||
TRACE_WORKER_STOPPING,
|
||||
TRACE_WORKER_DEAD
|
||||
};
|
||||
|
||||
struct gc_heap;
|
||||
struct gc_trace_worker {
|
||||
struct gc_heap *heap;
|
||||
struct gc_tracer *tracer;
|
||||
size_t id;
|
||||
size_t steal_id;
|
||||
pthread_t thread;
|
||||
enum trace_worker_state state;
|
||||
pthread_mutex_t lock;
|
||||
struct shared_worklist shared;
|
||||
struct local_worklist local;
|
||||
struct gc_trace_worker_data *data;
|
||||
};
|
||||
|
||||
static inline struct gc_trace_worker_data*
|
||||
gc_trace_worker_data(struct gc_trace_worker *worker) {
|
||||
return worker->data;
|
||||
}
|
||||
|
||||
#define TRACE_WORKERS_MAX_COUNT 8
|
||||
|
||||
struct gc_tracer {
|
||||
struct gc_heap *heap;
|
||||
atomic_size_t active_tracers;
|
||||
size_t worker_count;
|
||||
long epoch;
|
||||
pthread_mutex_t lock;
|
||||
pthread_cond_t cond;
|
||||
int trace_roots_only;
|
||||
struct root_worklist roots;
|
||||
struct gc_trace_worker workers[TRACE_WORKERS_MAX_COUNT];
|
||||
};
|
||||
|
||||
static int
|
||||
trace_worker_init(struct gc_trace_worker *worker, struct gc_heap *heap,
|
||||
struct gc_tracer *tracer, size_t id) {
|
||||
worker->heap = heap;
|
||||
worker->tracer = tracer;
|
||||
worker->id = id;
|
||||
worker->steal_id = 0;
|
||||
worker->thread = 0;
|
||||
worker->state = TRACE_WORKER_STOPPED;
|
||||
pthread_mutex_init(&worker->lock, NULL);
|
||||
worker->data = NULL;
|
||||
local_worklist_init(&worker->local);
|
||||
return shared_worklist_init(&worker->shared);
|
||||
}
|
||||
|
||||
static void trace_worker_trace(struct gc_trace_worker *worker);
|
||||
|
||||
static void*
|
||||
trace_worker_thread(void *data) {
|
||||
struct gc_trace_worker *worker = data;
|
||||
struct gc_tracer *tracer = worker->tracer;
|
||||
long trace_epoch = 0;
|
||||
|
||||
pthread_mutex_lock(&worker->lock);
|
||||
while (1) {
|
||||
long epoch = atomic_load_explicit(&tracer->epoch, memory_order_acquire);
|
||||
if (trace_epoch != epoch) {
|
||||
trace_epoch = epoch;
|
||||
trace_worker_trace(worker);
|
||||
}
|
||||
pthread_cond_wait(&tracer->cond, &worker->lock);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
trace_worker_spawn(struct gc_trace_worker *worker) {
|
||||
if (pthread_create(&worker->thread, NULL, trace_worker_thread, worker)) {
|
||||
perror("spawning tracer thread failed");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
gc_tracer_init(struct gc_tracer *tracer, struct gc_heap *heap,
|
||||
size_t parallelism) {
|
||||
tracer->heap = heap;
|
||||
atomic_init(&tracer->active_tracers, 0);
|
||||
tracer->epoch = 0;
|
||||
tracer->trace_roots_only = 0;
|
||||
pthread_mutex_init(&tracer->lock, NULL);
|
||||
pthread_cond_init(&tracer->cond, NULL);
|
||||
root_worklist_init(&tracer->roots);
|
||||
size_t desired_worker_count = parallelism;
|
||||
ASSERT(desired_worker_count);
|
||||
if (desired_worker_count > TRACE_WORKERS_MAX_COUNT)
|
||||
desired_worker_count = TRACE_WORKERS_MAX_COUNT;
|
||||
if (!trace_worker_init(&tracer->workers[0], heap, tracer, 0))
|
||||
return 0;
|
||||
tracer->worker_count++;
|
||||
for (size_t i = 1; i < desired_worker_count; i++) {
|
||||
if (!trace_worker_init(&tracer->workers[i], heap, tracer, i))
|
||||
break;
|
||||
pthread_mutex_lock(&tracer->workers[i].lock);
|
||||
if (trace_worker_spawn(&tracer->workers[i]))
|
||||
tracer->worker_count++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void gc_tracer_prepare(struct gc_tracer *tracer) {
|
||||
for (size_t i = 0; i < tracer->worker_count; i++)
|
||||
tracer->workers[i].steal_id = (i + 1) % tracer->worker_count;
|
||||
}
|
||||
static void gc_tracer_release(struct gc_tracer *tracer) {
|
||||
for (size_t i = 0; i < tracer->worker_count; i++)
|
||||
shared_worklist_release(&tracer->workers[i].shared);
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_tracer_add_root(struct gc_tracer *tracer, struct gc_root root) {
|
||||
root_worklist_push(&tracer->roots, root);
|
||||
}
|
||||
|
||||
static inline void
|
||||
tracer_unpark_all_workers(struct gc_tracer *tracer) {
|
||||
long old_epoch =
|
||||
atomic_fetch_add_explicit(&tracer->epoch, 1, memory_order_acq_rel);
|
||||
long epoch = old_epoch + 1;
|
||||
DEBUG("starting trace; %zu workers; epoch=%ld\n", tracer->worker_count,
|
||||
epoch);
|
||||
GC_TRACEPOINT(trace_unpark_all);
|
||||
pthread_cond_broadcast(&tracer->cond);
|
||||
}
|
||||
|
||||
static inline void
|
||||
tracer_maybe_unpark_workers(struct gc_tracer *tracer) {
|
||||
size_t active =
|
||||
atomic_load_explicit(&tracer->active_tracers, memory_order_acquire);
|
||||
if (active < tracer->worker_count)
|
||||
tracer_unpark_all_workers(tracer);
|
||||
}
|
||||
|
||||
static inline void
|
||||
tracer_share(struct gc_trace_worker *worker) {
|
||||
LOG("tracer #%zu: sharing\n", worker->id);
|
||||
GC_TRACEPOINT(trace_share);
|
||||
size_t to_share = LOCAL_WORKLIST_SHARE_AMOUNT;
|
||||
while (to_share) {
|
||||
struct gc_ref *objv;
|
||||
size_t count = local_worklist_pop_many(&worker->local, &objv, to_share);
|
||||
shared_worklist_push_many(&worker->shared, objv, count);
|
||||
to_share -= count;
|
||||
}
|
||||
tracer_maybe_unpark_workers(worker->tracer);
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_trace_worker_enqueue(struct gc_trace_worker *worker, struct gc_ref ref) {
|
||||
ASSERT(gc_ref_is_heap_object(ref));
|
||||
if (local_worklist_full(&worker->local))
|
||||
tracer_share(worker);
|
||||
local_worklist_push(&worker->local, ref);
|
||||
}
|
||||
|
||||
static struct gc_ref
|
||||
tracer_steal_from_worker(struct gc_tracer *tracer, size_t id) {
|
||||
ASSERT(id < tracer->worker_count);
|
||||
return shared_worklist_steal(&tracer->workers[id].shared);
|
||||
}
|
||||
|
||||
static int
|
||||
tracer_can_steal_from_worker(struct gc_tracer *tracer, size_t id) {
|
||||
ASSERT(id < tracer->worker_count);
|
||||
return shared_worklist_can_steal(&tracer->workers[id].shared);
|
||||
}
|
||||
|
||||
static struct gc_ref
|
||||
trace_worker_steal_from_any(struct gc_trace_worker *worker,
|
||||
struct gc_tracer *tracer) {
|
||||
for (size_t i = 0; i < tracer->worker_count; i++) {
|
||||
LOG("tracer #%zu: stealing from #%zu\n", worker->id, worker->steal_id);
|
||||
struct gc_ref obj = tracer_steal_from_worker(tracer, worker->steal_id);
|
||||
if (!gc_ref_is_null(obj)) {
|
||||
LOG("tracer #%zu: stealing got %p\n", worker->id,
|
||||
gc_ref_heap_object(obj));
|
||||
return obj;
|
||||
}
|
||||
worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
|
||||
}
|
||||
LOG("tracer #%zu: failed to steal\n", worker->id);
|
||||
return gc_ref_null();
|
||||
}
|
||||
|
||||
static int
|
||||
trace_worker_can_steal_from_any(struct gc_trace_worker *worker,
|
||||
struct gc_tracer *tracer) {
|
||||
LOG("tracer #%zu: checking if any worker has tasks\n", worker->id);
|
||||
for (size_t i = 0; i < tracer->worker_count; i++) {
|
||||
int res = tracer_can_steal_from_worker(tracer, worker->steal_id);
|
||||
if (res) {
|
||||
LOG("tracer #%zu: worker #%zu has tasks!\n", worker->id,
|
||||
worker->steal_id);
|
||||
return 1;
|
||||
}
|
||||
worker->steal_id = (worker->steal_id + 1) % tracer->worker_count;
|
||||
}
|
||||
LOG("tracer #%zu: nothing to steal\n", worker->id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static size_t
|
||||
trace_worker_should_continue(struct gc_trace_worker *worker, size_t spin_count) {
|
||||
// Helper workers should park themselves immediately if they have no work.
|
||||
if (worker->id != 0)
|
||||
return 0;
|
||||
|
||||
struct gc_tracer *tracer = worker->tracer;
|
||||
|
||||
if (atomic_load_explicit(&tracer->active_tracers, memory_order_acquire) != 1) {
|
||||
LOG("checking for termination: tracers active, spinning #%zu\n", spin_count);
|
||||
yield_for_spin(spin_count);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// All trace workers have exited except us, the main worker. We are
|
||||
// probably done, but we need to synchronize to be sure that there is no
|
||||
// work pending, for example if a worker had a spurious wakeup. Skip
|
||||
// worker 0 (the main worker).
|
||||
|
||||
GC_TRACEPOINT(trace_check_termination_begin);
|
||||
size_t locked = 1;
|
||||
while (locked < tracer->worker_count) {
|
||||
if (pthread_mutex_trylock(&tracer->workers[locked].lock) == 0)
|
||||
locked++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
int done = (locked == tracer->worker_count) &&
|
||||
!trace_worker_can_steal_from_any(worker, tracer);
|
||||
GC_TRACEPOINT(trace_check_termination_end);
|
||||
|
||||
if (done)
|
||||
return 0;
|
||||
while (locked > 1)
|
||||
pthread_mutex_unlock(&tracer->workers[--locked].lock);
|
||||
|
||||
LOG("checking for termination: failed to lock, spinning #%zu\n", spin_count);
|
||||
yield_for_spin(spin_count);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct gc_ref
|
||||
trace_worker_steal(struct gc_trace_worker *worker) {
|
||||
struct gc_tracer *tracer = worker->tracer;
|
||||
|
||||
// It could be that the worker's local trace queue has simply
|
||||
// overflowed. In that case avoid contention by trying to pop
|
||||
// something from the worker's own queue.
|
||||
{
|
||||
LOG("tracer #%zu: trying to pop worker's own deque\n", worker->id);
|
||||
struct gc_ref obj = shared_worklist_try_pop(&worker->shared);
|
||||
if (!gc_ref_is_null(obj))
|
||||
return obj;
|
||||
}
|
||||
|
||||
GC_TRACEPOINT(trace_steal);
|
||||
LOG("tracer #%zu: trying to steal\n", worker->id);
|
||||
struct gc_ref obj = trace_worker_steal_from_any(worker, tracer);
|
||||
if (!gc_ref_is_null(obj))
|
||||
return obj;
|
||||
|
||||
return gc_ref_null();
|
||||
}
|
||||
|
||||
static void
|
||||
trace_with_data(struct gc_tracer *tracer,
|
||||
struct gc_heap *heap,
|
||||
struct gc_trace_worker *worker,
|
||||
struct gc_trace_worker_data *data) {
|
||||
atomic_fetch_add_explicit(&tracer->active_tracers, 1, memory_order_acq_rel);
|
||||
worker->data = data;
|
||||
|
||||
LOG("tracer #%zu: running trace loop\n", worker->id);
|
||||
|
||||
{
|
||||
LOG("tracer #%zu: tracing roots\n", worker->id);
|
||||
size_t n = 0;
|
||||
do {
|
||||
struct gc_root root = root_worklist_pop(&tracer->roots);
|
||||
if (root.kind == GC_ROOT_KIND_NONE)
|
||||
break;
|
||||
trace_root(root, heap, worker);
|
||||
n++;
|
||||
} while (1);
|
||||
|
||||
LOG("tracer #%zu: done tracing roots, %zu roots traced\n", worker->id, n);
|
||||
}
|
||||
|
||||
if (tracer->trace_roots_only) {
|
||||
// Unlike the full trace where work is generated during the trace, a
|
||||
// roots-only trace consumes work monotonically; any object enqueued as a
|
||||
// result of marking roots isn't ours to deal with. However we do need to
|
||||
// synchronize with remote workers to ensure they have completed their
|
||||
// work items.
|
||||
if (worker->id == 0) {
|
||||
for (size_t i = 1; i < tracer->worker_count; i++)
|
||||
pthread_mutex_lock(&tracer->workers[i].lock);
|
||||
}
|
||||
} else {
|
||||
LOG("tracer #%zu: tracing objects\n", worker->id);
|
||||
GC_TRACEPOINT(trace_objects_begin);
|
||||
size_t n = 0;
|
||||
size_t spin_count = 0;
|
||||
do {
|
||||
while (1) {
|
||||
struct gc_ref ref;
|
||||
if (!local_worklist_empty(&worker->local)) {
|
||||
ref = local_worklist_pop(&worker->local);
|
||||
} else {
|
||||
ref = trace_worker_steal(worker);
|
||||
if (gc_ref_is_null(ref))
|
||||
break;
|
||||
}
|
||||
trace_one(ref, heap, worker);
|
||||
n++;
|
||||
}
|
||||
} while (trace_worker_should_continue(worker, spin_count++));
|
||||
GC_TRACEPOINT(trace_objects_end);
|
||||
|
||||
LOG("tracer #%zu: done tracing, %zu objects traced\n", worker->id, n);
|
||||
}
|
||||
|
||||
worker->data = NULL;
|
||||
atomic_fetch_sub_explicit(&tracer->active_tracers, 1, memory_order_acq_rel);
|
||||
}
|
||||
|
||||
static void
|
||||
trace_worker_trace(struct gc_trace_worker *worker) {
|
||||
GC_TRACEPOINT(trace_worker_begin);
|
||||
gc_trace_worker_call_with_data(trace_with_data, worker->tracer,
|
||||
worker->heap, worker);
|
||||
GC_TRACEPOINT(trace_worker_end);
|
||||
}
|
||||
|
||||
static inline int
|
||||
gc_tracer_should_parallelize(struct gc_tracer *tracer) {
|
||||
if (root_worklist_size(&tracer->roots) > 1)
|
||||
return 1;
|
||||
|
||||
if (tracer->trace_roots_only)
|
||||
return 0;
|
||||
|
||||
size_t nonempty_worklists = 0;
|
||||
ssize_t parallel_threshold =
|
||||
LOCAL_WORKLIST_SIZE - LOCAL_WORKLIST_SHARE_AMOUNT;
|
||||
for (size_t i = 0; i < tracer->worker_count; i++) {
|
||||
ssize_t size = shared_worklist_size(&tracer->workers[i].shared);
|
||||
if (!size)
|
||||
continue;
|
||||
nonempty_worklists++;
|
||||
if (nonempty_worklists > 1)
|
||||
return 1;
|
||||
if (size >= parallel_threshold)
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_tracer_trace(struct gc_tracer *tracer) {
|
||||
LOG("starting trace; %zu workers\n", tracer->worker_count);
|
||||
|
||||
for (int i = 1; i < tracer->worker_count; i++)
|
||||
pthread_mutex_unlock(&tracer->workers[i].lock);
|
||||
|
||||
if (gc_tracer_should_parallelize(tracer)) {
|
||||
LOG("waking workers\n");
|
||||
tracer_unpark_all_workers(tracer);
|
||||
} else {
|
||||
LOG("starting in local-only mode\n");
|
||||
}
|
||||
|
||||
trace_worker_trace(&tracer->workers[0]);
|
||||
root_worklist_reset(&tracer->roots);
|
||||
|
||||
LOG("trace finished\n");
|
||||
}
|
||||
|
||||
static inline void
|
||||
gc_tracer_trace_roots(struct gc_tracer *tracer) {
|
||||
LOG("starting roots-only trace\n");
|
||||
|
||||
GC_TRACEPOINT(trace_roots_begin);
|
||||
tracer->trace_roots_only = 1;
|
||||
gc_tracer_trace(tracer);
|
||||
tracer->trace_roots_only = 0;
|
||||
GC_TRACEPOINT(trace_roots_end);
|
||||
|
||||
GC_ASSERT_EQ(atomic_load(&tracer->active_tracers), 0);
|
||||
LOG("roots-only trace finished\n");
|
||||
}
|
||||
|
||||
#endif // PARALLEL_TRACER_H
|
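Putting the pieces together: the tracer is initialized once with a worker count, roots are queued single-threaded, and then either a full trace or a roots-only trace is kicked off; helper workers park on the condition variable between epochs and steal from each other's shared worklists during a trace. A hedged sketch of that lifecycle, using only functions defined above with heap and mutator values as stand-ins:

// Sketch; heap/mutator values are stand-ins for the collector's own state.
//   struct gc_tracer tracer;
//   gc_tracer_init(&tracer, heap, /*parallelism=*/8);   // spawns helpers
//
//   // Each collection cycle:
//   gc_tracer_prepare(&tracer);                 // reset steal order
//   gc_tracer_add_root(&tracer, gc_root_heap(heap));
//   gc_tracer_add_root(&tracer, gc_root_mutator(mutator));
//   gc_tracer_trace(&tracer);                   // full transitive trace, or:
//   gc_tracer_trace_roots(&tracer);             // roots-only pass
//
//   // When tearing down or shrinking auxiliary structures:
//   gc_tracer_release(&tracer);                 // release worklist memory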
1340
libguile/whippet/src/pcc.c
Normal file
File diff suppressed because it is too large
76
libguile/whippet/src/root-worklist.h
Normal file
@@ -0,0 +1,76 @@
#ifndef ROOT_WORKLIST_H
#define ROOT_WORKLIST_H

#include <stdatomic.h>
#include <sys/mman.h>
#include <unistd.h>

#include "assert.h"
#include "debug.h"
#include "gc-inline.h"
#include "gc-ref.h"
#include "root.h"

// A single-producer, multiple-consumer worklist that has two phases:
// one in which roots are added by the producer, then one in which roots
// are consumed from the worklist.  Roots are never added once the
// consumer phase starts.
struct root_worklist {
  size_t size;
  size_t read;
  size_t write;
  struct gc_root *buf;
};

void
root_worklist_alloc(struct root_worklist *q) {
  q->buf = realloc(q->buf, q->size * sizeof(struct gc_root));
  if (!q->buf) {
    perror("Failed to grow root worklist");
    GC_CRASH();
  }
}

static void
root_worklist_init(struct root_worklist *q) {
  q->size = 16;
  q->read = 0;
  q->write = 0;
  q->buf = NULL;
  root_worklist_alloc(q);
}

static inline void
root_worklist_push(struct root_worklist *q, struct gc_root root) {
  if (UNLIKELY(q->write == q->size)) {
    q->size *= 2;
    root_worklist_alloc(q);
  }
  q->buf[q->write++] = root;
}

// Not atomic.
static inline size_t
root_worklist_size(struct root_worklist *q) {
  return q->write - q->read;
}

static inline struct gc_root
root_worklist_pop(struct root_worklist *q) {
  size_t idx = atomic_fetch_add(&q->read, 1);
  if (idx < q->write)
    return q->buf[idx];
  return (struct gc_root){ GC_ROOT_KIND_NONE, };
}

static void
root_worklist_reset(struct root_worklist *q) {
  q->read = q->write = 0;
}

static void
root_worklist_destroy(struct root_worklist *q) {
  free(q->buf);
}

#endif // ROOT_WORKLIST_H
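This worklist has exactly two phases: a single producer pushes roots before tracing starts (growing the buffer by doubling), then any number of tracer workers pop concurrently, with the fetch-and-add in root_worklist_pop handing each root to exactly one worker and a GC_ROOT_KIND_NONE sentinel signalling exhaustion. A short sketch of both phases; heap, worker, and trace_root are stand-ins for the collector's own state and trace hook.

// Sketch; trace_root() is supplied by the collector, not by this header.
struct root_worklist roots;
// root_worklist_init(&roots);
//
// // Phase 1: single producer, before workers are unparked.
// root_worklist_push(&roots, gc_root_heap(heap));
//
// // Phase 2: each tracer worker drains until it sees the sentinel.
// for (struct gc_root r = root_worklist_pop(&roots);
//      r.kind != GC_ROOT_KIND_NONE;
//      r = root_worklist_pop(&roots))
//   trace_root(r, heap, worker);
//
// root_worklist_reset(&roots);   // reuse the buffer next cycle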
81
libguile/whippet/src/root.h
Normal file
@@ -0,0 +1,81 @@
#ifndef ROOT_H
#define ROOT_H

#include "gc-edge.h"
#include "extents.h"

struct gc_ephemeron;
struct gc_heap;
struct gc_mutator;
struct gc_edge_buffer;

enum gc_root_kind {
  GC_ROOT_KIND_NONE,
  GC_ROOT_KIND_HEAP,
  GC_ROOT_KIND_MUTATOR,
  GC_ROOT_KIND_CONSERVATIVE_EDGES,
  GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES,
  GC_ROOT_KIND_RESOLVED_EPHEMERONS,
  GC_ROOT_KIND_EDGE,
  GC_ROOT_KIND_EDGE_BUFFER,
};

struct gc_root {
  enum gc_root_kind kind;
  union {
    struct gc_heap *heap;
    struct gc_mutator *mutator;
    struct gc_ephemeron *resolved_ephemerons;
    struct extent_range range;
    struct gc_edge edge;
    struct gc_edge_buffer *edge_buffer;
  };
};

static inline struct gc_root
gc_root_heap(struct gc_heap* heap) {
  struct gc_root ret = { GC_ROOT_KIND_HEAP };
  ret.heap = heap;
  return ret;
}

static inline struct gc_root
gc_root_mutator(struct gc_mutator* mutator) {
  struct gc_root ret = { GC_ROOT_KIND_MUTATOR };
  ret.mutator = mutator;
  return ret;
}

static inline struct gc_root
gc_root_conservative_edges(uintptr_t lo_addr, uintptr_t hi_addr,
                           int possibly_interior) {
  enum gc_root_kind kind = possibly_interior
    ? GC_ROOT_KIND_CONSERVATIVE_POSSIBLY_INTERIOR_EDGES
    : GC_ROOT_KIND_CONSERVATIVE_EDGES;
  struct gc_root ret = { kind };
  ret.range = (struct extent_range) {lo_addr, hi_addr};
  return ret;
}

static inline struct gc_root
gc_root_resolved_ephemerons(struct gc_ephemeron* resolved) {
  struct gc_root ret = { GC_ROOT_KIND_RESOLVED_EPHEMERONS };
  ret.resolved_ephemerons = resolved;
  return ret;
}

static inline struct gc_root
gc_root_edge(struct gc_edge edge) {
  struct gc_root ret = { GC_ROOT_KIND_EDGE };
  ret.edge = edge;
  return ret;
}

static inline struct gc_root
gc_root_edge_buffer(struct gc_edge_buffer *buf) {
  struct gc_root ret = { GC_ROOT_KIND_EDGE_BUFFER };
  ret.edge_buffer = buf;
  return ret;
}

#endif // ROOT_H
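gc_root is a small tagged union: the kind selects which member of the union is meaningful, and the inline constructors keep the two in sync. The sketch below builds the three most common root kinds and feeds them to the tracer; tracer, heap, mut, and the stack-range addresses are illustrative stand-ins.

// Sketch; all names on the right-hand side are stand-ins except the
// gc_root_* constructors and gc_tracer_add_root defined in this tree.
//   gc_tracer_add_root(&tracer, gc_root_heap(heap));
//   gc_tracer_add_root(&tracer, gc_root_mutator(mut));
//   // Conservative scan of a mutator stack range, interior pointers allowed:
//   gc_tracer_add_root(&tracer,
//                      gc_root_conservative_edges(stack_hot_addr,
//                                                 stack_cold_addr, 1));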
Some files were not shown because too many files have changed in this diff.