1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00

Switch to use Whippet allocation fast paths

* libguile/Makefile.am (noinst_HEADERS, modinclude_HEADERS): Move
gc-inline.h to be a private header.
* libguile/gc-inline.h (scm_inline_gc_malloc_pointerless):
(scm_inline_gc_malloc): Use gc_allocate.
* libguile/intrinsics.c (allocate_words_with_freelist):
(allocate_pointerless_words_with_freelist): Remove these intrinsics.
Renumbers the intrinsics.
(scm_bootstrap_intrinsics):
* libguile/intrinsics.h (SCM_FOR_ALL_VM_INTRINSICS): Adapt to intrinsics
change.
* libguile/jit.c (emit_update_alloc_table):
(emit_allocate_bytes_fast_freelist):
(emit_allocate_words_slow): New helpers.
(compile_allocate_words):
(compile_allocate_words_immediate):
(compile_allocate_words_immediate_slow):
(compile_allocate_pointerless_words):
(compile_allocate_pointerless_words_immediate):
(compile_allocate_pointerless_words_immediate_slow): Use new helpers.
* libguile/threads.c (scm_trace_thread_mutator_roots):
(on_thread_exit):
* libguile/threads.h: Remove Guile-managed thread-local freelists.
This commit is contained in:
Andy Wingo 2025-04-22 13:44:44 +02:00
parent 7696344634
commit 0532602cd3
7 changed files with 93 additions and 185 deletions

View file

@ -539,6 +539,7 @@ noinst_HEADERS = custom-ports.h \
quicksort.i.c \
atomics-internal.h \
cache-internal.h \
gc-inline.h \
gc-internal.h \
posix-w32.h \
private-options.h \
@ -640,7 +641,6 @@ modinclude_HEADERS = \
fports.h \
frames.h \
gc.h \
gc-inline.h \
gettext.h \
generalized-vectors.h \
goops.h \

View file

@ -1,7 +1,7 @@
#ifndef SCM_GC_INLINE_H
#define SCM_GC_INLINE_H
/* Copyright 1995-1996,1998-2004,2006-2014,2018-2019
/* Copyright 1995-1996,1998-2004,2006-2014,2018-2019,2025
Free Software Foundation, Inc.
This file is part of Guile.
@ -40,75 +40,22 @@
#include "libguile/gc.h"
#include "libguile/bdw-gc.h"
#include "libguile/gc-internal.h"
#include "libguile/threads.h"
#include <gc/gc_inline.h> /* GC_generic_malloc_many */
/* Map an allocation size of BYTES to the index of the freelist that
   serves it.  Each freelist covers one granule-sized step, so sizes
   from 1 up to SCM_INLINE_GC_GRANULE_BYTES share index 0, the next
   granule's worth share index 1, and so on.  The subtraction is
   unsigned: for BYTES == 0 it wraps around.  */
static inline size_t
scm_inline_gc_bytes_to_freelist_index (size_t bytes)
{
  size_t last_byte = bytes - 1U;
  return last_byte / SCM_INLINE_GC_GRANULE_BYTES;
}
/* Return the size in bytes of the objects kept on the freelist with
   index IDX: (IDX + 1) granules of SCM_INLINE_GC_GRANULE_BYTES each.  */
static inline size_t
scm_inline_gc_freelist_object_size (size_t idx)
{
  size_t granules = idx + 1U;
  return granules * SCM_INLINE_GC_GRANULE_BYTES;
}
/* The values of these must match the internal POINTERLESS and NORMAL
definitions in libgc, for which unfortunately there are no external
definitions. Alack.

The enumerators carry no explicit initializers, so their values are 0
and 1 in declaration order; do not reorder them.  The kind is passed
straight through to GC_generic_malloc_many by scm_inline_gc_alloc.  */
typedef enum scm_inline_gc_kind
{
/* Implicit value 0.  */
SCM_INLINE_GC_KIND_POINTERLESS,
/* Implicit value 1.  */
SCM_INLINE_GC_KIND_NORMAL
} scm_inline_gc_kind;
/* Pop one object off *FREELIST, the freelist with index IDX.

   If the list is empty it is first refilled through
   GC_generic_malloc_many with objects of the size belonging to IDX and
   of the given KIND.  If it is still empty after the refill, the result
   of calling the function returned by GC_get_oom_fn is returned
   instead.  */
static inline void *
scm_inline_gc_alloc (void **freelist, size_t idx, scm_inline_gc_kind kind)
{
  void *obj = *freelist;

  if (SCM_UNLIKELY (!obj))
    {
      size_t size = scm_inline_gc_freelist_object_size (idx);

      GC_generic_malloc_many (size, kind, freelist);
      obj = *freelist;
      if (SCM_UNLIKELY (!obj))
        return (*GC_get_oom_fn ()) (size);
    }

  /* The first word of a free object holds the link to the next free
     object; unlink OBJ by making that the new list head.  */
  void **link = (void **) obj;
  *freelist = *link;
  return obj;
}
/* Allocate BYTES bytes of pointerless memory on behalf of THREAD.

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message ("Use
   gc_allocate"), the freelist lookup is the removed body and the
   gc_allocate call is its replacement; they are not meant to run in
   sequence -- confirm against the real file.  */
static inline void *
scm_inline_gc_malloc_pointerless (scm_thread *thread, size_t bytes)
{
/* Freelist variant: sizes whose freelist index is out of range go
   straight to GC_malloc_atomic; everything else is popped from the
   thread's pointerless freelist for that index.  */
size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes);
if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT))
return GC_malloc_atomic (bytes);
return scm_inline_gc_alloc
(&thread->pointerless_freelists[idx], idx, SCM_INLINE_GC_KIND_POINTERLESS);
/* gc_allocate variant: hand the whole request to the thread's
   mutator.  */
return gc_allocate (thread->mutator, bytes,
GC_ALLOCATION_UNTAGGED_POINTERLESS);
}
/* Allocate BYTES bytes of ordinary (GC-traced, "tagged") memory on
   behalf of THREAD.

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message ("Use
   gc_allocate"), the freelist lookup is the removed body and the
   gc_allocate call is its replacement; they are not meant to run in
   sequence -- confirm against the real file.  */
static inline void *
scm_inline_gc_malloc (scm_thread *thread, size_t bytes)
{
/* Freelist variant: sizes whose freelist index is out of range go
   straight to GC_malloc; everything else is popped from the thread's
   normal freelist for that index.  */
size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes);
if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT))
return GC_malloc (bytes);
return scm_inline_gc_alloc
(&thread->freelists[idx], idx, SCM_INLINE_GC_KIND_NORMAL);
/* gc_allocate variant: hand the whole request to the thread's
   mutator.  */
return gc_allocate (thread->mutator, bytes, GC_ALLOCATION_TAGGED);
}
static inline void *

View file

@ -1,4 +1,4 @@
/* Copyright 2018-2021, 2023
/* Copyright 2018-2021, 2023, 2025
Free Software Foundation, Inc.
This file is part of Guile.
@ -466,30 +466,12 @@ allocate_words (scm_thread *thread, size_t n)
return SCM_PACK_POINTER (scm_inline_gc_malloc_words (thread, n));
}
/* Intrinsic: take one object from THREAD's normal-kind freelist number
   FREELIST_IDX (refilling it if needed, see scm_inline_gc_alloc) and
   return it packed as an SCM.  */
static SCM
allocate_words_with_freelist (scm_thread *thread, size_t freelist_idx)
{
  void **freelist = &thread->freelists[freelist_idx];
  void *obj = scm_inline_gc_alloc (freelist, freelist_idx,
                                   SCM_INLINE_GC_KIND_NORMAL);

  return SCM_PACK_POINTER (obj);
}
/* Intrinsic: allocate N words of pointerless memory for THREAD via
   scm_inline_gc_malloc_pointerless_words and return the result packed
   as an SCM.  */
static SCM
allocate_pointerless_words (scm_thread *thread, size_t n)
{
  void *mem = scm_inline_gc_malloc_pointerless_words (thread, n);

  return SCM_PACK_POINTER (mem);
}
/* Intrinsic: take one object from THREAD's pointerless freelist number
   FREELIST_IDX (refilling it if needed, see scm_inline_gc_alloc) and
   return it packed as an SCM.  */
static SCM
allocate_pointerless_words_with_freelist (scm_thread *thread, size_t freelist_idx)
{
  void **freelist = &thread->pointerless_freelists[freelist_idx];
  void *obj = scm_inline_gc_alloc (freelist, freelist_idx,
                                   SCM_INLINE_GC_KIND_POINTERLESS);

  return SCM_PACK_POINTER (obj);
}
static SCM
current_module (scm_thread *thread)
{
@ -641,7 +623,6 @@ scm_bootstrap_intrinsics (void)
scm_vm_intrinsics.allocate_words = allocate_words;
scm_vm_intrinsics.current_module = current_module;
scm_vm_intrinsics.push_prompt = push_prompt;
scm_vm_intrinsics.allocate_words_with_freelist = allocate_words_with_freelist;
scm_vm_intrinsics.abs = scm_abs;
scm_vm_intrinsics.sqrt = scm_sqrt;
scm_vm_intrinsics.fabs = fabs;
@ -665,8 +646,6 @@ scm_bootstrap_intrinsics (void)
scm_vm_intrinsics.fatan = atan;
scm_vm_intrinsics.fatan2 = atan2;
scm_vm_intrinsics.allocate_pointerless_words = allocate_pointerless_words;
scm_vm_intrinsics.allocate_pointerless_words_with_freelist =
allocate_pointerless_words_with_freelist;
scm_vm_intrinsics.inexact = scm_exact_to_inexact;
scm_vm_intrinsics.string_to_utf8 = scm_string_to_utf8;
scm_vm_intrinsics.string_utf8_length = INT64_INTRINSIC (string_utf8_length);

View file

@ -1,4 +1,4 @@
/* Copyright 2018-2021, 2023
/* Copyright 2018-2021, 2023, 2025
Free Software Foundation, Inc.
This file is part of Guile.
@ -173,7 +173,6 @@ typedef void (*scm_t_scm_uimm_scm_intrinsic) (SCM, uint8_t, SCM);
M(thread_u8_scm_sp_vra_mra, push_prompt, "push-prompt", PUSH_PROMPT) \
M(thread_scm, unpack_values_object, "unpack-values-object", UNPACK_VALUES_OBJECT) \
M(vcode, handle_interrupt_code, "%handle-interrupt-code", HANDLE_INTERRUPT_CODE) \
M(scm_from_thread_sz, allocate_words_with_freelist, "allocate-words/freelist", ALLOCATE_WORDS_WITH_FREELIST) \
M(scm_from_scm, abs, "abs", ABS) \
M(scm_from_scm, sqrt, "sqrt", SQRT) \
M(f64_from_f64, fabs, "fabs", FABS) \
@ -197,7 +196,6 @@ typedef void (*scm_t_scm_uimm_scm_intrinsic) (SCM, uint8_t, SCM);
M(f64_from_f64, fatan, "fatan", FATAN) \
M(f64_from_f64_f64, fatan2, "fatan2", FATAN2) \
M(scm_from_thread_sz, allocate_pointerless_words, "allocate-pointerless-words", ALLOCATE_POINTERLESS_WORDS) \
M(scm_from_thread_sz, allocate_pointerless_words_with_freelist, "allocate-pointerless-words/freelist", ALLOCATE_POINTERLESS_WORDS_WITH_FREELIST) \
M(scm_from_scm, inexact, "inexact", INEXACT) \
M(f64_from_s64, s64_to_f64, "s64->f64", S64_TO_F64) \
M(scm_from_scm, car, "$car", CAR) \

View file

@ -2285,18 +2285,60 @@ compile_bind_optionals_slow (scm_jit_state *j, uint32_t nlocals)
{
}
/* Placeholder for emitting an allocation-table update after an inline
   allocation.  When the allocator reports an allocation-table alignment
   of zero there is nothing to emit.  Any other configuration is not
   implemented yet and dies.  J, OBJ, SIZE and KIND are currently
   unused.  */
static void
emit_update_alloc_table(scm_jit_state *j, jit_gpr_t obj, size_t size,
                        enum gc_allocation_kind kind)
{
  if (gc_allocator_alloc_table_alignment () != 0)
    {
      DIE ("allocation table unimplemented");
    }
}
/* Emit the inline fast path that allocates an object of BYTES bytes and
   allocation KIND by popping the matching freelist of the current
   thread's mutator.  The object ends up in DST; TMP1 and TMP2 are
   clobbered.  If the freelist is empty, control branches to the
   instruction's slow path, which is registered here and must be
   emitted separately.  */
static inline void
emit_allocate_bytes_fast_freelist (scm_jit_state *j, jit_gpr_t dst, size_t bytes,
                                   enum gc_allocation_kind kind,
                                   jit_gpr_t tmp1, jit_gpr_t tmp2)
{
  /* TMP1 <- THREAD->mutator.  */
  emit_ldxi (j, tmp1, THREAD, offsetof(struct scm_thread, mutator));

  /* DST <- head of the freelist for this size and kind; a null head
     means the freelist is empty, so take the slow path.  */
  size_t freelist_off = gc_allocator_freelist_offset (bytes, kind);
  emit_ldxi (j, dst, tmp1, freelist_off);
  add_slow_path_patch (j, jit_beqi (j->jit, dst, 0));

  /* Pop: the first word of the object is the next freelist entry, which
     becomes the new head.  */
  emit_ldr (j, tmp2, dst);
  jit_stxi (j->jit, freelist_off, tmp1, tmp2);

  emit_update_alloc_table(j, dst, bytes, kind);
}
/* Emit an out-of-line allocation of NWORDS words: store the current IP
   (RES is the register handed to emit_store_current_ip), call the
   allocation intrinsic that matches KIND with the current thread and
   NWORDS, move the return value into RES, and reload SP.  Only
   GC_ALLOCATION_TAGGED and GC_ALLOCATION_UNTAGGED_POINTERLESS are
   supported; any other KIND dies.  */
static inline void
emit_allocate_words_slow (scm_jit_state *j, jit_gpr_t res, jit_operand_t nwords,
                          enum gc_allocation_kind kind)
{
  SCM (*allocator)(struct scm_thread *, size_t);

  emit_store_current_ip (j, res);

  if (kind == GC_ALLOCATION_TAGGED)
    {
      allocator = scm_vm_intrinsics.allocate_words;
    }
  else if (kind == GC_ALLOCATION_UNTAGGED_POINTERLESS)
    {
      allocator = scm_vm_intrinsics.allocate_pointerless_words;
    }
  else
    {
      DIE ("unknown allocation kind");
    }

  emit_call_2 (j, allocator, thread_operand(), nwords);
  emit_retval (j, res);
  emit_reload_sp (j);
}
/* JIT-compile an allocate-words instruction whose word count is not an
   immediate: the count operand is built with sp_sz_operand from NWORDS
   (presumably a stack slot index -- confirm), and the result is written
   to stack slot DST.  There is no inline fast path here; the allocation
   is always done by calling the allocate_words intrinsic.

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message ("Use new
   helpers"), the open-coded intrinsic call is the removed body and the
   emit_allocate_words_slow call is its replacement; they are not meant
   to run in sequence -- confirm against the real file.  */
static void
compile_allocate_words (scm_jit_state *j, uint32_t dst, uint32_t nwords)
{
/* Open-coded variant: call the intrinsic and collect its result in T0.  */
jit_gpr_t t = T0;
emit_store_current_ip (j, t);
emit_call_2 (j, scm_vm_intrinsics.allocate_words, thread_operand (),
sp_sz_operand (j, nwords));
emit_retval (j, t);
record_gpr_clobber (j, t);
emit_reload_sp (j);
emit_sp_set_scm (j, dst, t);
/* Helper-based variant: same call sequence via emit_allocate_words_slow.  */
emit_allocate_words_slow (j, T0, sp_sz_operand (j, nwords),
GC_ALLOCATION_TAGGED);
emit_sp_set_scm (j, dst, T0);
}
static void
compile_allocate_words_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords)
@ -2307,49 +2349,30 @@ static void
compile_allocate_words_immediate (scm_jit_state *j, uint32_t dst, uint32_t nwords)
{
/* JIT-compile an allocate-words instruction whose word count NWORDS is
   known at compile time; the result is written to stack slot DST.

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message, the code that
   indexes thread->freelists or calls GC_malloc directly is the removed
   body, and the code that uses emit_allocate_bytes_fast_freelist and
   emit_allocate_words_slow is its replacement -- confirm against the
   real file before relying on the statement order shown here.  */
size_t bytes = nwords * sizeof(SCM);
/* Removed variant: objects too large for any thread-local freelist are
   allocated with a direct call to GC_malloc.  */
size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes);
if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT))
{
jit_gpr_t t = T0;
emit_store_current_ip (j, t);
emit_call_1 (j, GC_malloc, jit_operand_imm (JIT_OPERAND_ABI_WORD, bytes));
emit_retval (j, t);
emit_reload_sp (j);
emit_sp_set_scm (j, dst, t);
}
/* New variant: use the inline freelist fast path only when the
   allocator is of the inline-freelist kind and the object is no larger
   than the allocator's large-object threshold.  */
if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST &&
bytes <= gc_allocator_large_threshold ())
emit_allocate_bytes_fast_freelist (j, T0, bytes, GC_ALLOCATION_TAGGED,
T1, T2);
else
{
/* Removed variant: pop the head of thread->freelists[idx] inline,
   branching to the slow path when the freelist is empty.  */
jit_gpr_t res = T0;
ptrdiff_t offset = offsetof(struct scm_thread, freelists);
offset += idx * sizeof(void*);
emit_ldxi (j, res, THREAD, offset);
add_slow_path_patch (j, jit_beqi (j->jit, res, 0));
jit_gpr_t new_freelist = T1;
emit_ldr (j, new_freelist, res);
jit_stxi (j->jit, offset, THREAD, new_freelist);
emit_sp_set_scm (j, dst, res);
}
/* New variant: without a fast path, call the allocate_words intrinsic
   with the immediate word count.  */
emit_allocate_words_slow (j, T0,
jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords),
GC_ALLOCATION_TAGGED);
emit_sp_set_scm (j, dst, T0);
}
/* Emit the slow path that backs the inline fast path of
   compile_allocate_words_immediate; it ends by continuing at the next
   instruction (j->next_ip).

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message, the freelist
   index test and the allocate_words_with_freelist call are the removed
   body, and the gc_allocator_kind test with emit_allocate_words_slow is
   its replacement -- confirm against the real file before relying on
   the brace structure shown here.  */
static void
compile_allocate_words_immediate_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords)
{
size_t bytes = nwords * sizeof(SCM);
/* Removed variant: no slow path for sizes that have no freelist.  */
size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes);
if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT))
/* Only emit a slow path if there is a fast path. */
if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST &&
bytes <= gc_allocator_large_threshold ())
{
}
else
{
/* Removed variant: refill and pop the freelist via the intrinsic.  */
jit_gpr_t res = T0;
emit_store_current_ip (j, res);
emit_call_2 (j, scm_vm_intrinsics.allocate_words_with_freelist,
thread_operand (),
jit_operand_imm (JIT_OPERAND_ABI_WORD, idx));
emit_retval (j, res);
emit_reload_sp (j);
emit_sp_set_scm (j, dst, res);
/* New variant: call the allocate_words intrinsic with the immediate
   word count.  */
emit_allocate_words_slow (j, T0,
jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords),
GC_ALLOCATION_TAGGED);
emit_sp_set_scm (j, dst, T0);
continue_after_slow_path (j, j->next_ip);
}
}
@ -2357,15 +2380,9 @@ compile_allocate_words_immediate_slow (scm_jit_state *j, uint32_t dst, uint32_t
/* JIT-compile an allocate-pointerless-words instruction whose word
   count is not an immediate: the count operand is built with
   sp_sz_operand from NWORDS (presumably a stack slot index -- confirm),
   and the result is written to stack slot DST.  There is no inline fast
   path here; the allocation is always done by calling the
   allocate_pointerless_words intrinsic.

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message ("Use new
   helpers"), the open-coded intrinsic call is the removed body and the
   emit_allocate_words_slow call is its replacement; they are not meant
   to run in sequence -- confirm against the real file.  */
static void
compile_allocate_pointerless_words (scm_jit_state *j, uint32_t dst, uint32_t nwords)
{
/* Open-coded variant: call the intrinsic and collect its result in T0.  */
jit_gpr_t t = T0;
emit_store_current_ip (j, t);
emit_call_2 (j, scm_vm_intrinsics.allocate_pointerless_words, thread_operand (),
sp_sz_operand (j, nwords));
emit_retval (j, t);
record_gpr_clobber (j, t);
emit_reload_sp (j);
emit_sp_set_scm (j, dst, t);
/* Helper-based variant: same call sequence via emit_allocate_words_slow.  */
emit_allocate_words_slow (j, T0, sp_sz_operand (j, nwords),
GC_ALLOCATION_UNTAGGED_POINTERLESS);
emit_sp_set_scm (j, dst, T0);
}
static void
compile_allocate_pointerless_words_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords)
@ -2376,49 +2393,31 @@ static void
compile_allocate_pointerless_words_immediate (scm_jit_state *j, uint32_t dst, uint32_t nwords)
{
/* JIT-compile an allocate-pointerless-words instruction whose word
   count NWORDS is known at compile time; the result is written to stack
   slot DST.

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message, the code that
   indexes thread->pointerless_freelists or calls GC_malloc_atomic
   directly is the removed body, and the code that uses
   emit_allocate_bytes_fast_freelist and emit_allocate_words_slow is its
   replacement -- confirm against the real file before relying on the
   statement order shown here.  */
size_t bytes = nwords * sizeof(SCM);
/* Removed variant: objects too large for any thread-local freelist are
   allocated with a direct call to GC_malloc_atomic.  */
size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes);
if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT))
{
jit_gpr_t t = T0;
emit_store_current_ip (j, t);
emit_call_1 (j, GC_malloc_atomic, jit_operand_imm (JIT_OPERAND_ABI_WORD, bytes));
emit_retval (j, t);
emit_reload_sp (j);
emit_sp_set_scm (j, dst, t);
}
/* New variant: use the inline freelist fast path only when the
   allocator is of the inline-freelist kind and the object is no larger
   than the allocator's large-object threshold.  */
if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST &&
bytes <= gc_allocator_large_threshold ())
emit_allocate_bytes_fast_freelist (j, T0, bytes,
GC_ALLOCATION_UNTAGGED_POINTERLESS,
T1, T2);
else
{
/* Removed variant: pop the head of thread->pointerless_freelists[idx]
   inline, branching to the slow path when the freelist is empty.  */
jit_gpr_t res = T0;
ptrdiff_t offset = offsetof(struct scm_thread, pointerless_freelists);
offset += idx * sizeof(void*);
emit_ldxi (j, res, THREAD, offset);
add_slow_path_patch (j, jit_beqi (j->jit, res, 0));
jit_gpr_t new_freelist = T1;
emit_ldr (j, new_freelist, res);
jit_stxi (j->jit, offset, THREAD, new_freelist);
emit_sp_set_scm (j, dst, res);
}
/* New variant: without a fast path, call the
   allocate_pointerless_words intrinsic with the immediate word count.  */
emit_allocate_words_slow (j, T0,
jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords),
GC_ALLOCATION_UNTAGGED_POINTERLESS);
emit_sp_set_scm (j, dst, T0);
}
/* Emit the slow path that backs the inline fast path of
   compile_allocate_pointerless_words_immediate; it ends by continuing
   at the next instruction (j->next_ip).

   NOTE(review): this span comes from a diff view that shows removed and
   added lines together.  Judging by the commit message, the freelist
   index test and the allocate_pointerless_words_with_freelist call are
   the removed body, and the gc_allocator_kind test with
   emit_allocate_words_slow is its replacement -- confirm against the
   real file before relying on the brace structure shown here.  */
static void
compile_allocate_pointerless_words_immediate_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords)
{
size_t bytes = nwords * sizeof(SCM);
/* Removed variant: no slow path for sizes that have no freelist.  */
size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes);
if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT))
/* Only emit a slow path if there is a fast path. */
if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST &&
bytes <= gc_allocator_large_threshold ())
{
}
else
{
/* Removed variant: refill and pop the freelist via the intrinsic.  */
jit_gpr_t res = T0;
emit_store_current_ip (j, res);
emit_call_2 (j, scm_vm_intrinsics.allocate_pointerless_words_with_freelist,
thread_operand (),
jit_operand_imm (JIT_OPERAND_ABI_WORD, idx));
emit_retval (j, res);
emit_reload_sp (j);
emit_sp_set_scm (j, dst, res);
/* New variant: call the allocate_pointerless_words intrinsic with the
   immediate word count.  */
emit_allocate_words_slow (j, T0,
jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords),
GC_ALLOCATION_UNTAGGED_POINTERLESS);
emit_sp_set_scm (j, dst, T0);
continue_after_slow_path (j, j->next_ip);
}
}

View file

@ -132,15 +132,6 @@ scm_trace_thread_mutator_roots (struct scm_thread *thread,
{
scm_trace_vm (&thread->vm, trace_edge, heap, trace_data);
/* FIXME: Remove these in favor of Whippet inline allocation. */
for (size_t i = 0; i < SCM_INLINE_GC_FREELIST_COUNT; i++)
trace_edge (gc_edge (&thread->freelists[i]), heap, trace_data);
for (size_t i = 0; i < SCM_INLINE_GC_FREELIST_COUNT; i++)
for (void **loc = &thread->pointerless_freelists[i];
*loc;
loc = (void **) *loc)
trace_edge (gc_edge (loc), heap, trace_data);
/* FIXME: Call instead via gc_trace_object. */
scm_trace_thread (thread, trace_edge, heap, trace_data);
}
@ -521,8 +512,6 @@ on_thread_exit (void *v)
/* Although this thread has exited, the thread object might still be
alive. Release unused memory. */
for (size_t n = 0; n < SCM_INLINE_GC_FREELIST_COUNT; n++)
t->freelists[n] = t->pointerless_freelists[n] = NULL;
t->dynamic_state = NULL;
t->dynstack.base = NULL;
t->dynstack.top = NULL;

View file

@ -77,10 +77,6 @@ struct scm_thread {
/* Every thread is a mutator for the GC. */
struct gc_mutator *mutator;
/* Thread-local freelists; see gc-inline.h. */
void *freelists[SCM_INLINE_GC_FREELIST_COUNT];
void *pointerless_freelists[SCM_INLINE_GC_FREELIST_COUNT];
scm_i_pthread_t pthread;
SCM result;