From 0532602cd3a2b615b4b5130bba475cce8b3382c0 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 22 Apr 2025 13:44:44 +0200 Subject: [PATCH] Switch to use Whippet allocation fast paths * libguile/Makefile.am (noinst_HEADERS, modinclude_HEADERS): Move gc-inline.h to be a private header. * libguile/gc-inline.h (scm_inline_gc_malloc_pointerless): (scm_inline_gc_malloc): Use gc_allocate. * libguile/intrinsics.c (allocate_words_with_freelist): (allocate_pointerless_words_with_freelist): Remove these intrinsics. Renumbers the intrinsics. (scm_bootstrap_intrinsics): * libguile/intrinsics.h (SCM_FOR_ALL_VM_INTRINSICS): Adapt to intrinsics change. * libguile/jit.c (emit_update_alloc_table): (emit_allocate_bytes_fast_freelist): (emit_allocate_words_slow): New helpers. (compile_allocate_words): (compile_allocate_words_immediate): (compile_allocate_words_immediate_slow): (compile_allocate_pointerless_words): (compile_allocate_pointerless_words_immediate): (compile_allocate_pointerless_words_immediate_slow): Use new helpers. * libguile/threads.c (scm_trace_thread_mutator_roots): (on_thread_exit): * libguile/threads.h: Remove Guile-managed thread-local freelists. 
--- libguile/Makefile.am | 2 +- libguile/gc-inline.h | 63 ++-------------- libguile/intrinsics.c | 23 +----- libguile/intrinsics.h | 4 +- libguile/jit.c | 171 +++++++++++++++++++++--------------------- libguile/threads.c | 11 --- libguile/threads.h | 4 - 7 files changed, 93 insertions(+), 185 deletions(-) diff --git a/libguile/Makefile.am b/libguile/Makefile.am index dbd91e7fe..53ba9dcff 100644 --- a/libguile/Makefile.am +++ b/libguile/Makefile.am @@ -539,6 +539,7 @@ noinst_HEADERS = custom-ports.h \ quicksort.i.c \ atomics-internal.h \ cache-internal.h \ + gc-inline.h \ gc-internal.h \ posix-w32.h \ private-options.h \ @@ -640,7 +641,6 @@ modinclude_HEADERS = \ fports.h \ frames.h \ gc.h \ - gc-inline.h \ gettext.h \ generalized-vectors.h \ goops.h \ diff --git a/libguile/gc-inline.h b/libguile/gc-inline.h index cb55aa86a..2949d335f 100644 --- a/libguile/gc-inline.h +++ b/libguile/gc-inline.h @@ -1,7 +1,7 @@ #ifndef SCM_GC_INLINE_H #define SCM_GC_INLINE_H -/* Copyright 1995-1996,1998-2004,2006-2014,2018-2019 +/* Copyright 1995-1996,1998-2004,2006-2014,2018-2019,2025 Free Software Foundation, Inc. This file is part of Guile. @@ -40,75 +40,22 @@ #include "libguile/gc.h" -#include "libguile/bdw-gc.h" +#include "libguile/gc-internal.h" #include "libguile/threads.h" -#include <gc/gc_inline.h> /* GC_generic_malloc_many */ - -static inline size_t -scm_inline_gc_bytes_to_freelist_index (size_t bytes) -{ - return (bytes - 1U) / SCM_INLINE_GC_GRANULE_BYTES; -} - -static inline size_t -scm_inline_gc_freelist_object_size (size_t idx) -{ - return (idx + 1U) * SCM_INLINE_GC_GRANULE_BYTES; -} - -/* The values of these must match the internal POINTERLESS and NORMAL - definitions in libgc, for which unfortunately there are no external - definitions. Alack. 
*/ -typedef enum scm_inline_gc_kind - { - SCM_INLINE_GC_KIND_POINTERLESS, - SCM_INLINE_GC_KIND_NORMAL - } scm_inline_gc_kind; - -static inline void * -scm_inline_gc_alloc (void **freelist, size_t idx, scm_inline_gc_kind kind) -{ - void *head = *freelist; - - if (SCM_UNLIKELY (!head)) - { - size_t bytes = scm_inline_gc_freelist_object_size (idx); - GC_generic_malloc_many (bytes, kind, freelist); - head = *freelist; - if (SCM_UNLIKELY (!head)) - return (*GC_get_oom_fn ()) (bytes); - } - - *freelist = *(void **)(head); - - return head; -} - static inline void * scm_inline_gc_malloc_pointerless (scm_thread *thread, size_t bytes) { - size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes); - - if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT)) - return GC_malloc_atomic (bytes); - - return scm_inline_gc_alloc - (&thread->pointerless_freelists[idx], idx, SCM_INLINE_GC_KIND_POINTERLESS); + return gc_allocate (thread->mutator, bytes, + GC_ALLOCATION_UNTAGGED_POINTERLESS); } static inline void * scm_inline_gc_malloc (scm_thread *thread, size_t bytes) { - size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes); - - if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT)) - return GC_malloc (bytes); - - return scm_inline_gc_alloc - (&thread->freelists[idx], idx, SCM_INLINE_GC_KIND_NORMAL); + return gc_allocate (thread->mutator, bytes, GC_ALLOCATION_TAGGED); } static inline void * diff --git a/libguile/intrinsics.c b/libguile/intrinsics.c index 99c044cbd..b2b823c81 100644 --- a/libguile/intrinsics.c +++ b/libguile/intrinsics.c @@ -1,4 +1,4 @@ -/* Copyright 2018-2021, 2023 +/* Copyright 2018-2021, 2023, 2025 Free Software Foundation, Inc. This file is part of Guile. 
@@ -466,30 +466,12 @@ allocate_words (scm_thread *thread, size_t n) return SCM_PACK_POINTER (scm_inline_gc_malloc_words (thread, n)); } -static SCM -allocate_words_with_freelist (scm_thread *thread, size_t freelist_idx) -{ - return SCM_PACK_POINTER - (scm_inline_gc_alloc (&thread->freelists[freelist_idx], - freelist_idx, - SCM_INLINE_GC_KIND_NORMAL)); -} - static SCM allocate_pointerless_words (scm_thread *thread, size_t n) { return SCM_PACK_POINTER (scm_inline_gc_malloc_pointerless_words (thread, n)); } -static SCM -allocate_pointerless_words_with_freelist (scm_thread *thread, size_t freelist_idx) -{ - return SCM_PACK_POINTER - (scm_inline_gc_alloc (&thread->pointerless_freelists[freelist_idx], - freelist_idx, - SCM_INLINE_GC_KIND_POINTERLESS)); -} - static SCM current_module (scm_thread *thread) { @@ -641,7 +623,6 @@ scm_bootstrap_intrinsics (void) scm_vm_intrinsics.allocate_words = allocate_words; scm_vm_intrinsics.current_module = current_module; scm_vm_intrinsics.push_prompt = push_prompt; - scm_vm_intrinsics.allocate_words_with_freelist = allocate_words_with_freelist; scm_vm_intrinsics.abs = scm_abs; scm_vm_intrinsics.sqrt = scm_sqrt; scm_vm_intrinsics.fabs = fabs; @@ -665,8 +646,6 @@ scm_bootstrap_intrinsics (void) scm_vm_intrinsics.fatan = atan; scm_vm_intrinsics.fatan2 = atan2; scm_vm_intrinsics.allocate_pointerless_words = allocate_pointerless_words; - scm_vm_intrinsics.allocate_pointerless_words_with_freelist = - allocate_pointerless_words_with_freelist; scm_vm_intrinsics.inexact = scm_exact_to_inexact; scm_vm_intrinsics.string_to_utf8 = scm_string_to_utf8; scm_vm_intrinsics.string_utf8_length = INT64_INTRINSIC (string_utf8_length); diff --git a/libguile/intrinsics.h b/libguile/intrinsics.h index 8a1c7c04e..5194ff4c4 100644 --- a/libguile/intrinsics.h +++ b/libguile/intrinsics.h @@ -1,4 +1,4 @@ -/* Copyright 2018-2021, 2023 +/* Copyright 2018-2021, 2023, 2025 Free Software Foundation, Inc. This file is part of Guile. 
@@ -173,7 +173,6 @@ typedef void (*scm_t_scm_uimm_scm_intrinsic) (SCM, uint8_t, SCM); M(thread_u8_scm_sp_vra_mra, push_prompt, "push-prompt", PUSH_PROMPT) \ M(thread_scm, unpack_values_object, "unpack-values-object", UNPACK_VALUES_OBJECT) \ M(vcode, handle_interrupt_code, "%handle-interrupt-code", HANDLE_INTERRUPT_CODE) \ - M(scm_from_thread_sz, allocate_words_with_freelist, "allocate-words/freelist", ALLOCATE_WORDS_WITH_FREELIST) \ M(scm_from_scm, abs, "abs", ABS) \ M(scm_from_scm, sqrt, "sqrt", SQRT) \ M(f64_from_f64, fabs, "fabs", FABS) \ @@ -197,7 +196,6 @@ typedef void (*scm_t_scm_uimm_scm_intrinsic) (SCM, uint8_t, SCM); M(f64_from_f64, fatan, "fatan", FATAN) \ M(f64_from_f64_f64, fatan2, "fatan2", FATAN2) \ M(scm_from_thread_sz, allocate_pointerless_words, "allocate-pointerless-words", ALLOCATE_POINTERLESS_WORDS) \ - M(scm_from_thread_sz, allocate_pointerless_words_with_freelist, "allocate-pointerless-words/freelist", ALLOCATE_POINTERLESS_WORDS_WITH_FREELIST) \ M(scm_from_scm, inexact, "inexact", INEXACT) \ M(f64_from_s64, s64_to_f64, "s64->f64", S64_TO_F64) \ M(scm_from_scm, car, "$car", CAR) \ diff --git a/libguile/jit.c b/libguile/jit.c index cb96088b7..c8919a3c9 100644 --- a/libguile/jit.c +++ b/libguile/jit.c @@ -2285,18 +2285,60 @@ compile_bind_optionals_slow (scm_jit_state *j, uint32_t nlocals) { } +static void +emit_update_alloc_table(scm_jit_state *j, jit_gpr_t obj, size_t size, + enum gc_allocation_kind kind) +{ + size_t alignment = gc_allocator_alloc_table_alignment(); + if (!alignment) return; + DIE ("allocation table unimplemented"); +} + +static inline void +emit_allocate_bytes_fast_freelist (scm_jit_state *j, jit_gpr_t dst, size_t bytes, + enum gc_allocation_kind kind, + jit_gpr_t tmp1, jit_gpr_t tmp2) +{ + jit_gpr_t mut = tmp1; + emit_ldxi (j, mut, THREAD, offsetof(struct scm_thread, mutator)); + size_t offset = gc_allocator_freelist_offset (bytes, kind); + emit_ldxi (j, dst, mut, offset); + add_slow_path_patch (j, jit_beqi (j->jit, dst, 0)); 
+ jit_gpr_t new_freelist = tmp2; + emit_ldr (j, new_freelist, dst); + jit_stxi (j->jit, offset, mut, new_freelist); + emit_update_alloc_table(j, dst, bytes, kind); +} + +static inline void +emit_allocate_words_slow (scm_jit_state *j, jit_gpr_t res, jit_operand_t nwords, + enum gc_allocation_kind kind) +{ + emit_store_current_ip (j, res); + SCM (*intrinsic)(struct scm_thread *, size_t); + switch (kind) + { + case GC_ALLOCATION_TAGGED: + intrinsic = scm_vm_intrinsics.allocate_words; + break; + case GC_ALLOCATION_UNTAGGED_POINTERLESS: + intrinsic = scm_vm_intrinsics.allocate_pointerless_words; + break; + default: + DIE ("unknown allocation kind"); + } + + emit_call_2 (j, intrinsic, thread_operand(), nwords); + emit_retval (j, res); + emit_reload_sp (j); +} + static void compile_allocate_words (scm_jit_state *j, uint32_t dst, uint32_t nwords) { - jit_gpr_t t = T0; - - emit_store_current_ip (j, t); - emit_call_2 (j, scm_vm_intrinsics.allocate_words, thread_operand (), - sp_sz_operand (j, nwords)); - emit_retval (j, t); - record_gpr_clobber (j, t); - emit_reload_sp (j); - emit_sp_set_scm (j, dst, t); + emit_allocate_words_slow (j, T0, sp_sz_operand (j, nwords), + GC_ALLOCATION_TAGGED); + emit_sp_set_scm (j, dst, T0); } static void compile_allocate_words_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords) @@ -2307,49 +2349,30 @@ static void compile_allocate_words_immediate (scm_jit_state *j, uint32_t dst, uint32_t nwords) { size_t bytes = nwords * sizeof(SCM); - size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes); - if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT)) - { - jit_gpr_t t = T0; - emit_store_current_ip (j, t); - emit_call_1 (j, GC_malloc, jit_operand_imm (JIT_OPERAND_ABI_WORD, bytes)); - emit_retval (j, t); - emit_reload_sp (j); - emit_sp_set_scm (j, dst, t); - } + if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST && + bytes <= gc_allocator_large_threshold ()) + emit_allocate_bytes_fast_freelist (j, T0, bytes, GC_ALLOCATION_TAGGED, + T1, 
T2); else - { - jit_gpr_t res = T0; - ptrdiff_t offset = offsetof(struct scm_thread, freelists); - offset += idx * sizeof(void*); - emit_ldxi (j, res, THREAD, offset); - add_slow_path_patch (j, jit_beqi (j->jit, res, 0)); - jit_gpr_t new_freelist = T1; - emit_ldr (j, new_freelist, res); - jit_stxi (j->jit, offset, THREAD, new_freelist); - emit_sp_set_scm (j, dst, res); - } + emit_allocate_words_slow (j, T0, + jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords), + GC_ALLOCATION_TAGGED); + emit_sp_set_scm (j, dst, T0); } static void compile_allocate_words_immediate_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords) { size_t bytes = nwords * sizeof(SCM); - size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes); - if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT)) + /* Only emit a slow path if there is a fast path. */ + if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST && + bytes <= gc_allocator_large_threshold ()) { - } - else - { - jit_gpr_t res = T0; - emit_store_current_ip (j, res); - emit_call_2 (j, scm_vm_intrinsics.allocate_words_with_freelist, - thread_operand (), - jit_operand_imm (JIT_OPERAND_ABI_WORD, idx)); - emit_retval (j, res); - emit_reload_sp (j); - emit_sp_set_scm (j, dst, res); + emit_allocate_words_slow (j, T0, + jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords), + GC_ALLOCATION_TAGGED); + emit_sp_set_scm (j, dst, T0); continue_after_slow_path (j, j->next_ip); } } @@ -2357,15 +2380,9 @@ compile_allocate_words_immediate_slow (scm_jit_state *j, uint32_t dst, uint32_t static void compile_allocate_pointerless_words (scm_jit_state *j, uint32_t dst, uint32_t nwords) { - jit_gpr_t t = T0; - - emit_store_current_ip (j, t); - emit_call_2 (j, scm_vm_intrinsics.allocate_pointerless_words, thread_operand (), - sp_sz_operand (j, nwords)); - emit_retval (j, t); - record_gpr_clobber (j, t); - emit_reload_sp (j); - emit_sp_set_scm (j, dst, t); + emit_allocate_words_slow (j, T0, sp_sz_operand (j, nwords), + GC_ALLOCATION_UNTAGGED_POINTERLESS); + 
emit_sp_set_scm (j, dst, T0); } static void compile_allocate_pointerless_words_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords) @@ -2376,49 +2393,31 @@ static void compile_allocate_pointerless_words_immediate (scm_jit_state *j, uint32_t dst, uint32_t nwords) { size_t bytes = nwords * sizeof(SCM); - size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes); - if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT)) - { - jit_gpr_t t = T0; - emit_store_current_ip (j, t); - emit_call_1 (j, GC_malloc_atomic, jit_operand_imm (JIT_OPERAND_ABI_WORD, bytes)); - emit_retval (j, t); - emit_reload_sp (j); - emit_sp_set_scm (j, dst, t); - } + if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST && + bytes <= gc_allocator_large_threshold ()) + emit_allocate_bytes_fast_freelist (j, T0, bytes, + GC_ALLOCATION_UNTAGGED_POINTERLESS, + T1, T2); else - { - jit_gpr_t res = T0; - ptrdiff_t offset = offsetof(struct scm_thread, pointerless_freelists); - offset += idx * sizeof(void*); - emit_ldxi (j, res, THREAD, offset); - add_slow_path_patch (j, jit_beqi (j->jit, res, 0)); - jit_gpr_t new_freelist = T1; - emit_ldr (j, new_freelist, res); - jit_stxi (j->jit, offset, THREAD, new_freelist); - emit_sp_set_scm (j, dst, res); - } + emit_allocate_words_slow (j, T0, + jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords), + GC_ALLOCATION_UNTAGGED_POINTERLESS); + emit_sp_set_scm (j, dst, T0); } static void compile_allocate_pointerless_words_immediate_slow (scm_jit_state *j, uint32_t dst, uint32_t nwords) { size_t bytes = nwords * sizeof(SCM); - size_t idx = scm_inline_gc_bytes_to_freelist_index (bytes); - if (SCM_UNLIKELY (idx >= SCM_INLINE_GC_FREELIST_COUNT)) + /* Only emit a slow path if there is a fast path. 
*/ + if (gc_allocator_kind() == GC_ALLOCATOR_INLINE_FREELIST && + bytes <= gc_allocator_large_threshold ()) { - } - else - { - jit_gpr_t res = T0; - emit_store_current_ip (j, res); - emit_call_2 (j, scm_vm_intrinsics.allocate_pointerless_words_with_freelist, - thread_operand (), - jit_operand_imm (JIT_OPERAND_ABI_WORD, idx)); - emit_retval (j, res); - emit_reload_sp (j); - emit_sp_set_scm (j, dst, res); + emit_allocate_words_slow (j, T0, + jit_operand_imm (JIT_OPERAND_ABI_WORD, nwords), + GC_ALLOCATION_UNTAGGED_POINTERLESS); + emit_sp_set_scm (j, dst, T0); continue_after_slow_path (j, j->next_ip); } } diff --git a/libguile/threads.c b/libguile/threads.c index 0efd2c60a..69e422f76 100644 --- a/libguile/threads.c +++ b/libguile/threads.c @@ -132,15 +132,6 @@ scm_trace_thread_mutator_roots (struct scm_thread *thread, { scm_trace_vm (&thread->vm, trace_edge, heap, trace_data); - /* FIXME: Remove these in favor of Whippet inline allocation. */ - for (size_t i = 0; i < SCM_INLINE_GC_FREELIST_COUNT; i++) - trace_edge (gc_edge (&thread->freelists[i]), heap, trace_data); - for (size_t i = 0; i < SCM_INLINE_GC_FREELIST_COUNT; i++) - for (void **loc = &thread->pointerless_freelists[i]; - *loc; - loc = (void **) *loc) - trace_edge (gc_edge (loc), heap, trace_data); - /* FIXME: Call instead via gc_trace_object. */ scm_trace_thread (thread, trace_edge, heap, trace_data); } @@ -521,8 +512,6 @@ on_thread_exit (void *v) /* Although this thread has exited, the thread object might still be alive. Release unused memory. */ - for (size_t n = 0; n < SCM_INLINE_GC_FREELIST_COUNT; n++) - t->freelists[n] = t->pointerless_freelists[n] = NULL; t->dynamic_state = NULL; t->dynstack.base = NULL; t->dynstack.top = NULL; diff --git a/libguile/threads.h b/libguile/threads.h index fd912c1cc..2e184391c 100644 --- a/libguile/threads.h +++ b/libguile/threads.h @@ -77,10 +77,6 @@ struct scm_thread { /* Every thread is a mutator for the GC. 
*/ struct gc_mutator *mutator; - /* Thread-local freelists; see gc-inline.h. */ - void *freelists[SCM_INLINE_GC_FREELIST_COUNT]; - void *pointerless_freelists[SCM_INLINE_GC_FREELIST_COUNT]; - scm_i_pthread_t pthread; SCM result;