From af72d01de8c64776f550205c540f9678c5dda2e5 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 18 Jun 2019 21:40:55 +0200 Subject: [PATCH] Speed up returns in JIT This patch is a bit unfortunate, in the sense that it exposes some of the JIT guts to the rest of the VM. Code needs to treat "machine return addresses" as valid only if they are non-NULL (as before) and also not equal to the tier-down trampoline. This is because tier-down at a return needs the old frame pointer to load the "virtual return address", and the way this patch works is that it passes the old frame pointer in a well-known register, from which the trampoline loads the vra. It's a custom calling convention for a certain kind of return. * libguile/jit.h (scm_jit_return_to_interpreter_trampoline): New internal global. * libguile/jit.c (scm_jit_return_to_interpreter_trampoline): Define it. (compile_return_values): Jump to the mra unconditionally instead of testing it for NULL. (emit_return_to_interpreter_trampoline): New function. (initialize_jit): Emit the trampoline. (scm_jit_enter_mcode): If the current frame's mra is NULL, set it to the trampoline before entering mcode. (scm_jit_clear_mcode_return_addresses): Move here, from vm.c. Instead of zeroing return addresses, set them to the return-to-interpreter trampoline. * libguile/vm-engine.c (return-values): Don't enter mcode if the mra is scm_jit_return_to_interpreter_trampoline. * libguile/vm.c (vm_clear_mcode_return_addresses): Remove; moved to jit.c. (vm_recompute_disable_mcode): Adapt caller. (capture_continuation): Treat the tier-down trampoline as NULL. --- libguile/jit.c | 50 ++++++++++++++++++++++++++++++++++---------- libguile/jit.h | 5 ++++- libguile/vm-engine.c | 2 +- libguile/vm.c | 19 +++++------------ 4 files changed, 49 insertions(+), 27 deletions(-) diff --git a/libguile/jit.c b/libguile/jit.c index 8e0781d22..a90b9b444 100644 --- a/libguile/jit.c +++ b/libguile/jit.c @@ -152,6 +152,10 @@ static void *exit_mcode; instruction, compiled as a stub on the side to reduce code size. */ static void *handle_interrupts_trampoline; +/* Return to interpreter trampoline: trampoline to load IP from the VRA + and tier down. */ +void *scm_jit_return_to_interpreter_trampoline; + /* Thread-local buffer into which to write code. 
*/ struct code_arena { @@ -1590,27 +1594,31 @@ compile_shuffle_down (scm_jit_state *j, uint16_t from, uint16_t to) j->frame_size_max -= (from - to); } +static const jit_gpr_t old_fp_for_return_trampoline = T0; + static void compile_return_values (scm_jit_state *j) { - jit_gpr_t old_fp = T0, ra = T1; - jit_reloc_t interp; + jit_gpr_t ra = T1; - emit_pop_fp (j, old_fp); - - emit_load_mra (j, ra, old_fp); - interp = jit_beqi (j->jit, ra, 0); + emit_pop_fp (j, old_fp_for_return_trampoline); + emit_load_mra (j, ra, old_fp_for_return_trampoline); jit_jmpr (j->jit, ra); - jit_patch_here (j->jit, interp); - emit_load_vra (j, ra, old_fp); - emit_store_ip (j, ra); - emit_exit (j); - j->frame_size_min = 0; j->frame_size_max = INT32_MAX; } +static void +emit_return_to_interpreter_trampoline (scm_jit_state *j) +{ + jit_gpr_t ra = T1; + + emit_load_vra (j, ra, old_fp_for_return_trampoline); + emit_store_ip (j, ra); + emit_exit (j); +} + static void compile_subr_call (scm_jit_state *j, uint32_t idx) { @@ -4688,6 +4696,10 @@ initialize_jit (void) handle_interrupts_trampoline = emit_code (j, emit_handle_interrupts_trampoline); ASSERT (handle_interrupts_trampoline); + + scm_jit_return_to_interpreter_trampoline = + emit_code (j, emit_return_to_interpreter_trampoline); + ASSERT (scm_jit_return_to_interpreter_trampoline); } static uint8_t * @@ -4804,10 +4816,26 @@ void scm_jit_enter_mcode (scm_thread *thread, const uint8_t *mcode) { LOG ("entering mcode: %p\n", mcode); + if (!SCM_FRAME_MACHINE_RETURN_ADDRESS (thread->vm.fp)) + SCM_FRAME_SET_MACHINE_RETURN_ADDRESS + (thread->vm.fp, scm_jit_return_to_interpreter_trampoline); enter_mcode (thread, mcode); LOG ("exited mcode\n"); } +/* Call to force a thread to go back to the interpreter, for example + when single-stepping is enabled. 
*/ +void +scm_jit_clear_mcode_return_addresses (scm_thread *thread) +{ + union scm_vm_stack_element *fp; + struct scm_vm *vp = &thread->vm; + + for (fp = vp->fp; fp < vp->stack_top; fp = SCM_FRAME_DYNAMIC_LINK (fp)) + SCM_FRAME_SET_MACHINE_RETURN_ADDRESS + (fp, scm_jit_return_to_interpreter_trampoline); +} + void scm_jit_state_free (scm_jit_state *j) { diff --git a/libguile/jit.h b/libguile/jit.h index 365be07e9..455f9c79c 100644 --- a/libguile/jit.h +++ b/libguile/jit.h @@ -1,7 +1,7 @@ #ifndef SCM_JIT_H #define SCM_JIT_H -/* Copyright 2018 +/* Copyright 2018-2019 Free Software Foundation, Inc. This file is part of Guile. @@ -62,6 +62,9 @@ SCM_INTERNAL void scm_jit_enter_mcode (scm_thread *thread, const uint8_t *mcode); SCM_INTERNAL void scm_jit_state_free (struct scm_jit_state *j); +SCM_INTERNAL void *scm_jit_return_to_interpreter_trampoline; +SCM_INTERNAL void scm_jit_clear_mcode_return_addresses (scm_thread *thread); + SCM_INTERNAL void scm_init_jit (void); #endif /* SCM_JIT_H */ diff --git a/libguile/vm-engine.c b/libguile/vm-engine.c index 5596dab02..469a31cea 100644 --- a/libguile/vm-engine.c +++ b/libguile/vm-engine.c @@ -551,7 +551,7 @@ VM_NAME (scm_thread *thread) if (!VP->disable_mcode) { mcode = SCM_FRAME_MACHINE_RETURN_ADDRESS (old_fp); - if (mcode) + if (mcode && mcode != scm_jit_return_to_interpreter_trampoline) { scm_jit_enter_mcode (thread, mcode); CACHE_REGISTER (); diff --git a/libguile/vm.c b/libguile/vm.c index 82cdae91a..d7b1788d8 100644 --- a/libguile/vm.c +++ b/libguile/vm.c @@ -199,18 +199,6 @@ scm_i_capture_current_stack (void) 0); } -/* Call to force a thread to go back to the interpreter, for example - when single-stepping is enabled. 
*/ -static void -vm_clear_mcode_return_addresses (scm_thread *thread) -{ - union scm_vm_stack_element *fp; - struct scm_vm *vp = &thread->vm; - - for (fp = vp->fp; fp < vp->stack_top; fp = SCM_FRAME_DYNAMIC_LINK (fp)) - SCM_FRAME_SET_MACHINE_RETURN_ADDRESS (fp, NULL); -} - #define FOR_EACH_HOOK(M) \ M(apply) \ M(return) \ @@ -242,7 +230,7 @@ vm_recompute_disable_mcode (scm_thread *thread) #undef DISABLE_MCODE_IF_HOOK_ENABLED if (thread->vm.disable_mcode && !was_disabled) - vm_clear_mcode_return_addresses (thread); + scm_jit_clear_mcode_return_addresses (thread); } static int @@ -1163,11 +1151,14 @@ static SCM capture_continuation (scm_thread *thread) { struct scm_vm *vp = &thread->vm; + void *mra = SCM_FRAME_MACHINE_RETURN_ADDRESS (vp->fp); + if (mra == scm_jit_return_to_interpreter_trampoline) + mra = NULL; SCM vm_cont = capture_stack (vp->stack_top, SCM_FRAME_DYNAMIC_LINK (vp->fp), SCM_FRAME_PREVIOUS_SP (vp->fp), SCM_FRAME_VIRTUAL_RETURN_ADDRESS (vp->fp), - SCM_FRAME_MACHINE_RETURN_ADDRESS (vp->fp), + mra, scm_dynstack_capture_all (&thread->dynstack), 0); return scm_i_make_continuation (thread, vm_cont);