1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00

Rewrite subr implementation

* libguile/gsubr.c: Reimplement to store subr names and procedures in a
  side table, and to allocate fresh vcode for each subr.  This allows
  JIT of subrs, moves to a uniform all-code-starts-with-instrument-entry
  regime, and also allows statprof to distinguish between subrs based on
  IP.
* libguile/gsubr.h (SCM_SUBRF, SCM_SUBR_NAME): Call out to functions,
  now that these are in a side table.
  (scm_subr_function, scm_subr_name): New exports.
  (scm_i_primitive_name): New internal function, for looking up a
  primitive name based on IP.
  (scm_apply_subr): Take the subr index.
* libguile/vm-engine.c (subr-call):
* libguile/jit.c (compile_subr_call): Adapt to take index as arg.
* module/statprof.scm (sample-stack-procs, count-call):
  (stack-samples->procedure-data): Update to always record IP in stack
  samples and call counts.
* module/system/vm/frame.scm (frame-procedure-name): Simplify.
  (frame-instruction-pointer-or-primitive-procedure-name): Removed.
* libguile/programs.h:
* libguile/programs.c (scm_primitive_code_name): New function.
* module/system/vm/program.scm (primitive-code-name): New export.
This commit is contained in:
Andy Wingo 2018-07-29 15:36:07 +02:00
parent 5077e67371
commit b8a9a666f1
9 changed files with 363 additions and 272 deletions

View file

@ -25,16 +25,19 @@
#include <stdio.h>
#include <stdarg.h>
#include <string.h>
#include "foreign.h"
#include "frames.h"
#include "instructions.h"
#include "jit.h"
#include "modules.h"
#include "numbers.h"
#include "private-options.h"
#include "programs.h"
#include "srfi-4.h"
#include "symbols.h"
#include "threads.h"
#include "gsubr.h"
@ -46,224 +49,292 @@
* and rest arguments.
*/
static scm_i_pthread_mutex_t admin_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
static void **subrs = NULL;
static uint32_t next_subr_idx = 0;
static uint32_t subrs_array_size = 0;
static uint32_t
alloc_subr_idx (void *subr)
{
uint32_t idx;
scm_i_pthread_mutex_lock (&admin_mutex);
idx = next_subr_idx++;
if (idx > 0xffffff) abort ();
if (idx >= subrs_array_size)
{
void **new_subrs;
if (subrs_array_size)
subrs_array_size *= 2;
else
/* In July 2018 there were 1140 subrs defined in stock Guile. */
subrs_array_size = 1500;
/* Leak this allocation, as code lives as long as the program
does. In the likely case, we only make one malloc for the
program; in the general case it's still O(n) in number of subrs
because of the geometric factor. Use malloc instead of GC
allocations because it's not traceable and not collectable. */
new_subrs = malloc (subrs_array_size * sizeof (void*));
memcpy (new_subrs, subrs, idx * sizeof (void*));
subrs = new_subrs;
}
subrs[idx] = subr;
scm_i_pthread_mutex_unlock (&admin_mutex);
return idx;
}
/* OK here goes nothing: we're going to define VM assembly trampolines for
invoking subrs. Ready? Right! */
static SCM *names = NULL;
static uint32_t names_array_size = 0;
/* There's a maximum of 10 args, so the number of possible combinations is:
(REQ-OPT-REST)
for 0 args: 1 (000) (1 + 0)
for 1 arg: 3 (100, 010, 001) (2 + 1)
for 2 args: 5 (200, 110, 020, 101, 011) (3 + 2)
for 3 args: 7 (300, 210, 120, 030, 201, 111, 021) (4 + 3)
for N args: 2N+1
static void
record_subr_name (uint32_t idx, SCM name)
{
scm_i_pthread_mutex_lock (&admin_mutex);
and the index at which N args starts:
for 0 args: 0
for 1 args: 1
for 2 args: 4
for 3 args: 9
for N args: N^2
if (idx >= names_array_size)
{
SCM *new_names;
uint32_t new_size;
One can prove this:
/* See comments in alloc_subr_idx about how we choose 1500 as
initial size. It's a GC-managed allocation though. */
(1 + 3 + 5 + ... + (2N+1))
= ((2N+1)+1)/2 * (N+1)
= 2(N+1)/2 * (N+1)
= (N+1)^2
if (names_array_size)
new_size = names_array_size * 2;
else
new_size = 1500;
Thus the total sum is 11^2 = 121. Let's just generate all of them as
read-only data.
*/
new_names = SCM_GC_MALLOC (new_size * sizeof (SCM));
memcpy (new_names, names, names_array_size * sizeof (SCM));
while (names_array_size < new_size)
new_names[names_array_size++] = SCM_BOOL_F;
names = new_names;
names_array_size = new_size;
}
/* A: req; B: opt; C: rest */
#define A(nreq) \
SCM_PACK_OP_24 (assert_nargs_ee, nreq + 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0), \
0, \
0
names[idx] = name;
#define B(nopt) \
SCM_PACK_OP_24 (assert_nargs_le, nopt + 1), \
SCM_PACK_OP_24 (alloc_frame, nopt + 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0), \
0
scm_i_pthread_mutex_unlock (&admin_mutex);
}
#define C() \
SCM_PACK_OP_24 (bind_rest, 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0), \
0, \
0
#define AB(nreq, nopt) \
SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \
SCM_PACK_OP_24 (assert_nargs_le, nreq + nopt + 1), \
SCM_PACK_OP_24 (alloc_frame, nreq + nopt + 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0)
static char *arena = NULL;
static size_t arena_used = 0;
static size_t arena_size = 0;
#define AC(nreq) \
SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \
SCM_PACK_OP_24 (bind_rest, nreq + 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0), \
0
static size_t
round_up_power_of_two (size_t n, size_t m)
{
return (n + (m-1)) & ~(m-1);
}
#define BC(nopt) \
SCM_PACK_OP_24 (bind_rest, nopt + 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0), \
0, \
0
static char *
alloc (size_t byte_size)
{
char *ret;
#define ABC(nreq, nopt) \
SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1), \
SCM_PACK_OP_24 (bind_rest, nreq + nopt + 1), \
SCM_PACK_OP_24 (subr_call, 0), \
SCM_PACK_OP_24 (handle_interrupts, 0), \
SCM_PACK_OP_24 (return_values, 0), \
0
byte_size = round_up_power_of_two (byte_size, sizeof (void *));
scm_i_pthread_mutex_lock (&admin_mutex);
/*
(defun generate-bytecode (n)
"Generate bytecode for N arguments"
(interactive "p")
(insert (format "/\* %d arguments *\/\n " n))
(let ((nreq n))
(while (<= 0 nreq)
(let ((nopt (- n nreq)))
(insert
(if (< 0 nreq)
(if (< 0 nopt)
(format " AB(%d,%d)," nreq nopt)
(format " A(%d)," nreq))
(if (< 0 nopt)
(format " B(%d)," nopt)
(format " A(0),"))))
(setq nreq (1- nreq))))
(insert "\n ")
(setq nreq (1- n))
(while (<= 0 nreq)
(let ((nopt (- n nreq 1)))
(insert
(if (< 0 nreq)
(if (< 0 nopt)
(format " ABC(%d,%d)," nreq nopt)
(format " AC(%d)," nreq))
(if (< 0 nopt)
(format " BC(%d)," nopt)
(format " C(),"))))
(setq nreq (1- nreq))))
(insert "\n\n ")))
while (arena_used + byte_size > arena_size)
{
char *new_arena;
(defun generate-bytecodes (n)
"Generate bytecodes for up to N arguments"
(interactive "p")
(let ((i 0))
(while (<= i n)
(generate-bytecode i)
(setq i (1+ i)))))
*/
static const uint32_t subr_stub_code[] = {
/* C-u 1 0 M-x generate-bytecodes RET */
/* 0 arguments */
A(0),
/* See comments in alloc_subr_idx about how we choose 1500 as
initial size and why we leak the allocation. */
/* 1 arguments */
A(1), B(1),
C(),
if (arena_size)
arena_size *= 2;
else
{
size_t avg_size = 6 * sizeof(uint32_t);
avg_size += sizeof(struct scm_jit_function_data);
arena_size = 1500 * avg_size;
}
/* 2 arguments */
A(2), AB(1,1), B(2),
AC(1), BC(1),
new_arena = malloc (arena_size);
memcpy (new_arena, arena, arena_used);
arena = new_arena;
}
/* 3 arguments */
A(3), AB(2,1), AB(1,2), B(3),
AC(2), ABC(1,1), BC(2),
ret = arena + arena_used;
arena_used += byte_size;
/* 4 arguments */
A(4), AB(3,1), AB(2,2), AB(1,3), B(4),
AC(3), ABC(2,1), ABC(1,2), BC(3),
scm_i_pthread_mutex_unlock (&admin_mutex);
/* 5 arguments */
A(5), AB(4,1), AB(3,2), AB(2,3), AB(1,4), B(5),
AC(4), ABC(3,1), ABC(2,2), ABC(1,3), BC(4),
memset (ret, 0, byte_size);
/* 6 arguments */
A(6), AB(5,1), AB(4,2), AB(3,3), AB(2,4), AB(1,5), B(6),
AC(5), ABC(4,1), ABC(3,2), ABC(2,3), ABC(1,4), BC(5),
return ret;
}
/* 7 arguments */
A(7), AB(6,1), AB(5,2), AB(4,3), AB(3,4), AB(2,5), AB(1,6), B(7),
AC(6), ABC(5,1), ABC(4,2), ABC(3,3), ABC(2,4), ABC(1,5), BC(6),
static uint32_t *
alloc_primitive_code_with_instrumentation (size_t uint32_count,
uint32_t **write_ptr)
{
char *ptr;
uint32_t *ret;
struct scm_jit_function_data *data;
size_t byte_size, padded_byte_size;
/* 8 arguments */
A(8), AB(7,1), AB(6,2), AB(5,3), AB(4,4), AB(3,5), AB(2,6), AB(1,7), B(8),
AC(7), ABC(6,1), ABC(5,2), ABC(4,3), ABC(3,4), ABC(2,5), ABC(1,6), BC(7),
/* Reserve space for instrument-entry. */
byte_size = (2 + uint32_count) * sizeof (uint32_t);
padded_byte_size = round_up_power_of_two (byte_size, sizeof (void *));
/* Reserve space for jit data. */
ptr = alloc (padded_byte_size + sizeof (struct scm_jit_function_data));
ret = (uint32_t *) ptr;
data = (struct scm_jit_function_data*) (ret + padded_byte_size);
/* 9 arguments */
A(9), AB(8,1), AB(7,2), AB(6,3), AB(5,4), AB(4,5), AB(3,6), AB(2,7), AB(1,8), B(9),
AC(8), ABC(7,1), ABC(6,2), ABC(5,3), ABC(4,4), ABC(3,5), ABC(2,6), ABC(1,7), BC(8),
ret[0] = SCM_PACK_OP_24 (instrument_entry, 0);
ret[1] = padded_byte_size / 4;
/* 10 arguments */
A(10), AB(9,1), AB(8,2), AB(7,3), AB(6,4), AB(5,5), AB(4,6), AB(3,7), AB(2,8), AB(1,9), B(10),
AC(9), ABC(8,1), ABC(7,2), ABC(6,3), ABC(5,4), ABC(4,5), ABC(3,6), ABC(2,7), ABC(1,8), BC(9),
*write_ptr = ret + 2;
data->mcode = NULL;
data->counter = 0;
data->start = -padded_byte_size;
data->end = -(padded_byte_size - byte_size);
return (uint32_t *) ret;
}
static int
is_primitive_code (const void *ptr)
{
const char *cptr = ptr;
int ret;
scm_i_pthread_mutex_lock (&admin_mutex);
ret = cptr >= arena && (cptr - arena) < arena_used;
scm_i_pthread_mutex_unlock (&admin_mutex);
return ret;
}
static uint32_t *
alloc_subr_code (uint32_t subr_idx, uint32_t code[], size_t code_size)
{
uint32_t post[3] = { SCM_PACK_OP_24 (subr_call, subr_idx),
SCM_PACK_OP_24 (handle_interrupts, 0),
SCM_PACK_OP_24 (return_values, 0) };
uint32_t *ret, *write;
ret = alloc_primitive_code_with_instrumentation (code_size + 3, &write);
memcpy (write, code, code_size * sizeof (uint32_t));
write += code_size;
memcpy (write, post, 3 * sizeof (uint32_t));
return ret;
}
enum arity_kind {
NULLARY = 0,
REQ = 1,
OPT = 2,
REST = 4,
REQ_OPT = REQ + OPT,
REQ_REST = REQ + REST,
OPT_REST = OPT + REST,
REQ_OPT_REST = REQ + OPT + REST
};
#undef A
#undef B
#undef C
#undef AB
#undef AC
#undef BC
#undef ABC
/* (nargs * nargs) + nopt + rest * (nargs + 1) */
#define SUBR_STUB_CODE(nreq,nopt,rest) \
&subr_stub_code[((nreq + nopt + rest) * (nreq + nopt + rest) \
+ nopt + rest * (nreq + nopt + rest + 1)) * 6]
static const uint32_t*
get_subr_stub_code (unsigned int nreq, unsigned int nopt, unsigned int rest)
static uint32_t*
get_subr_stub_code (uint32_t subr_idx,
unsigned int nreq, unsigned int nopt, unsigned int rest)
{
enum arity_kind kind = NULLARY;
if (SCM_UNLIKELY (rest > 1 || nreq + nopt + rest > 10))
scm_out_of_range ("make-subr", scm_from_uint (nreq + nopt + rest));
return SUBR_STUB_CODE (nreq, nopt, rest);
if (nreq) kind += REQ;
if (nopt) kind += OPT;
if (rest) kind += REST;
switch (kind)
{
case NULLARY:
case REQ:
{
uint32_t code[1] = { SCM_PACK_OP_24 (assert_nargs_ee, nreq + 1) };
return alloc_subr_code (subr_idx, code, 1);
}
case OPT:
{
uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_le, nopt + 1),
SCM_PACK_OP_24 (alloc_frame, nopt + 1) };
return alloc_subr_code (subr_idx, code, 2);
}
case REST:
{
uint32_t code[1] = { SCM_PACK_OP_24 (bind_rest, 1) };
return alloc_subr_code (subr_idx, code, 1);
}
case REQ_OPT:
{
uint32_t code[3] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1),
SCM_PACK_OP_24 (assert_nargs_le, nreq + nopt + 1),
SCM_PACK_OP_24 (alloc_frame, nreq + nopt + 1) };
return alloc_subr_code (subr_idx, code, 3);
}
case REQ_REST:
{
uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1),
SCM_PACK_OP_24 (bind_rest, nreq + 1) };
return alloc_subr_code (subr_idx, code, 2);
}
case OPT_REST:
{
uint32_t code[1] = { SCM_PACK_OP_24 (bind_rest, nopt + 1) };
return alloc_subr_code (subr_idx, code, 1);
}
case REQ_OPT_REST:
{
uint32_t code[2] = { SCM_PACK_OP_24 (assert_nargs_ge, nreq + 1),
SCM_PACK_OP_24 (bind_rest, nreq + nopt + 1) };
return alloc_subr_code (subr_idx, code, 2);
}
default:
abort ();
}
}
static SCM
create_subr (int define, const char *name,
unsigned int nreq, unsigned int nopt, unsigned int rest,
SCM (*fcn) (), SCM *generic_loc)
void *fcn, SCM *generic_loc)
{
SCM ret, sname;
uint32_t idx;
scm_t_bits flags;
scm_t_bits nfree = generic_loc ? 3 : 2;
scm_t_bits nfree = generic_loc ? 1 : 0;
idx = alloc_subr_idx (fcn);
sname = scm_from_utf8_symbol (name);
flags = SCM_F_PROGRAM_IS_PRIMITIVE;
flags |= generic_loc ? SCM_F_PROGRAM_IS_PRIMITIVE_GENERIC : 0;
ret = scm_words (scm_tc7_program | (nfree << 16) | flags, nfree + 2);
SCM_SET_CELL_WORD_1 (ret, get_subr_stub_code (nreq, nopt, rest));
SCM_PROGRAM_FREE_VARIABLE_SET (ret, 0, scm_from_pointer (fcn, NULL));
SCM_PROGRAM_FREE_VARIABLE_SET (ret, 1, sname);
SCM_SET_CELL_WORD_1 (ret, get_subr_stub_code (idx, nreq, nopt, rest));
record_subr_name (idx, sname);
if (generic_loc)
SCM_PROGRAM_FREE_VARIABLE_SET (ret, 2,
scm_from_pointer (generic_loc, NULL));
SCM_PROGRAM_FREE_VARIABLE_SET (ret, 0,
scm_from_pointer (generic_loc, NULL));
if (define)
scm_define (sname, ret);
@ -274,33 +345,86 @@ create_subr (int define, const char *name,
int
scm_i_primitive_code_p (const uint32_t *code)
{
if (code < subr_stub_code)
return 0;
if (code > subr_stub_code + (sizeof(subr_stub_code) / sizeof(uint32_t)))
return 0;
return is_primitive_code (code);
}
return 1;
static uintptr_t
primitive_call_ip (const uint32_t *code)
{
int direction = 0;
while (1)
{
switch (code[0] & 0xff)
{
case scm_op_instrument_entry:
if (direction < 0) abort ();
direction = 1;
code += 2;
break;
case scm_op_assert_nargs_ee:
case scm_op_assert_nargs_le:
case scm_op_assert_nargs_ge:
case scm_op_bind_rest:
case scm_op_alloc_frame:
if (direction < 0) abort ();
direction = 1;
code += 1;
break;
case scm_op_subr_call:
return (uintptr_t) code;
case scm_op_return_values:
case scm_op_handle_interrupts:
/* Going back isn't possible for instruction streams where we
don't know the length of the preceding instruction, but for
the code we emit, these particular opcodes are only ever
preceded by 4-byte instructions. */
if (direction > 0) abort ();
direction = -1;
code -= 1;
break;
default:
abort ();
}
}
}
static uint32_t
primitive_subr_idx (const uint32_t *code)
{
uintptr_t call_ip = primitive_call_ip (code);
uint32_t idx = ((uint32_t *) call_ip)[0] >> 8;
if (idx >= next_subr_idx) abort();
return idx;
}
uintptr_t
scm_i_primitive_call_ip (SCM subr)
{
size_t i;
const uint32_t *code = SCM_PROGRAM_CODE (subr);
/* A stub is 6 32-bit words long, or 24 bytes. The call will be one
instruction, in either the fourth, third, or second word. Return a
byte offset from the entry. */
for (i = 1; i < 4; i++)
if ((code[i] & 0xff) == scm_op_subr_call)
return (uintptr_t) (code + i);
abort ();
return primitive_call_ip (SCM_PROGRAM_CODE (subr));
}
SCM
scm_apply_subr (union scm_vm_stack_element *sp, ptrdiff_t nslots)
scm_i_primitive_name (const uint32_t *code)
{
SCM (*subr)() = SCM_SUBRF (sp[nslots - 1].as_scm);
return names[primitive_subr_idx (code)];
}
scm_t_subr
scm_subr_function (SCM subr)
{
return subrs[primitive_subr_idx (SCM_PROGRAM_CODE (subr))];
}
SCM
scm_subr_name (SCM subr)
{
return scm_i_primitive_name (SCM_PROGRAM_CODE (subr));
}
SCM
scm_apply_subr (union scm_vm_stack_element *sp, uint32_t idx, ptrdiff_t nslots)
{
SCM (*subr)() = subrs[idx];
#define ARG(i) (sp[i].as_scm)
switch (nslots - 1)

View file

@ -38,14 +38,11 @@
#define SCM_PRIMITIVE_GENERIC_P(x) (SCM_PROGRAM_P (x) && SCM_PROGRAM_IS_PRIMITIVE_GENERIC (x))
#define SCM_SUBRF(x) \
((SCM (*) (void)) \
SCM_POINTER_VALUE (SCM_PROGRAM_FREE_VARIABLE_REF (x, 0)))
#define SCM_SUBR_NAME(x) (SCM_PROGRAM_FREE_VARIABLE_REF (x, 1))
#define SCM_SUBRF(x) scm_subr_function (x)
#define SCM_SUBR_NAME(x) scm_subr_name (x)
#define SCM_SUBR_GENERIC(x) \
((SCM *) SCM_POINTER_VALUE (SCM_PROGRAM_FREE_VARIABLE_REF (x, 2)))
((SCM *) SCM_POINTER_VALUE (SCM_PROGRAM_FREE_VARIABLE_REF (x, 0)))
#define SCM_SET_SUBR_GENERIC(x, g) \
(*SCM_SUBR_GENERIC (x) = (g))
@ -54,9 +51,13 @@
SCM_INTERNAL int scm_i_primitive_code_p (const uint32_t *code);
SCM_INTERNAL uintptr_t scm_i_primitive_call_ip (SCM subr);
SCM_INTERNAL SCM scm_i_primitive_name (const uint32_t *code);
SCM_API scm_t_subr scm_subr_function (SCM subr);
SCM_API SCM scm_subr_name (SCM subr);
SCM_INTERNAL SCM scm_apply_subr (union scm_vm_stack_element *sp,
ptrdiff_t nargs);
uint32_t subr_idx, ptrdiff_t nargs);
SCM_API SCM scm_c_make_gsubr (const char *name,
int req, int opt, int rst, scm_t_subr fcn);

View file

@ -101,7 +101,7 @@ compile_return_values (scm_jit_state *j)
}
static void
compile_subr_call (scm_jit_state *j)
compile_subr_call (scm_jit_state *j, uint32_t idx)
{
}

View file

@ -178,6 +178,20 @@ SCM_DEFINE (scm_primitive_call_ip, "primitive-call-ip", 1, 0, 0,
}
#undef FUNC_NAME
SCM_DEFINE (scm_primitive_code_name, "primitive-code-name", 1, 0, 0,
(SCM code),
"")
#define FUNC_NAME s_scm_primitive_code_name
{
const uint32_t * ptr = (const uint32_t *) scm_to_uintptr_t (code);
if (scm_i_primitive_code_p (ptr))
return scm_i_primitive_name (ptr);
return SCM_BOOL_F;
}
#undef FUNC_NAME
SCM
scm_find_source_for_addr (SCM ip)
{

View file

@ -60,6 +60,7 @@ SCM_INTERNAL SCM scm_program_p (SCM obj);
SCM_INTERNAL SCM scm_program_code (SCM program);
SCM_INTERNAL SCM scm_primitive_code_p (SCM code);
SCM_INTERNAL SCM scm_primitive_code_name (SCM code);
SCM_INTERNAL SCM scm_primitive_call_ip (SCM prim);
SCM_INTERNAL SCM scm_i_program_name (SCM program);

View file

@ -595,19 +595,23 @@ VM_NAME (scm_thread *thread)
* Specialized call stubs
*/
/* subr-call _:24
/* subr-call idx:24
*
* Call a subr, passing all locals in this frame as arguments. Return
* from the calling frame. This instruction is part of the
* trampolines created in gsubr.c, and is not generated by the
* compiler.
*/
VM_DEFINE_OP (10, subr_call, "subr-call", OP1 (X32))
VM_DEFINE_OP (10, subr_call, "subr-call", OP1 (X8_C24))
{
SCM ret;
uint32_t idx;
UNPACK_24 (op, idx);
SYNC_IP ();
ret = scm_apply_subr (sp, FRAME_LOCALS_COUNT ());
ret = scm_apply_subr (sp, idx, FRAME_LOCALS_COUNT ());
CACHE_SP ();
if (SCM_UNLIKELY (scm_is_values (ret)))

View file

@ -1,7 +1,7 @@
;;;; (statprof) -- a statistical profiler for Guile
;;;; -*-scheme-*-
;;;;
;;;; Copyright (C) 2009, 2010, 2011, 2013-2017 Free Software Foundation, Inc.
;;;; Copyright (C) 2009, 2010, 2011, 2013-2018 Free Software Foundation, Inc.
;;;; Copyright (C) 2004, 2009 Andy Wingo <wingo at pobox dot com>
;;;; Copyright (C) 2001 Rob Browning <rlb at defaultvalue dot org>
;;;;
@ -91,26 +91,16 @@
;;; distinguish between different closures which share the same code,
;;; but that is usually what we want anyway.
;;;
;;; One case in which we do want to distinguish closures is the case of
;;; primitive procedures. If slot 0 in the frame is a primitive
;;; procedure, we record the procedure's name into the buffer instead of
;;; the IP. It's fairly cheap to check whether a value is a primitive
;;; procedure, and then get its name, as its name is stored in the
;;; closure data. Calling procedure-name in the stack sampler isn't
;;; something you want to do for other kinds of procedures, though, as
;;; that involves grovelling the debug information.
;;;
;;; The other part of data collection is the exact call counter, which
;;; uses the VM's "apply" hook to record each procedure call.
;;; Naturally, this is quite expensive, and it is off by default.
;;; Running code at every procedure call effectively penalizes procedure
;;; calls. Still, it's useful sometimes. If the profiler state has a
;;; call-counts table, then calls will be counted. As with the stack
;;; counter, usually the key in the hash table is the code pointer of
;;; the procedure being called, except for primitive procedures, in
;;; which case it is the name of the primitive. The call counter can
;;; also see calls of non-programs, for example in the case of
;;; applicable structs. In that case the key is the procedure itself.
;;; counter, the key in the hash table is the code pointer of the
;;; procedure being called. The call counter can also see calls of
;;; non-programs, for example in the case of applicable structs. In
;;; that case the key is the procedure itself.
;;;
;;; After collection is finished, the data can be analyzed. The first
;;; step is usually to run over the stack traces, tabulating sample
@ -249,8 +239,7 @@
(set-buffer! state buffer)
(set-buffer-pos! state (1+ pos)))
(else
(write-sample-and-continue
(frame-instruction-pointer-or-primitive-procedure-name frame))))))
(write-sample-and-continue (frame-instruction-pointer frame))))))
(define (reset-sigprof-timer usecs)
;; Guile's setitimer binding is terrible.
@ -296,7 +285,7 @@
(define (count-call frame)
(let ((state (existing-profiler-state)))
(unless (inside-profiler? state)
(let* ((key (frame-instruction-pointer-or-primitive-procedure-name frame))
(let* ((key (frame-instruction-pointer frame))
(handle (hashv-create-handle! (call-counts state) key 0)))
(set-cdr! handle (1+ (cdr handle)))))))
@ -447,42 +436,26 @@ always collects full stacks.)"
(hashv-set! table entry data)
data))))
(define (callee->call-data callee)
(cond
((number? callee) (addr->call-data callee))
((hashv-ref table callee))
(else
(let ((data (make-call-data
(cond ((procedure? callee) (procedure-name callee))
;; a primitive
((symbol? callee) callee)
(else #f))
(with-output-to-string (lambda () (write callee)))
#f
(and call-counts (hashv-ref call-counts callee))
0
0)))
(hashv-set! table callee data)
data))))
(when call-counts
(hash-for-each (lambda (callee count)
(callee->call-data callee))
(unless (number? callee)
(error "unexpected callee" callee))
(addr->call-data callee))
call-counts))
(let visit-stacks ((pos 0))
(cond
((< pos len)
(let ((pos (if call-counts
(skip-count-call buffer pos len)
pos)))
(skip-count-call buffer pos len)
pos)))
(inc-call-data-self-sample-count!
(callee->call-data (vector-ref buffer pos)))
(addr->call-data (vector-ref buffer pos)))
(let visit-stack ((pos pos))
(cond
((vector-ref buffer pos)
=> (lambda (callee)
(inc-call-data-cum-sample-count! (callee->call-data callee))
=> (lambda (ip)
(inc-call-data-cum-sample-count! (addr->call-data ip))
(visit-stack (1+ pos))))
(else
(visit-stacks (1+ pos)))))))

View file

@ -1,6 +1,6 @@
;;; Guile VM frame functions
;;; Copyright (C) 2001, 2005, 2009-2016 Free Software Foundation, Inc.
;;; Copyright (C) 2001, 2005, 2009-2016, 2018 Free Software Foundation, Inc.
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
@ -37,7 +37,6 @@
frame-lookup-binding
binding-ref binding-set!
frame-instruction-pointer-or-primitive-procedure-name
frame-call-representation
frame-environment
frame-object-binding frame-object-name))
@ -325,24 +324,9 @@
(define* (frame-procedure-name frame #:key
(info (find-program-debug-info
(frame-instruction-pointer frame))))
(cond
(info => program-debug-info-name)
;; We can only try to get the name from the closure if we know that
;; slot 0 corresponds to the frame's procedure. This isn't possible
;; to know in general. If the frame has already begun executing and
;; the closure binding is dead, it could have been replaced with any
;; other random value, or an unboxed value. Even if we're catching
;; the frame at its application, before it has started running, if
;; the callee is well-known and has only one free variable, closure
;; optimization could have chosen to represent its closure as that
;; free variable, and that free variable might be some other program,
;; or even an unboxed value. It would be an error to try to get the
;; procedure name of some procedure that doesn't correspond to the
;; one being applied. (Free variables are currently always boxed but
;; that could change in the future.)
((primitive-code? (frame-instruction-pointer frame))
(procedure-name (frame-local-ref frame 0 'scm)))
(else #f)))
(if info
(program-debug-info-name info)
(primitive-code-name (frame-instruction-pointer frame))))
;; This function is always called to get some sort of representation of the
;; frame to present to the user, so let's do the logical thing and dispatch to
@ -350,17 +334,6 @@
(define (frame-arguments frame)
(cdr (frame-call-representation frame)))
;; Usually the IP is sufficient to identify the procedure being called.
;; However all primitive applications of the same arity share the same
;; code. Perhaps we should change that in the future, but for now we
;; export this function to avoid having to export frame-local-ref.
;;
(define (frame-instruction-pointer-or-primitive-procedure-name frame)
(let ((ip (frame-instruction-pointer frame)))
(if (primitive-code? ip)
(procedure-name (frame-local-ref frame 0 'scm))
ip)))
;;;
;;; Pretty printing

View file

@ -1,6 +1,6 @@
;;; Guile VM program functions
;;; Copyright (C) 2001, 2009, 2010, 2013, 2014 Free Software Foundation, Inc.
;;; Copyright (C) 2001, 2009, 2010, 2013, 2014, 2018 Free Software Foundation, Inc.
;;;
;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public
@ -44,7 +44,8 @@
print-program
primitive-code?))
primitive-code?
primitive-code-name))
(load-extension (string-append "libguile-" (effective-version))
"scm_init_programs")