1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-09 23:40:29 +02:00

frame-instruction-pointer is absolute; rewrite (system vm coverage)

* libguile/frames.c (scm_frame_source): Instead of assuming that
  scm_frame_procedure is correct, use the IP to get the source.
  (scm_frame_instruction_pointer): Return an absolute value instead of
  assuming that slot 0 is correct.  (It isn't, when preparing for a tail
  call.)

* libguile/programs.h:
* libguile/programs.c (scm_find_source_for_addr): New internal helper.

* module/system/repl/debug.scm (print-registers): Readably print
  absolute instruction pointers.

* module/system/vm/coverage.scm: Complete rewrite to use absolute IP's.
  We can't assume that frame-procedure is cheap if it is correct, or
  correct if it is cheap.  Anyway using the address is better anyway.
  (coverage-data->lcov): Disable per-function info temporarily.
  (loaded-modules, module-procedures, closest-source-line)
  (closed-over-procedures): Remove these.  Instead of going from
  procedures to source info, now we go from ELF image to source info.

* module/system/vm/debug.scm (debug-context-length): New interface.

* module/system/vm/program.scm (source-for-addr): New internal helper.
This commit is contained in:
Andy Wingo 2013-11-07 23:03:45 +01:00
parent 72b82b0f21
commit 581a4eb82b
7 changed files with 204 additions and 220 deletions

View file

@ -104,18 +104,9 @@ SCM_DEFINE (scm_frame_source, "frame-source", 1, 0, 0,
"")
#define FUNC_NAME s_scm_frame_source
{
SCM proc;
SCM_VALIDATE_VM_FRAME (1, frame);
proc = scm_frame_procedure (frame);
if (SCM_PROGRAM_P (proc) || SCM_RTL_PROGRAM_P (proc))
return scm_program_source (scm_frame_procedure (frame),
scm_frame_instruction_pointer (frame),
SCM_UNDEFINED);
return SCM_BOOL_F;
return scm_find_source_for_addr (scm_frame_instruction_pointer (frame));
}
#undef FUNC_NAME
@ -254,22 +245,9 @@ SCM_DEFINE (scm_frame_instruction_pointer, "frame-instruction-pointer", 1, 0, 0,
"")
#define FUNC_NAME s_scm_frame_instruction_pointer
{
SCM program;
const struct scm_objcode *c_objcode;
SCM_VALIDATE_VM_FRAME (1, frame);
program = scm_frame_procedure (frame);
if (SCM_RTL_PROGRAM_P (program))
return scm_from_ptrdiff_t (SCM_VM_FRAME_IP (frame) -
(scm_t_uint8 *) SCM_RTL_PROGRAM_CODE (program));
if (!SCM_PROGRAM_P (program))
return SCM_INUM0;
c_objcode = SCM_PROGRAM_DATA (program);
return scm_from_unsigned_integer ((SCM_VM_FRAME_IP (frame)
- SCM_C_OBJCODE_BASE (c_objcode)));
return scm_from_uintptr_t ((scm_t_uintptr) SCM_VM_FRAME_IP (frame));
}
#undef FUNC_NAME

View file

@ -398,6 +398,22 @@ scm_i_program_properties (SCM program)
}
#undef FUNC_NAME
SCM
scm_find_source_for_addr (SCM ip)
{
static SCM source_for_addr = SCM_BOOL_F;
if (scm_is_false (source_for_addr)) {
if (!scm_module_system_booted_p)
return SCM_BOOL_F;
source_for_addr =
scm_c_private_variable ("system vm program", "source-for-addr");
}
return scm_call_1 (scm_variable_ref (source_for_addr), ip);
}
SCM
scm_program_source (SCM program, SCM ip, SCM sources)
{

View file

@ -51,6 +51,8 @@ SCM_INTERNAL SCM scm_i_rtl_program_name (SCM program);
SCM_INTERNAL SCM scm_i_rtl_program_documentation (SCM program);
SCM_INTERNAL SCM scm_i_rtl_program_properties (SCM program);
SCM_INTERNAL SCM scm_find_source_for_addr (SCM ip);
/*
* Programs
*/

View file

@ -94,7 +94,12 @@
(format port fmt val))
(format port "~aRegisters:~%" per-line-prefix)
(print "ip = ~d\n" (frame-instruction-pointer frame))
(print "ip = #x~x" (frame-instruction-pointer frame))
(when (rtl-program? (frame-procedure frame))
(let ((code (rtl-program-code (frame-procedure frame))))
(format port " (#x~x~@d)" code
(- (frame-instruction-pointer frame) code))))
(newline port)
(print "sp = #x~x\n" (frame-stack-pointer frame))
(print "fp = #x~x\n" (frame-address frame)))

View file

@ -20,10 +20,14 @@
#:use-module (system vm vm)
#:use-module (system vm frame)
#:use-module (system vm program)
#:use-module (system vm debug)
#:use-module (ice-9 format)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-9)
#:use-module (srfi srfi-9 gnu)
#:use-module (srfi srfi-11)
#:use-module (srfi srfi-26)
#:use-module (ice-9 match)
#:export (with-code-coverage
coverage-data?
instrumented-source-files
@ -46,54 +50,20 @@
;;; Gathering coverage data.
;;;
(define (hashq-proc proc n)
;; Return the hash of PROC's objcode.
(if (rtl-program? proc)
(hashq (rtl-program-code proc) n)
(hashq (program-objcode proc) n)))
(define (assq-proc proc alist)
;; Instead of really looking for PROC in ALIST, look for the objcode of PROC.
;; IOW the alist is indexed by procedures, not objcodes, but those procedures
;; are taken as an arbitrary representative of all the procedures (closures)
;; sharing that objcode. This can significantly reduce memory consumption.
(if (rtl-program? proc)
(let ((code (rtl-program-code proc)))
(find (lambda (pair)
(let ((proc (car pair)))
(and (rtl-program? proc)
(eqv? code (rtl-program-code proc)))))
alist))
(let ((code (program-objcode proc)))
(find (lambda (pair)
(let ((proc (car pair)))
(and (program? proc)
(eq? code (program-objcode proc)))))
alist))))
(define (with-code-coverage vm thunk)
"Run THUNK, a zero-argument procedure, using VM; instrument VM to collect code
coverage data. Return code coverage data and the values returned by THUNK."
(define procedure->ip-counts
;; Mapping from procedures to hash tables; said hash tables map instruction
;; pointers to the number of times they were executed.
(make-hash-table 500))
(define ip-counts
;; A table mapping instruction pointers to the number of times they were
;; executed.
(make-hash-table 5000))
(define (collect! frame)
;; Update PROCEDURE->IP-COUNTS with info from FRAME.
(let* ((proc (frame-procedure frame))
(ip (frame-instruction-pointer frame))
(proc-entry (hashx-create-handle! hashq-proc assq-proc
procedure->ip-counts proc #f)))
(let loop ()
(define ip-counts (cdr proc-entry))
(if ip-counts
(let ((ip-entry (hashv-create-handle! ip-counts ip 0)))
(set-cdr! ip-entry (+ (cdr ip-entry) 1)))
(begin
(set-cdr! proc-entry (make-hash-table))
(loop))))))
;; Update IP-COUNTS with info from FRAME.
(let* ((ip (frame-instruction-pointer frame))
(ip-entry (hashv-create-handle! ip-counts ip 0)))
(set-cdr! ip-entry (+ (cdr ip-entry) 1))))
;; FIXME: It's unclear what the dynamic-wind is for, given that if the
;; VM is different from the current one, continuations will not be
@ -111,7 +81,48 @@ coverage data. Return code coverage data and the values returned by THUNK."
(set-vm-trace-level! vm level)
(remove-hook! hook collect!)))))
(lambda args
(apply values (make-coverage-data procedure->ip-counts) args))))
(apply values (make-coverage-data ip-counts) args))))
;;;
;;; Source chunks.
;;;
(define-record-type <source-chunk>
(make-source-chunk base length sources)
source-chunk?
(base source-chunk-base)
(length source-chunk-length)
(sources source-chunk-sources))
(set-record-type-printer!
<source-chunk>
(lambda (obj port)
(format port "<source-chunk #x~x-#x~x>"
(source-chunk-base obj)
(+ (source-chunk-base obj) (source-chunk-length obj)))))
(define (compute-source-chunk ctx)
"Build a sorted vector of source information for a given debugging
context (ELF image). The return value is a @code{<source-chunk>}, which also
records the address range to which the source information applies."
(make-source-chunk
(debug-context-base ctx)
(debug-context-length ctx)
;; The source locations are sorted already, but collected in reverse order.
(list->vector (reverse! (fold-source-locations cons '() ctx)))))
(define (all-source-information)
"Build and return a vector of source information corresponding to all
loaded code. The vector will be sorted by ascending address order."
(sort! (list->vector (fold-all-debug-contexts
(lambda (ctx seed)
(cons (compute-source-chunk ctx) seed))
'()))
(lambda (x y)
(< (source-chunk-base x) (source-chunk-base y)))))
;;;
@ -119,124 +130,137 @@ coverage data. Return code coverage data and the values returned by THUNK."
;;;
(define-record-type <coverage-data>
(%make-coverage-data procedure->ip-counts
procedure->sources
(%make-coverage-data ip-counts
sources
file->procedures
file->line-counts)
coverage-data?
;; Mapping from procedures to hash tables; said hash tables map instruction
;; pointers to the number of times they were executed.
(procedure->ip-counts data-procedure->ip-counts)
;; Mapping from instruction pointers to the number of times they were
;; executed, as a sorted vector of IP-count pairs.
(ip-counts data-ip-counts)
;; Mapping from procedures to the result of `program-sources'.
(procedure->sources data-procedure->sources)
;; Complete source census at the time the coverage analysis was run, as a
;; sorted vector of <source-chunk> values.
(sources data-sources)
;; Mapping from source file names to lists of procedures defined in the file.
;; FIXME.
(file->procedures data-file->procedures)
;; Mapping from file names to hash tables, which in turn map from line numbers
;; to execution counts.
(file->line-counts data-file->line-counts))
(set-record-type-printer!
<coverage-data>
(lambda (obj port)
(format port "<coverage-data ~x>" (object-address obj))))
(define (make-coverage-data procedure->ip-counts)
(define (make-coverage-data ip-counts)
;; Return a `coverage-data' object based on the coverage data available in
;; PROCEDURE->IP-COUNTS. Precompute the other hash tables that make up
;; `coverage-data' objects.
(let* ((procedure->sources (make-hash-table 500))
;; IP-COUNTS. Precompute the other hash tables that make up `coverage-data'
;; objects.
(let* ((all-sources (all-source-information))
(all-counts (sort! (list->vector (hash-fold acons '() ip-counts))
(lambda (x y)
(< (car x) (car y)))))
(file->procedures (make-hash-table 100))
(file->line-counts (make-hash-table 100))
(data (%make-coverage-data procedure->ip-counts
procedure->sources
(data (%make-coverage-data all-counts
all-sources
file->procedures
file->line-counts)))
(define (increment-execution-count! file line count)
(define (observe-execution-count! file line count)
;; Make the execution count of FILE:LINE the maximum of its current value
;; and COUNT. This is so that LINE's execution count is correct when
;; several instruction pointers map to LINE.
(when file
(let ((file-entry (hash-create-handle! file->line-counts file #f)))
(if (not (cdr file-entry))
(set-cdr! file-entry (make-hash-table 500)))
(let ((line-entry (hashv-create-handle! (cdr file-entry) line 0)))
(set-cdr! line-entry (max (cdr line-entry) count)))))
(set-cdr! line-entry (max (cdr line-entry) count))))))
;; Update execution counts for procs that were executed.
(hash-for-each (lambda (proc ip-counts)
(let* ((sources (program-sources* data proc))
(file (and (pair? sources)
(source:file (car sources)))))
(and file
(begin
;; Add a zero count for all IPs in SOURCES and in
;; the sources of procedures closed over by PROC.
(for-each
(lambda (source)
(let ((file (source:file source))
(line (source:line source)))
(increment-execution-count! file line 0)))
(append-map (cut program-sources* data <>)
(closed-over-procedures proc)))
;; First, visit every known source location and mark it as instrumented but
;; unvisited.
;;
;; FIXME: This is not always necessary. It's important to have the ability
;; to know when a source location is not reached, but sometimes all we need
;; to know is that a particular site *was* reached. In that case we
;; wouldn't need to load up all the DWARF sections. As it is, though, we
;; use the complete source census as part of the later phase.
(let visit-chunk ((chunk-idx 0))
(when (< chunk-idx (vector-length all-sources))
(match (vector-ref all-sources chunk-idx)
(($ <source-chunk> base chunk-length chunk-sources)
(let visit-source ((source-idx 0))
(when (< source-idx (vector-length chunk-sources))
(let ((s (vector-ref chunk-sources source-idx)))
(observe-execution-count! (source-file s) (source-line s) 0)
(visit-source (1+ source-idx)))))))
(visit-chunk (1+ chunk-idx))))
;; Add the actual execution count collected.
(hash-for-each
(lambda (ip count)
(let ((line (closest-source-line sources ip)))
(increment-execution-count! file line count)))
ip-counts)))))
procedure->ip-counts)
;; Set the execution count to zero for procedures loaded and not executed.
;; FIXME: Traversing thousands of procedures here is inefficient.
(for-each (lambda (proc)
(and (not (hashq-ref procedure->sources proc))
(for-each (lambda (proc)
(let* ((sources (program-sources* data proc))
(file (and (pair? sources)
(source:file (car sources)))))
(and file
(for-each
(lambda (ip)
(let ((line (closest-source-line sources ip)))
(increment-execution-count! file line 0)))
(map source:addr sources)))))
(closed-over-procedures proc))))
(append-map module-procedures (loaded-modules)))
;; Then, visit the measured execution counts, walking the complete source
;; census at the same time. This allows us to map observed addresses to
;; source locations. Record observed execution counts.
(let visit-chunk ((chunk-idx 0) (count-idx 0))
(when (< chunk-idx (vector-length all-sources))
(match (vector-ref all-sources chunk-idx)
(($ <source-chunk> base chunk-length chunk-sources)
(let visit-count ((count-idx count-idx) (source-idx 0) (source #f))
(when (< count-idx (vector-length all-counts))
(match (vector-ref all-counts count-idx)
((ip . count)
(cond
((< ip base)
;; Address before chunk base; no corresponding source.
(visit-count (1+ count-idx) source-idx source))
((< ip (+ base chunk-length))
;; Address in chunk; count it.
(let visit-source ((source-idx source-idx) (source source))
(define (finish)
(when source
(observe-execution-count! (source-file source)
(source-line source)
count))
(visit-count (1+ count-idx) source-idx source))
(cond
((< source-idx (vector-length chunk-sources))
(let ((source* (vector-ref chunk-sources source-idx)))
(if (<= (source-pre-pc source*) ip)
(visit-source (1+ source-idx) source*)
(finish))))
(else
(finish)))))
(else
;; Address past chunk; fetch the next chunk.
(visit-chunk (1+ chunk-idx) count-idx)))))))))))
data))
(define (procedure-execution-count data proc)
"Return the number of times PROC's code was executed, according to DATA, or #f
if PROC was not executed. When PROC is a closure, the number of times its code
was executed is returned, not the number of times this code associated with this
particular closure was executed."
(let ((sources (program-sources* data proc)))
(and (pair? sources)
(and=> (hashx-ref hashq-proc assq-proc
(data-procedure->ip-counts data) proc)
(lambda (ip-counts)
;; FIXME: broken with lambda*
(let ((entry-ip (source:addr (car sources))))
(hashv-ref ip-counts entry-ip 0)))))))
(define (program-sources* data proc)
;; A memoizing version of `program-sources'.
(or (hashq-ref (data-procedure->sources data) proc)
(and (or (program? proc) (rtl-program? proc))
(let ((sources (program-sources proc))
(p->s (data-procedure->sources data))
(f->p (data-file->procedures data)))
(if (pair? sources)
(let* ((file (source:file (car sources)))
(entry (hash-create-handle! f->p file '())))
(hashq-set! p->s proc sources)
(set-cdr! entry (cons proc (cdr entry)))
sources)
sources)))))
(define (file-procedures data file)
;; Return the list of globally bound procedures defined in FILE.
(hash-ref (data-file->procedures data) file '()))
"Return the number of times PROC's code was executed, according to DATA. When
PROC is a closure, the number of times its code was executed is returned, not
the number of times this code associated with this particular closure was
executed."
(define (binary-search v key val)
(let lp ((start 0) (end (vector-length v)))
(and (not (eqv? start end))
(let* ((idx (floor/ (+ start end) 2))
(elt (vector-ref v idx))
(val* (key elt)))
(cond
((< val val*)
(lp start idx))
((< val* val)
(lp (1+ idx) end))
(else elt))))))
(and (rtl-program? proc)
(match (binary-search (data-ip-counts data) car (rtl-program-code proc))
(#f 0)
((ip . code) code))))
(define (instrumented/executed-lines data file)
"Return the number of instrumented and the number of executed source lines in
@ -271,66 +295,6 @@ was loaded at the time DATA was collected."
'()
(data-file->line-counts data)))
;;;
;;; Helpers.
;;;
(define (loaded-modules)
;; Return the list of all the modules currently loaded.
(define seen (make-hash-table))
(let loop ((modules (module-submodules (resolve-module '() #f)))
(result '()))
(hash-fold (lambda (name module result)
(if (hashq-ref seen module)
result
(begin
(hashq-set! seen module #t)
(loop (module-submodules module)
(cons module result)))))
result
modules)))
(define (module-procedures module)
;; Return the list of procedures bound globally in MODULE.
(hash-fold (lambda (binding var result)
(if (variable-bound? var)
(let ((value (variable-ref var)))
(if (procedure? value)
(cons value result)
result))
result))
'()
(module-obarray module)))
(define (closest-source-line sources ip)
;; Given SOURCES, as returned by `program-sources' for a given procedure,
;; return the source line of code that is the closest to IP. This is similar
;; to what `program-source' does.
(let loop ((sources sources)
(line (and (pair? sources) (source:line (car sources)))))
(if (null? sources)
line
(let ((source (car sources)))
(if (> (source:addr source) ip)
line
(loop (cdr sources) (source:line source)))))))
(define (closed-over-procedures proc)
;; Return the list of procedures PROC closes over, PROC included.
(let loop ((proc proc)
(result '()))
(if (and (or (program? proc) (rtl-program? proc)) (not (memq proc result)))
(fold loop (cons proc result)
;; FIXME: Include statically nested procedures for RTL
;; programs.
(append (if (program? proc)
(vector->list (or (program-objects proc) #()))
'())
(program-free-variables proc)))
result)))
;;;
;;; LCOV output.
@ -342,6 +306,10 @@ was loaded at the time DATA was collected."
The report will include all the modules loaded at the time coverage data was
gathered, even if their code was not executed."
;; FIXME: Re-enable this code, but using for-each-elf-symbol on each source
;; chunk. Use that to build a map of file -> proc-addr + line + name. Then
;; use something like procedure-execution-count to get the execution count.
#;
(define (dump-function proc)
;; Dump source location and basic coverage data for PROC.
(and (or (program? proc) (rtl-program? proc))
@ -358,11 +326,11 @@ gathered, even if their code was not executed."
;; Output per-file coverage data.
(format port "TN:~%")
(for-each (lambda (file)
(let ((procs (file-procedures data file))
(path (search-path %load-path file)))
(let ((path (search-path %load-path file)))
(if (string? path)
(begin
(format port "SF:~A~%" path)
#;
(for-each dump-function procs)
(for-each (lambda (line+count)
(let ((line (car line+count))

View file

@ -35,6 +35,7 @@
#:use-module (srfi srfi-9)
#:export (debug-context-image
debug-context-base
debug-context-length
debug-context-text-base
program-debug-info-name
@ -96,6 +97,11 @@
@var{context}."
(elf-bytes (debug-context-elf context)))
(define (debug-context-length context)
"Return the size of the mapped ELF image corresponding to
@var{context}, in bytes."
(bytevector-length (debug-context-image context)))
(define (for-each-elf-symbol context proc)
"Call @var{proc} on each symbol in the symbol table of @var{context}."
(let ((elf (debug-context-elf context)))

View file

@ -120,6 +120,15 @@
;; fixed length
(instruction-length inst))))))
(define (source-for-addr addr)
(and=> (find-source-for-addr addr)
(lambda (source)
;; FIXME: absolute or relative address?
(cons* 0
(source-file source)
(source-line source)
(source-column source)))))
(define (program-sources proc)
(cond
((rtl-program? proc)