1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-09 23:40:29 +02:00

frame-instruction-pointer is absolute; rewrite (system vm coverage)

* libguile/frames.c (scm_frame_source): Instead of assuming that
  scm_frame_procedure is correct, use the IP to get the source.
  (scm_frame_instruction_pointer): Return an absolute value instead of
  assuming that slot 0 is correct.  (It isn't, when preparing for a tail
  call.)

* libguile/programs.h:
* libguile/programs.c (scm_find_source_for_addr): New internal helper.

* module/system/repl/debug.scm (print-registers): Readably print
  absolute instruction pointers.

* module/system/vm/coverage.scm: Complete rewrite to use absolute IP's.
  We can't assume that frame-procedure is cheap if it is correct, or
  correct if it is cheap.  Anyway using the address is better anyway.
  (coverage-data->lcov): Disable per-function info temporarily.
  (loaded-modules, module-procedures, closest-source-line)
  (closed-over-procedures): Remove these.  Instead of going from
  procedures to source info, now we go from ELF image to source info.

* module/system/vm/debug.scm (debug-context-length): New interface.

* module/system/vm/program.scm (source-for-addr): New internal helper.
This commit is contained in:
Andy Wingo 2013-11-07 23:03:45 +01:00
parent 72b82b0f21
commit 581a4eb82b
7 changed files with 204 additions and 220 deletions

View file

@ -104,18 +104,9 @@ SCM_DEFINE (scm_frame_source, "frame-source", 1, 0, 0,
"") "")
#define FUNC_NAME s_scm_frame_source #define FUNC_NAME s_scm_frame_source
{ {
SCM proc;
SCM_VALIDATE_VM_FRAME (1, frame); SCM_VALIDATE_VM_FRAME (1, frame);
proc = scm_frame_procedure (frame); return scm_find_source_for_addr (scm_frame_instruction_pointer (frame));
if (SCM_PROGRAM_P (proc) || SCM_RTL_PROGRAM_P (proc))
return scm_program_source (scm_frame_procedure (frame),
scm_frame_instruction_pointer (frame),
SCM_UNDEFINED);
return SCM_BOOL_F;
} }
#undef FUNC_NAME #undef FUNC_NAME
@ -254,22 +245,9 @@ SCM_DEFINE (scm_frame_instruction_pointer, "frame-instruction-pointer", 1, 0, 0,
"") "")
#define FUNC_NAME s_scm_frame_instruction_pointer #define FUNC_NAME s_scm_frame_instruction_pointer
{ {
SCM program;
const struct scm_objcode *c_objcode;
SCM_VALIDATE_VM_FRAME (1, frame); SCM_VALIDATE_VM_FRAME (1, frame);
program = scm_frame_procedure (frame);
if (SCM_RTL_PROGRAM_P (program)) return scm_from_uintptr_t ((scm_t_uintptr) SCM_VM_FRAME_IP (frame));
return scm_from_ptrdiff_t (SCM_VM_FRAME_IP (frame) -
(scm_t_uint8 *) SCM_RTL_PROGRAM_CODE (program));
if (!SCM_PROGRAM_P (program))
return SCM_INUM0;
c_objcode = SCM_PROGRAM_DATA (program);
return scm_from_unsigned_integer ((SCM_VM_FRAME_IP (frame)
- SCM_C_OBJCODE_BASE (c_objcode)));
} }
#undef FUNC_NAME #undef FUNC_NAME

View file

@ -398,6 +398,22 @@ scm_i_program_properties (SCM program)
} }
#undef FUNC_NAME #undef FUNC_NAME
SCM
scm_find_source_for_addr (SCM ip)
{
static SCM source_for_addr = SCM_BOOL_F;
if (scm_is_false (source_for_addr)) {
if (!scm_module_system_booted_p)
return SCM_BOOL_F;
source_for_addr =
scm_c_private_variable ("system vm program", "source-for-addr");
}
return scm_call_1 (scm_variable_ref (source_for_addr), ip);
}
SCM SCM
scm_program_source (SCM program, SCM ip, SCM sources) scm_program_source (SCM program, SCM ip, SCM sources)
{ {

View file

@ -51,6 +51,8 @@ SCM_INTERNAL SCM scm_i_rtl_program_name (SCM program);
SCM_INTERNAL SCM scm_i_rtl_program_documentation (SCM program); SCM_INTERNAL SCM scm_i_rtl_program_documentation (SCM program);
SCM_INTERNAL SCM scm_i_rtl_program_properties (SCM program); SCM_INTERNAL SCM scm_i_rtl_program_properties (SCM program);
SCM_INTERNAL SCM scm_find_source_for_addr (SCM ip);
/* /*
* Programs * Programs
*/ */

View file

@ -94,7 +94,12 @@
(format port fmt val)) (format port fmt val))
(format port "~aRegisters:~%" per-line-prefix) (format port "~aRegisters:~%" per-line-prefix)
(print "ip = ~d\n" (frame-instruction-pointer frame)) (print "ip = #x~x" (frame-instruction-pointer frame))
(when (rtl-program? (frame-procedure frame))
(let ((code (rtl-program-code (frame-procedure frame))))
(format port " (#x~x~@d)" code
(- (frame-instruction-pointer frame) code))))
(newline port)
(print "sp = #x~x\n" (frame-stack-pointer frame)) (print "sp = #x~x\n" (frame-stack-pointer frame))
(print "fp = #x~x\n" (frame-address frame))) (print "fp = #x~x\n" (frame-address frame)))

View file

@ -20,10 +20,14 @@
#:use-module (system vm vm) #:use-module (system vm vm)
#:use-module (system vm frame) #:use-module (system vm frame)
#:use-module (system vm program) #:use-module (system vm program)
#:use-module (system vm debug)
#:use-module (ice-9 format)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-9) #:use-module (srfi srfi-9)
#:use-module (srfi srfi-9 gnu)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (srfi srfi-26) #:use-module (srfi srfi-26)
#:use-module (ice-9 match)
#:export (with-code-coverage #:export (with-code-coverage
coverage-data? coverage-data?
instrumented-source-files instrumented-source-files
@ -46,54 +50,20 @@
;;; Gathering coverage data. ;;; Gathering coverage data.
;;; ;;;
(define (hashq-proc proc n)
;; Return the hash of PROC's objcode.
(if (rtl-program? proc)
(hashq (rtl-program-code proc) n)
(hashq (program-objcode proc) n)))
(define (assq-proc proc alist)
;; Instead of really looking for PROC in ALIST, look for the objcode of PROC.
;; IOW the alist is indexed by procedures, not objcodes, but those procedures
;; are taken as an arbitrary representative of all the procedures (closures)
;; sharing that objcode. This can significantly reduce memory consumption.
(if (rtl-program? proc)
(let ((code (rtl-program-code proc)))
(find (lambda (pair)
(let ((proc (car pair)))
(and (rtl-program? proc)
(eqv? code (rtl-program-code proc)))))
alist))
(let ((code (program-objcode proc)))
(find (lambda (pair)
(let ((proc (car pair)))
(and (program? proc)
(eq? code (program-objcode proc)))))
alist))))
(define (with-code-coverage vm thunk) (define (with-code-coverage vm thunk)
"Run THUNK, a zero-argument procedure, using VM; instrument VM to collect code "Run THUNK, a zero-argument procedure, using VM; instrument VM to collect code
coverage data. Return code coverage data and the values returned by THUNK." coverage data. Return code coverage data and the values returned by THUNK."
(define procedure->ip-counts (define ip-counts
;; Mapping from procedures to hash tables; said hash tables map instruction ;; A table mapping instruction pointers to the number of times they were
;; pointers to the number of times they were executed. ;; executed.
(make-hash-table 500)) (make-hash-table 5000))
(define (collect! frame) (define (collect! frame)
;; Update PROCEDURE->IP-COUNTS with info from FRAME. ;; Update IP-COUNTS with info from FRAME.
(let* ((proc (frame-procedure frame)) (let* ((ip (frame-instruction-pointer frame))
(ip (frame-instruction-pointer frame)) (ip-entry (hashv-create-handle! ip-counts ip 0)))
(proc-entry (hashx-create-handle! hashq-proc assq-proc (set-cdr! ip-entry (+ (cdr ip-entry) 1))))
procedure->ip-counts proc #f)))
(let loop ()
(define ip-counts (cdr proc-entry))
(if ip-counts
(let ((ip-entry (hashv-create-handle! ip-counts ip 0)))
(set-cdr! ip-entry (+ (cdr ip-entry) 1)))
(begin
(set-cdr! proc-entry (make-hash-table))
(loop))))))
;; FIXME: It's unclear what the dynamic-wind is for, given that if the ;; FIXME: It's unclear what the dynamic-wind is for, given that if the
;; VM is different from the current one, continuations will not be ;; VM is different from the current one, continuations will not be
@ -111,7 +81,48 @@ coverage data. Return code coverage data and the values returned by THUNK."
(set-vm-trace-level! vm level) (set-vm-trace-level! vm level)
(remove-hook! hook collect!))))) (remove-hook! hook collect!)))))
(lambda args (lambda args
(apply values (make-coverage-data procedure->ip-counts) args)))) (apply values (make-coverage-data ip-counts) args))))
;;;
;;; Source chunks.
;;;
(define-record-type <source-chunk>
(make-source-chunk base length sources)
source-chunk?
(base source-chunk-base)
(length source-chunk-length)
(sources source-chunk-sources))
(set-record-type-printer!
<source-chunk>
(lambda (obj port)
(format port "<source-chunk #x~x-#x~x>"
(source-chunk-base obj)
(+ (source-chunk-base obj) (source-chunk-length obj)))))
(define (compute-source-chunk ctx)
"Build a sorted vector of source information for a given debugging
context (ELF image). The return value is a @code{<source-chunk>}, which also
records the address range to which the source information applies."
(make-source-chunk
(debug-context-base ctx)
(debug-context-length ctx)
;; The source locations are sorted already, but collected in reverse order.
(list->vector (reverse! (fold-source-locations cons '() ctx)))))
(define (all-source-information)
"Build and return a vector of source information corresponding to all
loaded code. The vector will be sorted by ascending address order."
(sort! (list->vector (fold-all-debug-contexts
(lambda (ctx seed)
(cons (compute-source-chunk ctx) seed))
'()))
(lambda (x y)
(< (source-chunk-base x) (source-chunk-base y)))))
;;; ;;;
@ -119,124 +130,137 @@ coverage data. Return code coverage data and the values returned by THUNK."
;;; ;;;
(define-record-type <coverage-data> (define-record-type <coverage-data>
(%make-coverage-data procedure->ip-counts (%make-coverage-data ip-counts
procedure->sources sources
file->procedures file->procedures
file->line-counts) file->line-counts)
coverage-data? coverage-data?
;; Mapping from procedures to hash tables; said hash tables map instruction ;; Mapping from instruction pointers to the number of times they were
;; pointers to the number of times they were executed. ;; executed, as a sorted vector of IP-count pairs.
(procedure->ip-counts data-procedure->ip-counts) (ip-counts data-ip-counts)
;; Mapping from procedures to the result of `program-sources'. ;; Complete source census at the time the coverage analysis was run, as a
(procedure->sources data-procedure->sources) ;; sorted vector of <source-chunk> values.
(sources data-sources)
;; Mapping from source file names to lists of procedures defined in the file. ;; Mapping from source file names to lists of procedures defined in the file.
;; FIXME.
(file->procedures data-file->procedures) (file->procedures data-file->procedures)
;; Mapping from file names to hash tables, which in turn map from line numbers ;; Mapping from file names to hash tables, which in turn map from line numbers
;; to execution counts. ;; to execution counts.
(file->line-counts data-file->line-counts)) (file->line-counts data-file->line-counts))
(set-record-type-printer!
<coverage-data>
(lambda (obj port)
(format port "<coverage-data ~x>" (object-address obj))))
(define (make-coverage-data procedure->ip-counts) (define (make-coverage-data ip-counts)
;; Return a `coverage-data' object based on the coverage data available in ;; Return a `coverage-data' object based on the coverage data available in
;; PROCEDURE->IP-COUNTS. Precompute the other hash tables that make up ;; IP-COUNTS. Precompute the other hash tables that make up `coverage-data'
;; `coverage-data' objects. ;; objects.
(let* ((procedure->sources (make-hash-table 500)) (let* ((all-sources (all-source-information))
(all-counts (sort! (list->vector (hash-fold acons '() ip-counts))
(lambda (x y)
(< (car x) (car y)))))
(file->procedures (make-hash-table 100)) (file->procedures (make-hash-table 100))
(file->line-counts (make-hash-table 100)) (file->line-counts (make-hash-table 100))
(data (%make-coverage-data procedure->ip-counts (data (%make-coverage-data all-counts
procedure->sources all-sources
file->procedures file->procedures
file->line-counts))) file->line-counts)))
(define (increment-execution-count! file line count)
(define (observe-execution-count! file line count)
;; Make the execution count of FILE:LINE the maximum of its current value ;; Make the execution count of FILE:LINE the maximum of its current value
;; and COUNT. This is so that LINE's execution count is correct when ;; and COUNT. This is so that LINE's execution count is correct when
;; several instruction pointers map to LINE. ;; several instruction pointers map to LINE.
(let ((file-entry (hash-create-handle! file->line-counts file #f))) (when file
(if (not (cdr file-entry)) (let ((file-entry (hash-create-handle! file->line-counts file #f)))
(set-cdr! file-entry (make-hash-table 500))) (if (not (cdr file-entry))
(let ((line-entry (hashv-create-handle! (cdr file-entry) line 0))) (set-cdr! file-entry (make-hash-table 500)))
(set-cdr! line-entry (max (cdr line-entry) count))))) (let ((line-entry (hashv-create-handle! (cdr file-entry) line 0)))
(set-cdr! line-entry (max (cdr line-entry) count))))))
;; Update execution counts for procs that were executed. ;; First, visit every known source location and mark it as instrumented but
(hash-for-each (lambda (proc ip-counts) ;; unvisited.
(let* ((sources (program-sources* data proc)) ;;
(file (and (pair? sources) ;; FIXME: This is not always necessary. It's important to have the ability
(source:file (car sources))))) ;; to know when a source location is not reached, but sometimes all we need
(and file ;; to know is that a particular site *was* reached. In that case we
(begin ;; wouldn't need to load up all the DWARF sections. As it is, though, we
;; Add a zero count for all IPs in SOURCES and in ;; use the complete source census as part of the later phase.
;; the sources of procedures closed over by PROC. (let visit-chunk ((chunk-idx 0))
(for-each (when (< chunk-idx (vector-length all-sources))
(lambda (source) (match (vector-ref all-sources chunk-idx)
(let ((file (source:file source)) (($ <source-chunk> base chunk-length chunk-sources)
(line (source:line source))) (let visit-source ((source-idx 0))
(increment-execution-count! file line 0))) (when (< source-idx (vector-length chunk-sources))
(append-map (cut program-sources* data <>) (let ((s (vector-ref chunk-sources source-idx)))
(closed-over-procedures proc))) (observe-execution-count! (source-file s) (source-line s) 0)
(visit-source (1+ source-idx)))))))
(visit-chunk (1+ chunk-idx))))
;; Add the actual execution count collected. ;; Then, visit the measured execution counts, walking the complete source
(hash-for-each ;; census at the same time. This allows us to map observed addresses to
(lambda (ip count) ;; source locations. Record observed execution counts.
(let ((line (closest-source-line sources ip))) (let visit-chunk ((chunk-idx 0) (count-idx 0))
(increment-execution-count! file line count))) (when (< chunk-idx (vector-length all-sources))
ip-counts))))) (match (vector-ref all-sources chunk-idx)
procedure->ip-counts) (($ <source-chunk> base chunk-length chunk-sources)
(let visit-count ((count-idx count-idx) (source-idx 0) (source #f))
;; Set the execution count to zero for procedures loaded and not executed. (when (< count-idx (vector-length all-counts))
;; FIXME: Traversing thousands of procedures here is inefficient. (match (vector-ref all-counts count-idx)
(for-each (lambda (proc) ((ip . count)
(and (not (hashq-ref procedure->sources proc)) (cond
(for-each (lambda (proc) ((< ip base)
(let* ((sources (program-sources* data proc)) ;; Address before chunk base; no corresponding source.
(file (and (pair? sources) (visit-count (1+ count-idx) source-idx source))
(source:file (car sources))))) ((< ip (+ base chunk-length))
(and file ;; Address in chunk; count it.
(for-each (let visit-source ((source-idx source-idx) (source source))
(lambda (ip) (define (finish)
(let ((line (closest-source-line sources ip))) (when source
(increment-execution-count! file line 0))) (observe-execution-count! (source-file source)
(map source:addr sources))))) (source-line source)
(closed-over-procedures proc)))) count))
(append-map module-procedures (loaded-modules))) (visit-count (1+ count-idx) source-idx source))
(cond
((< source-idx (vector-length chunk-sources))
(let ((source* (vector-ref chunk-sources source-idx)))
(if (<= (source-pre-pc source*) ip)
(visit-source (1+ source-idx) source*)
(finish))))
(else
(finish)))))
(else
;; Address past chunk; fetch the next chunk.
(visit-chunk (1+ chunk-idx) count-idx)))))))))))
data)) data))
(define (procedure-execution-count data proc) (define (procedure-execution-count data proc)
"Return the number of times PROC's code was executed, according to DATA, or #f "Return the number of times PROC's code was executed, according to DATA. When
if PROC was not executed. When PROC is a closure, the number of times its code PROC is a closure, the number of times its code was executed is returned, not
was executed is returned, not the number of times this code associated with this the number of times this code associated with this particular closure was
particular closure was executed." executed."
(let ((sources (program-sources* data proc))) (define (binary-search v key val)
(and (pair? sources) (let lp ((start 0) (end (vector-length v)))
(and=> (hashx-ref hashq-proc assq-proc (and (not (eqv? start end))
(data-procedure->ip-counts data) proc) (let* ((idx (floor/ (+ start end) 2))
(lambda (ip-counts) (elt (vector-ref v idx))
;; FIXME: broken with lambda* (val* (key elt)))
(let ((entry-ip (source:addr (car sources)))) (cond
(hashv-ref ip-counts entry-ip 0))))))) ((< val val*)
(lp start idx))
(define (program-sources* data proc) ((< val* val)
;; A memoizing version of `program-sources'. (lp (1+ idx) end))
(or (hashq-ref (data-procedure->sources data) proc) (else elt))))))
(and (or (program? proc) (rtl-program? proc)) (and (rtl-program? proc)
(let ((sources (program-sources proc)) (match (binary-search (data-ip-counts data) car (rtl-program-code proc))
(p->s (data-procedure->sources data)) (#f 0)
(f->p (data-file->procedures data))) ((ip . code) code))))
(if (pair? sources)
(let* ((file (source:file (car sources)))
(entry (hash-create-handle! f->p file '())))
(hashq-set! p->s proc sources)
(set-cdr! entry (cons proc (cdr entry)))
sources)
sources)))))
(define (file-procedures data file)
;; Return the list of globally bound procedures defined in FILE.
(hash-ref (data-file->procedures data) file '()))
(define (instrumented/executed-lines data file) (define (instrumented/executed-lines data file)
"Return the number of instrumented and the number of executed source lines in "Return the number of instrumented and the number of executed source lines in
@ -271,66 +295,6 @@ was loaded at the time DATA was collected."
'() '()
(data-file->line-counts data))) (data-file->line-counts data)))
;;;
;;; Helpers.
;;;
(define (loaded-modules)
;; Return the list of all the modules currently loaded.
(define seen (make-hash-table))
(let loop ((modules (module-submodules (resolve-module '() #f)))
(result '()))
(hash-fold (lambda (name module result)
(if (hashq-ref seen module)
result
(begin
(hashq-set! seen module #t)
(loop (module-submodules module)
(cons module result)))))
result
modules)))
(define (module-procedures module)
;; Return the list of procedures bound globally in MODULE.
(hash-fold (lambda (binding var result)
(if (variable-bound? var)
(let ((value (variable-ref var)))
(if (procedure? value)
(cons value result)
result))
result))
'()
(module-obarray module)))
(define (closest-source-line sources ip)
;; Given SOURCES, as returned by `program-sources' for a given procedure,
;; return the source line of code that is the closest to IP. This is similar
;; to what `program-source' does.
(let loop ((sources sources)
(line (and (pair? sources) (source:line (car sources)))))
(if (null? sources)
line
(let ((source (car sources)))
(if (> (source:addr source) ip)
line
(loop (cdr sources) (source:line source)))))))
(define (closed-over-procedures proc)
;; Return the list of procedures PROC closes over, PROC included.
(let loop ((proc proc)
(result '()))
(if (and (or (program? proc) (rtl-program? proc)) (not (memq proc result)))
(fold loop (cons proc result)
;; FIXME: Include statically nested procedures for RTL
;; programs.
(append (if (program? proc)
(vector->list (or (program-objects proc) #()))
'())
(program-free-variables proc)))
result)))
;;; ;;;
;;; LCOV output. ;;; LCOV output.
@ -342,6 +306,10 @@ was loaded at the time DATA was collected."
The report will include all the modules loaded at the time coverage data was The report will include all the modules loaded at the time coverage data was
gathered, even if their code was not executed." gathered, even if their code was not executed."
;; FIXME: Re-enable this code, but using for-each-elf-symbol on each source
;; chunk. Use that to build a map of file -> proc-addr + line + name. Then
;; use something like procedure-execution-count to get the execution count.
#;
(define (dump-function proc) (define (dump-function proc)
;; Dump source location and basic coverage data for PROC. ;; Dump source location and basic coverage data for PROC.
(and (or (program? proc) (rtl-program? proc)) (and (or (program? proc) (rtl-program? proc))
@ -358,11 +326,11 @@ gathered, even if their code was not executed."
;; Output per-file coverage data. ;; Output per-file coverage data.
(format port "TN:~%") (format port "TN:~%")
(for-each (lambda (file) (for-each (lambda (file)
(let ((procs (file-procedures data file)) (let ((path (search-path %load-path file)))
(path (search-path %load-path file)))
(if (string? path) (if (string? path)
(begin (begin
(format port "SF:~A~%" path) (format port "SF:~A~%" path)
#;
(for-each dump-function procs) (for-each dump-function procs)
(for-each (lambda (line+count) (for-each (lambda (line+count)
(let ((line (car line+count)) (let ((line (car line+count))

View file

@ -35,6 +35,7 @@
#:use-module (srfi srfi-9) #:use-module (srfi srfi-9)
#:export (debug-context-image #:export (debug-context-image
debug-context-base debug-context-base
debug-context-length
debug-context-text-base debug-context-text-base
program-debug-info-name program-debug-info-name
@ -96,6 +97,11 @@
@var{context}." @var{context}."
(elf-bytes (debug-context-elf context))) (elf-bytes (debug-context-elf context)))
(define (debug-context-length context)
"Return the size of the mapped ELF image corresponding to
@var{context}, in bytes."
(bytevector-length (debug-context-image context)))
(define (for-each-elf-symbol context proc) (define (for-each-elf-symbol context proc)
"Call @var{proc} on each symbol in the symbol table of @var{context}." "Call @var{proc} on each symbol in the symbol table of @var{context}."
(let ((elf (debug-context-elf context))) (let ((elf (debug-context-elf context)))

View file

@ -120,6 +120,15 @@
;; fixed length ;; fixed length
(instruction-length inst)))))) (instruction-length inst))))))
(define (source-for-addr addr)
(and=> (find-source-for-addr addr)
(lambda (source)
;; FIXME: absolute or relative address?
(cons* 0
(source-file source)
(source-line source)
(source-column source)))))
(define (program-sources proc) (define (program-sources proc)
(cond (cond
((rtl-program? proc) ((rtl-program? proc)