1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 04:10:18 +02:00

split linker out of elf module

* module/Makefile.am:
* module/system/vm/linker.scm: New file, split out of (system vm elf).
  (make-string-table, string-table-intern): Export under their bare
  names, instead of make-elf-string-table and elf-string-table-intern.

* module/system/vm/elf.scm: Remove linking capabilities.
  (string-table-ref): Export.

* module/language/objcode/elf.scm (bytecode->elf): Adapt to use (system
  vm linker).

* test-suite/tests/linker.test: New test.
This commit is contained in:
Andy Wingo 2013-04-17 23:07:04 +02:00
parent f6f4feb0a2
commit 45037e7527
5 changed files with 562 additions and 383 deletions

View file

@ -348,6 +348,7 @@ SYSTEM_SOURCES = \
system/vm/inspect.scm \ system/vm/inspect.scm \
system/vm/coverage.scm \ system/vm/coverage.scm \
system/vm/elf.scm \ system/vm/elf.scm \
system/vm/linker.scm \
system/vm/frame.scm \ system/vm/frame.scm \
system/vm/instruction.scm \ system/vm/instruction.scm \
system/vm/objcode.scm \ system/vm/objcode.scm \

View file

@ -1,6 +1,6 @@
;;; Embedding bytecode in ELF ;;; Embedding bytecode in ELF
;; Copyright (C) 2012 Free Software Foundation, Inc. ;; Copyright (C) 2012, 2013 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -30,24 +30,25 @@
#:use-module (rnrs bytevectors) #:use-module (rnrs bytevectors)
#:use-module (ice-9 binary-ports) #:use-module (ice-9 binary-ports)
#:use-module (system vm elf) #:use-module (system vm elf)
#:use-module (system vm linker)
#:export (write-objcode)) #:export (write-objcode))
(define (bytecode->elf bv) (define (bytecode->elf bv)
(let ((string-table (make-elf-string-table))) (let ((string-table (make-string-table)))
(define (intern-string! string) (define (intern-string! string)
(call-with-values (call-with-values
(lambda () (elf-string-table-intern string-table string)) (lambda () (string-table-intern string-table string))
(lambda (table idx) (lambda (table idx)
(set! string-table table) (set! string-table table)
idx))) idx)))
(define (make-object name bv relocs . kwargs) (define (make-object name bv relocs . kwargs)
(let ((name-idx (intern-string! (symbol->string name)))) (let ((name-idx (intern-string! (symbol->string name))))
(make-elf-object (apply make-elf-section (make-linker-object (apply make-elf-section
#:name name-idx #:name name-idx
#:size (bytevector-length bv) #:size (bytevector-length bv)
kwargs) kwargs)
bv relocs bv relocs
(list (make-elf-symbol name 0))))) (list (make-linker-symbol name 0)))))
(define (make-dynamic-section word-size endianness) (define (make-dynamic-section word-size endianness)
(define (make-dynamic-section/32) (define (make-dynamic-section/32)
(let ((bv (make-bytevector 24 0))) (let ((bv (make-bytevector 24 0)))
@ -57,7 +58,7 @@
(bytevector-u32-set! bv 12 0 endianness) (bytevector-u32-set! bv 12 0 endianness)
(bytevector-u32-set! bv 16 DT_NULL endianness) (bytevector-u32-set! bv 16 DT_NULL endianness)
(bytevector-u32-set! bv 20 0 endianness) (bytevector-u32-set! bv 20 0 endianness)
(values bv (make-elf-reloc 'abs32/1 12 0 '.rtl-text)))) (values bv (make-linker-reloc 'abs32/1 12 0 '.rtl-text))))
(define (make-dynamic-section/64) (define (make-dynamic-section/64)
(let ((bv (make-bytevector 48 0))) (let ((bv (make-bytevector 48 0)))
(bytevector-u64-set! bv 0 DT_GUILE_RTL_VERSION endianness) (bytevector-u64-set! bv 0 DT_GUILE_RTL_VERSION endianness)
@ -66,7 +67,7 @@
(bytevector-u64-set! bv 24 0 endianness) (bytevector-u64-set! bv 24 0 endianness)
(bytevector-u64-set! bv 32 DT_NULL endianness) (bytevector-u64-set! bv 32 DT_NULL endianness)
(bytevector-u64-set! bv 40 0 endianness) (bytevector-u64-set! bv 40 0 endianness)
(values bv (make-elf-reloc 'abs64/1 24 0 '.rtl-text)))) (values bv (make-linker-reloc 'abs64/1 24 0 '.rtl-text))))
(call-with-values (lambda () (call-with-values (lambda ()
(case word-size (case word-size
((4) (make-dynamic-section/32)) ((4) (make-dynamic-section/32))
@ -75,9 +76,9 @@
(lambda (bv reloc) (lambda (bv reloc)
(make-object '.dynamic bv (list reloc) (make-object '.dynamic bv (list reloc)
#:type SHT_DYNAMIC #:flags SHF_ALLOC)))) #:type SHT_DYNAMIC #:flags SHF_ALLOC))))
(define (link-string-table) (define (make-string-table)
(intern-string! ".shstrtab") (intern-string! ".shstrtab")
(make-object '.shstrtab (link-elf-string-table string-table) '() (make-object '.shstrtab (link-string-table string-table) '()
#:type SHT_STRTAB #:flags 0)) #:type SHT_STRTAB #:flags 0))
(let* ((word-size (target-word-size)) (let* ((word-size (target-word-size))
(endianness (target-endianness)) (endianness (target-endianness))
@ -85,7 +86,7 @@
(dt (make-dynamic-section word-size endianness)) (dt (make-dynamic-section word-size endianness))
;; This needs to be linked last, because linking other ;; This needs to be linked last, because linking other
;; sections adds entries to the string table. ;; sections adds entries to the string table.
(shstrtab (link-string-table))) (shstrtab (make-string-table)))
(link-elf (list text dt shstrtab) (link-elf (list text dt shstrtab)
#:endianness endianness #:word-size word-size)))) #:endianness endianness #:word-size word-size))))

View file

@ -1,6 +1,6 @@
;;; Guile ELF reader and writer ;;; Guile ELF reader and writer
;; Copyright (C) 2011, 2012 Free Software Foundation, Inc. ;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -33,12 +33,22 @@
elf-entry elf-phoff elf-shoff elf-flags elf-ehsize elf-entry elf-phoff elf-shoff elf-flags elf-ehsize
elf-phentsize elf-phnum elf-shentsize elf-shnum elf-shstrndx elf-phentsize elf-phnum elf-shentsize elf-shnum elf-shstrndx
elf-header-len write-elf-header
(make-elf-segment* . make-elf-segment) (make-elf-segment* . make-elf-segment)
elf-segment? elf-segment?
elf-segment-type elf-segment-offset elf-segment-vaddr elf-segment-type elf-segment-offset elf-segment-vaddr
elf-segment-paddr elf-segment-filesz elf-segment-memsz elf-segment-paddr elf-segment-filesz elf-segment-memsz
elf-segment-flags elf-segment-align elf-segment-flags elf-segment-align
elf-program-header-len write-elf-program-header
PT_NULL PT_LOAD PT_DYNAMIC PT_INTERP PT_NOTE PT_SHLIB
PT_PHDR PT_TLS PT_NUM PT_LOOS PT_GNU_EH_FRAME PT_GNU_STACK
PT_GNU_RELRO
PF_R PF_W PF_X
(make-elf-section* . make-elf-section) (make-elf-section* . make-elf-section)
elf-section? elf-section?
elf-section-name elf-section-type elf-section-flags elf-section-name elf-section-type elf-section-flags
@ -46,11 +56,15 @@
elf-section-link elf-section-info elf-section-addralign elf-section-link elf-section-info elf-section-addralign
elf-section-entsize elf-section-entsize
elf-section-header-len write-elf-section-header
make-elf-symbol elf-symbol? make-elf-symbol elf-symbol?
elf-symbol-name elf-symbol-value elf-symbol-size elf-symbol-name elf-symbol-value elf-symbol-size
elf-symbol-info elf-symbol-other elf-symbol-shndx elf-symbol-info elf-symbol-other elf-symbol-shndx
elf-symbol-binding elf-symbol-type elf-symbol-visibility elf-symbol-binding elf-symbol-type elf-symbol-visibility
SHN_UNDEF
SHT_NULL SHT_PROGBITS SHT_SYMTAB SHT_STRTAB SHT_RELA SHT_NULL SHT_PROGBITS SHT_SYMTAB SHT_STRTAB SHT_RELA
SHT_HASH SHT_DYNAMIC SHT_NOTE SHT_NOBITS SHT_REL SHT_SHLIB SHT_HASH SHT_DYNAMIC SHT_NOTE SHT_NOBITS SHT_REL SHT_SHLIB
SHT_DYNSYM SHT_INIT_ARRAY SHT_FINI_ARRAY SHT_PREINIT_ARRAY SHT_DYNSYM SHT_INIT_ARRAY SHT_FINI_ARRAY SHT_PREINIT_ARRAY
@ -72,6 +86,8 @@
DT_GUILE_RTL_VERSION DT_HIGUILE DT_LOOS DT_HIOS DT_LOPROC DT_GUILE_RTL_VERSION DT_HIGUILE DT_LOOS DT_HIOS DT_LOPROC
DT_HIPROC DT_HIPROC
string-table-ref
STB_LOCAL STB_GLOBAL STB_WEAK STB_NUM STB_LOOS STB_GNU STB_LOCAL STB_GLOBAL STB_WEAK STB_NUM STB_LOOS STB_GNU
STB_HIOS STB_LOPROC STB_HIPROC STB_HIOS STB_LOPROC STB_HIPROC
@ -89,23 +105,7 @@
elf-symbol-table-ref elf-symbol-table-ref
parse-elf-note parse-elf-note
elf-note-name elf-note-desc elf-note-type elf-note-name elf-note-desc elf-note-type))
(make-string-table . make-elf-string-table)
(string-table-intern . elf-string-table-intern)
(link-string-table . link-elf-string-table)
(make-reloc . make-elf-reloc)
(make-symbol . make-elf-symbol)
(make-object . make-elf-object)
(object? . elf-object?)
(object-section . elf-object-section)
(object-bv . elf-object-bv)
(object-relocs . elf-object-relocs)
(object-symbols . elf-object-symbols)
link-elf))
;; #define EI_NIDENT 16 ;; #define EI_NIDENT 16
@ -902,354 +902,3 @@
(bytevector-copy! bv (+ offset 12) name 0 (1- namesz)) (bytevector-copy! bv (+ offset 12) name 0 (1- namesz))
(bytevector-copy! bv (+ offset 12 namesz) desc 0 descsz) (bytevector-copy! bv (+ offset 12 namesz) desc 0 descsz)
(make-elf-note (utf8->string name) desc type))))) (make-elf-note (utf8->string name) desc type)))))
;;;
;;; All of that was the parser. Now, on to a linker.
;;;
;; A relocation records a reference to a symbol. When the symbol is
;; resolved to an address, the reloc location will be updated to point
;; to the address.
;;
;; Two types. Abs32/1 and Abs64/1 are absolute offsets in bytes.
;; Rel32/4 is a relative signed offset in 32-bit units. Either can have
;; an arbitrary addend as well.
;;
(define-record-type <reloc>
(make-reloc type loc addend symbol)
reloc?
(type reloc-type) ;; rel32/4, abs32/1, abs64/1
(loc reloc-loc)
(addend reloc-addend)
(symbol reloc-symbol))
;; A symbol is an association between a name and an address. The
;; address is always in regard to some particular address space. When
;; objects come into the linker, their symbols live in the object
;; address space. When the objects are allocated into ELF segments, the
;; symbols will be relocated into memory address space, corresponding to
;; the position the ELF will be loaded at.
;;
(define-record-type <symbol>
(make-symbol name address)
symbol?
(name symbol-name)
(address symbol-address))
(define-record-type <object>
(make-object section bv relocs symbols)
object?
(section object-section)
(bv object-bv)
(relocs object-relocs)
(symbols object-symbols))
(define (make-string-table)
'(("" 0 #vu8())))
(define (string-table-length table)
(let ((last (car table)))
;; The + 1 is for the trailing NUL byte.
(+ (cadr last) (bytevector-length (caddr last)) 1)))
(define (string-table-intern table str)
(cond
((assoc str table)
=> (lambda (ent)
(values table (cadr ent))))
(else
(let* ((next (string-table-length table)))
(values (cons (list str next (string->utf8 str))
table)
next)))))
(define (link-string-table table)
(let ((out (make-bytevector (string-table-length table) 0)))
(for-each
(lambda (ent)
(let ((bytes (caddr ent)))
(bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
table)
out))
(define (segment-kind section)
(let ((flags (elf-section-flags section)))
(cons (cond
((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
((zero? (logand SHF_ALLOC flags)) PT_NOTE)
(else PT_LOAD))
(logior (if (zero? (logand SHF_ALLOC flags))
0
PF_R)
(if (zero? (logand SHF_EXECINSTR flags))
0
PF_X)
(if (zero? (logand SHF_WRITE flags))
0
PF_W)))))
(define (group-by-cars ls)
(let lp ((in ls) (k #f) (group #f) (out '()))
(cond
((null? in)
(reverse!
(if group
(cons (cons k (reverse! group)) out)
out)))
((and group (equal? k (caar in)))
(lp (cdr in) k (cons (cdar in) group) out))
(else
(lp (cdr in) (caar in) (list (cdar in))
(if group
(cons (cons k (reverse! group)) out)
out))))))
(define (collate-objects-into-segments objects)
(group-by-cars
(stable-sort!
(map (lambda (o)
(cons (segment-kind (object-section o)) o))
objects)
(lambda (x y)
(let ((x-type (caar x)) (y-type (caar y))
(x-flags (cdar x)) (y-flags (cdar y))
(x-section (object-section (cdr x)))
(y-section (object-section (cdr y))))
(cond
((not (equal? x-flags y-flags))
(< x-flags y-flags))
((not (equal? x-type y-type))
(< x-type y-type))
((not (equal? (elf-section-type x-section)
(elf-section-type y-section)))
(cond
((equal? (elf-section-type x-section) SHT_NOBITS) #t)
((equal? (elf-section-type y-section) SHT_NOBITS) #f)
(else (< (elf-section-type x-section)
(elf-section-type y-section)))))
(else
(< (elf-section-size x-section)
(elf-section-size y-section)))))))))
(define (align address alignment)
(+ address
(modulo (- alignment (modulo address alignment)) alignment)))
(define (fold1 proc ls s0)
(let lp ((ls ls) (s0 s0))
(if (null? ls)
s0
(lp (cdr ls) (proc (car ls) s0)))))
(define (fold2 proc ls s0 s1)
(let lp ((ls ls) (s0 s0) (s1 s1))
(if (null? ls)
(values s0 s1)
(receive (s0 s1) (proc (car ls) s0 s1)
(lp (cdr ls) s0 s1)))))
(define (fold4 proc ls s0 s1 s2 s3)
(let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
(if (null? ls)
(values s0 s1 s2 s3)
(receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
(lp (cdr ls) s0 s1 s2 s3)))))
(define (fold5 proc ls s0 s1 s2 s3 s4)
(let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
(if (null? ls)
(values s0 s1 s2 s3 s4)
(receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
(lp (cdr ls) s0 s1 s2 s3 s4)))))
(define (relocate-section-header sec fileaddr memaddr)
(make-elf-section (elf-section-name sec) (elf-section-type sec)
(elf-section-flags sec) memaddr
fileaddr (elf-section-size sec)
(elf-section-link sec) (elf-section-info sec)
(elf-section-addralign sec) (elf-section-entsize sec)))
(define *page-size* 4096)
;; Adds object symbols to global table, relocating them from object
;; address space to memory address space.
(define (add-symbols symbols offset symtab)
(fold1 (lambda (symbol symtab)
(let ((name (symbol-name symbol))
(addr (symbol-address symbol)))
(vhash-consq name (make-symbol name (+ addr offset)) symtab)))
symbols
symtab))
(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
(let* ((loadable? (not (zero? flags)))
(alignment (fold1 (lambda (o alignment)
(lcm (elf-section-addralign (object-section o))
alignment))
objects
alignment))
(fileaddr (align fileaddr alignment))
(memaddr (align memaddr alignment)))
(receive (objects fileend memend symtab)
(fold4 (lambda (o out fileaddr memaddr symtab)
(let* ((section (object-section o))
(fileaddr
(if (= (elf-section-type section) SHT_NOBITS)
fileaddr
(align fileaddr (elf-section-addralign section))))
(memaddr
(align memaddr (elf-section-addralign section))))
(values
(cons (make-object (relocate-section-header section fileaddr
memaddr)
(object-bv o)
(object-relocs o)
(object-symbols o))
out)
(if (= (elf-section-type section) SHT_NOBITS)
fileaddr
(+ fileaddr (elf-section-size section)))
(+ memaddr (elf-section-size section))
(add-symbols (object-symbols o) memaddr symtab))))
objects '() fileaddr memaddr symtab)
(values
(make-elf-segment* #:type type #:offset fileaddr
#:vaddr (if loadable? memaddr 0)
#:filesz (- fileend fileaddr)
#:memsz (if loadable? (- memend memaddr) 0)
#:flags flags #:align alignment)
(reverse objects)
symtab))))
(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
(let ((ent (vhash-assq (reloc-symbol reloc) symtab)))
(unless ent
(error "Undefined symbol" (reloc-symbol reloc)))
(let* ((file-loc (+ (reloc-loc reloc) file-offset))
(mem-loc (+ (reloc-loc reloc) mem-offset))
(addr (symbol-address (cdr ent))))
(case (reloc-type reloc)
((rel32/4)
(let ((diff (- addr mem-loc)))
(unless (zero? (modulo diff 4))
(error "Bad offset" reloc symbol mem-offset))
(bytevector-s32-set! bv file-loc
(+ (/ diff 4) (reloc-addend reloc))
endianness)))
((abs32/1)
(bytevector-u32-set! bv file-loc addr endianness))
((abs64/1)
(bytevector-u64-set! bv file-loc addr endianness))
(else
(error "bad reloc type" reloc))))))
(define (write-object bv o symtab endianness)
(let* ((section (object-section o))
(offset (elf-section-offset section))
(addr (elf-section-addr section))
(len (elf-section-size section))
(bytes (object-bv o))
(relocs (object-relocs o)))
(if (not (= (elf-section-type section) SHT_NOBITS))
(begin
(if (not (= (elf-section-size section) (bytevector-length bytes)))
(error "unexpected length" section bytes))
(bytevector-copy! bytes 0 bv offset len)
(for-each (lambda (reloc)
(process-reloc reloc bv offset addr symtab endianness))
relocs)))))
(define (compute-sections-by-name seglists)
(let lp ((in (apply append (map cdr seglists)))
(n 1) (out '()) (shstrtab #f))
(if (null? in)
(fold1 (lambda (x tail)
(cond
((false-if-exception
(string-table-ref shstrtab (car x)))
=> (lambda (str) (acons str (cdr x) tail)))
(else tail)))
out '())
(let* ((section (object-section (car in)))
(bv (object-bv (car in)))
(name (elf-section-name section)))
(lp (cdr in) (1+ n) (acons name n out)
(or shstrtab
(and (= (elf-section-type section) SHT_STRTAB)
(equal? (false-if-exception
(string-table-ref bv name))
".shstrtab")
bv)))))))
;; Given a list of section-header/bytevector pairs, collate the sections
;; into segments, allocate the segments, allocate the ELF bytevector,
;; and write the segments into the bytevector, relocating as we go.
;;
(define* (link-elf objects #:key
(page-aligned? #t)
(endianness (target-endianness))
(word-size (target-word-size)))
(let* ((seglists (collate-objects-into-segments objects))
(sections-by-name (compute-sections-by-name seglists))
(nsegments (length seglists))
(nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
(program-headers-offset (elf-header-len word-size))
(fileaddr (+ program-headers-offset
(* nsegments (elf-program-header-len word-size))))
(memaddr 0))
(receive (out fileend memend symtab _)
(fold5
(lambda (x out fileaddr memaddr symtab prev-flags)
(let ((type (caar x))
(flags (cdar x))
(objects (cdr x)))
(receive (segment objects symtab)
(alloc-segment type flags objects fileaddr memaddr symtab
(if (and page-aligned?
(not (= flags prev-flags)))
*page-size*
8))
(values
(cons (cons segment objects) out)
(+ (elf-segment-offset segment) (elf-segment-filesz segment))
(if (zero? (elf-segment-memsz segment))
memaddr
(+ (elf-segment-vaddr segment)
(elf-segment-memsz segment)))
symtab
flags))))
seglists '() fileaddr memaddr vlist-null 0)
(let* ((out (reverse! out))
(section-table-offset (+ (align fileend word-size)))
(fileend (+ section-table-offset
(* nsections (elf-section-header-len word-size))))
(bv (make-bytevector fileend 0)))
(write-elf-header bv #:byte-order endianness #:word-size word-size
#:phoff program-headers-offset #:phnum nsegments
#:shoff section-table-offset #:shnum nsections
#:shstrndx (or (assoc-ref sections-by-name ".shstrtab")
SHN_UNDEF))
(write-elf-section-header bv section-table-offset
endianness word-size
(make-elf-section* #:type SHT_NULL #:flags 0
#:addralign 0))
(fold2 (lambda (x phidx shidx)
(write-elf-program-header
bv (+ program-headers-offset
(* (elf-program-header-len word-size) phidx))
endianness word-size (car x))
(values
(1+ phidx)
(fold1 (lambda (o shidx)
(write-object bv o symtab endianness)
(write-elf-section-header
bv (+ section-table-offset
(* (elf-section-header-len word-size) shidx))
endianness word-size (object-section o))
(1+ shidx))
(cdr x) shidx)))
out 0 1)
bv))))

442
module/system/vm/linker.scm Normal file
View file

@ -0,0 +1,442 @@
;;; Guile ELF linker
;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Commentary:
;;;
;;; A linker combines several linker objects into an executable or a
;;; loadable library.
;;;
;;; There are several common formats for libraries out there. Since
;;; Guile includes its own linker and loader, we are free to choose any
;;; format, or make up our own.
;;;
;;; There are essentially two requirements for a linker format:
;;; libraries should be able to be loaded with the minimal amount of
;;; work; and they should support introspection in some way, in order to
;;; enable good debugging.
;;;
;;; These requirements are somewhat at odds, as loading should not have
;;; to stumble over features related to introspection. It so happens
;;; that a lot of smart people have thought about this situation, and
;;; the ELF format embodies the outcome of their thinking. Guile uses
;;; ELF as its format, regardless of the platform's native library
;;; format. It's not inconceivable that Guile could interoperate with
;;; the native dynamic loader at some point, but it's not a near-term
;;; goal.
;;;
;;; Guile's linker takes a list of objects, sorts them according to
;;; similarity from the perspective of the loader, then writes them out
;;; into one big bytevector in ELF format.
;;;
;;; It is often the case that different parts of a library need to refer
;;; to each other. For example, program text may need to refer to a
;;; constant from writable memory. When the linker places sections
;;; (linker objects) into specific locations in the linked bytevector,
;;; it needs to fix up those references. This process is called
;;; /relocation/. References needing relocations are recorded in
;;; "linker-reloc" objects, and collected in a list in each
;;; "linker-object". The actual definitions of the references are
;;; stored in "linker-symbol" objects, also collected in a list in each
;;; "linker-object".
;;;
;;; By default, the ELF files created by the linker include some padding
;;; so that different parts of the file can be loaded in with different
;;; permissions. For example, some parts of the file are read-only and
;;; thus can be shared between processes. Some parts of the file don't
;;; need to be loaded at all. However this padding can be too much for
;;; interactive compilation, when the code is never written out to disk;
;;; in that case, pass #:page-aligned? #f to `link-elf'.
;;;
;;; Code:
(define-module (system vm linker)
#:use-module (rnrs bytevectors)
#:use-module (system foreign)
#:use-module (system base target)
#:use-module (srfi srfi-9)
#:use-module (ice-9 receive)
#:use-module (ice-9 vlist)
#:use-module (system vm elf)
#:export (make-string-table
string-table-intern
link-string-table
make-linker-reloc
make-linker-symbol
make-linker-object
linker-object?
linker-object-section
linker-object-bv
linker-object-relocs
linker-object-symbols
link-elf))
;; A relocation records a reference to a symbol. When the symbol is
;; resolved to an address, the reloc location will be updated to point
;; to the address.
;;
;; Two types. Abs32/1 and Abs64/1 are absolute offsets in bytes.
;; Rel32/4 is a relative signed offset in 32-bit units. Either can have
;; an arbitrary addend as well.
;;
(define-record-type <linker-reloc>
(make-linker-reloc type loc addend symbol)
linker-reloc?
(type linker-reloc-type) ;; rel32/4, abs32/1, abs64/1
(loc linker-reloc-loc)
(addend linker-reloc-addend)
(symbol linker-reloc-symbol))
;; A symbol is an association between a name and an address. The
;; address is always in regard to some particular address space. When
;; objects come into the linker, their symbols live in the object
;; address space. When the objects are allocated into ELF segments, the
;; symbols will be relocated into memory address space, corresponding to
;; the position the ELF will be loaded at.
;;
(define-record-type <linker-symbol>
(make-linker-symbol name address)
linker-symbol?
(name linker-symbol-name)
(address linker-symbol-address))
(define-record-type <linker-object>
(make-linker-object section bv relocs symbols)
linker-object?
(section linker-object-section)
(bv linker-object-bv)
(relocs linker-object-relocs)
(symbols linker-object-symbols))
(define (make-string-table)
'(("" 0 #vu8())))
(define (string-table-length table)
(let ((last (car table)))
;; The + 1 is for the trailing NUL byte.
(+ (cadr last) (bytevector-length (caddr last)) 1)))
(define (string-table-intern table str)
(cond
((assoc str table)
=> (lambda (ent)
(values table (cadr ent))))
(else
(let* ((next (string-table-length table)))
(values (cons (list str next (string->utf8 str))
table)
next)))))
(define (link-string-table table)
(let ((out (make-bytevector (string-table-length table) 0)))
(for-each
(lambda (ent)
(let ((bytes (caddr ent)))
(bytevector-copy! bytes 0 out (cadr ent) (bytevector-length bytes))))
table)
out))
(define (segment-kind section)
(let ((flags (elf-section-flags section)))
(cons (cond
((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
((zero? (logand SHF_ALLOC flags)) PT_NOTE)
(else PT_LOAD))
(logior (if (zero? (logand SHF_ALLOC flags))
0
PF_R)
(if (zero? (logand SHF_EXECINSTR flags))
0
PF_X)
(if (zero? (logand SHF_WRITE flags))
0
PF_W)))))
(define (group-by-cars ls)
(let lp ((in ls) (k #f) (group #f) (out '()))
(cond
((null? in)
(reverse!
(if group
(cons (cons k (reverse! group)) out)
out)))
((and group (equal? k (caar in)))
(lp (cdr in) k (cons (cdar in) group) out))
(else
(lp (cdr in) (caar in) (list (cdar in))
(if group
(cons (cons k (reverse! group)) out)
out))))))
(define (collate-objects-into-segments objects)
(group-by-cars
(stable-sort!
(map (lambda (o)
(cons (segment-kind (linker-object-section o)) o))
objects)
(lambda (x y)
(let ((x-type (caar x)) (y-type (caar y))
(x-flags (cdar x)) (y-flags (cdar y))
(x-section (linker-object-section (cdr x)))
(y-section (linker-object-section (cdr y))))
(cond
((not (equal? x-flags y-flags))
(< x-flags y-flags))
((not (equal? x-type y-type))
(< x-type y-type))
((not (equal? (elf-section-type x-section)
(elf-section-type y-section)))
(cond
((equal? (elf-section-type x-section) SHT_NOBITS) #t)
((equal? (elf-section-type y-section) SHT_NOBITS) #f)
(else (< (elf-section-type x-section)
(elf-section-type y-section)))))
(else
(< (elf-section-size x-section)
(elf-section-size y-section)))))))))
(define (align address alignment)
(+ address
(modulo (- alignment (modulo address alignment)) alignment)))
(define (fold1 proc ls s0)
(let lp ((ls ls) (s0 s0))
(if (null? ls)
s0
(lp (cdr ls) (proc (car ls) s0)))))
(define (fold2 proc ls s0 s1)
(let lp ((ls ls) (s0 s0) (s1 s1))
(if (null? ls)
(values s0 s1)
(receive (s0 s1) (proc (car ls) s0 s1)
(lp (cdr ls) s0 s1)))))
(define (fold4 proc ls s0 s1 s2 s3)
(let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3))
(if (null? ls)
(values s0 s1 s2 s3)
(receive (s0 s1 s2 s3) (proc (car ls) s0 s1 s2 s3)
(lp (cdr ls) s0 s1 s2 s3)))))
(define (fold5 proc ls s0 s1 s2 s3 s4)
(let lp ((ls ls) (s0 s0) (s1 s1) (s2 s2) (s3 s3) (s4 s4))
(if (null? ls)
(values s0 s1 s2 s3 s4)
(receive (s0 s1 s2 s3 s4) (proc (car ls) s0 s1 s2 s3 s4)
(lp (cdr ls) s0 s1 s2 s3 s4)))))
(define (relocate-section-header sec fileaddr memaddr)
(make-elf-section #:name (elf-section-name sec)
#:type (elf-section-type sec)
#:flags (elf-section-flags sec)
#:addr memaddr
#:offset fileaddr
#:size (elf-section-size sec)
#:link (elf-section-link sec)
#:info (elf-section-info sec)
#:addralign (elf-section-addralign sec)
#:entsize (elf-section-entsize sec)))
(define *page-size* 4096)
;; Adds object symbols to global table, relocating them from object
;; address space to memory address space.
(define (add-symbols symbols offset symtab)
(fold1 (lambda (symbol symtab)
(let ((name (linker-symbol-name symbol))
(addr (linker-symbol-address symbol)))
(when (vhash-assq name symtab)
(error "duplicate symbol" name))
(vhash-consq name (make-linker-symbol name (+ addr offset)) symtab)))
symbols
symtab))
(define (alloc-segment type flags objects fileaddr memaddr symtab alignment)
(let* ((loadable? (not (zero? flags)))
(alignment (fold1 (lambda (o alignment)
(lcm (elf-section-addralign
(linker-object-section o))
alignment))
objects
alignment))
(fileaddr (align fileaddr alignment))
(memaddr (align memaddr alignment)))
(receive (objects fileend memend symtab)
(fold4 (lambda (o out fileaddr memaddr symtab)
(let* ((section (linker-object-section o))
(fileaddr
(if (= (elf-section-type section) SHT_NOBITS)
fileaddr
(align fileaddr (elf-section-addralign section))))
(memaddr
(align memaddr (elf-section-addralign section))))
(values
(cons (make-linker-object
(relocate-section-header section fileaddr
memaddr)
(linker-object-bv o)
(linker-object-relocs o)
(linker-object-symbols o))
out)
(if (= (elf-section-type section) SHT_NOBITS)
fileaddr
(+ fileaddr (elf-section-size section)))
(+ memaddr (elf-section-size section))
(add-symbols (linker-object-symbols o) memaddr symtab))))
objects '() fileaddr memaddr symtab)
(values
(make-elf-segment #:type type #:offset fileaddr
#:vaddr (if loadable? memaddr 0)
#:filesz (- fileend fileaddr)
#:memsz (if loadable? (- memend memaddr) 0)
#:flags flags #:align alignment)
(reverse objects)
symtab))))
(define (process-reloc reloc bv file-offset mem-offset symtab endianness)
(let ((ent (vhash-assq (linker-reloc-symbol reloc) symtab)))
(unless ent
(error "Undefined symbol" (linker-reloc-symbol reloc)))
(let* ((file-loc (+ (linker-reloc-loc reloc) file-offset))
(mem-loc (+ (linker-reloc-loc reloc) mem-offset))
(addr (linker-symbol-address (cdr ent))))
(case (linker-reloc-type reloc)
((rel32/4)
(let ((diff (- addr mem-loc)))
(unless (zero? (modulo diff 4))
(error "Bad offset" reloc symbol mem-offset))
(bytevector-s32-set! bv file-loc
(+ (/ diff 4) (linker-reloc-addend reloc))
endianness)))
((abs32/1)
(bytevector-u32-set! bv file-loc addr endianness))
((abs64/1)
(bytevector-u64-set! bv file-loc addr endianness))
(else
(error "bad reloc type" reloc))))))
(define (write-linker-object bv o symtab endianness)
(let* ((section (linker-object-section o))
(offset (elf-section-offset section))
(addr (elf-section-addr section))
(len (elf-section-size section))
(bytes (linker-object-bv o))
(relocs (linker-object-relocs o)))
(if (not (= (elf-section-type section) SHT_NOBITS))
(begin
(if (not (= (elf-section-size section) (bytevector-length bytes)))
(error "unexpected length" section bytes))
(bytevector-copy! bytes 0 bv offset len)
(for-each (lambda (reloc)
(process-reloc reloc bv offset addr symtab endianness))
relocs)))))
(define (compute-sections-by-name seglists)
(let lp ((in (apply append (map cdr seglists)))
(n 1) (out '()) (shstrtab #f))
(if (null? in)
(fold1 (lambda (x tail)
(cond
((false-if-exception
(string-table-ref shstrtab (car x)))
=> (lambda (str) (acons str (cdr x) tail)))
(else tail)))
out '())
(let* ((section (linker-object-section (car in)))
(bv (linker-object-bv (car in)))
(name (elf-section-name section)))
(lp (cdr in) (1+ n) (acons name n out)
(or shstrtab
(and (= (elf-section-type section) SHT_STRTAB)
(equal? (false-if-exception
(string-table-ref bv name))
".shstrtab")
bv)))))))
;; Given a list of section-header/bytevector pairs, collate the sections
;; into segments, allocate the segments, allocate the ELF bytevector,
;; and write the segments into the bytevector, relocating as we go.
;;
(define* (link-elf objects #:key
(page-aligned? #t)
(endianness (target-endianness))
(word-size (target-word-size)))
(let* ((seglists (collate-objects-into-segments objects))
(sections-by-name (compute-sections-by-name seglists))
(nsegments (length seglists))
(nsections (1+ (length objects))) ;; 1+ for the first reserved entry.
(program-headers-offset (elf-header-len word-size))
(fileaddr (+ program-headers-offset
(* nsegments (elf-program-header-len word-size))))
(memaddr 0))
(receive (out fileend memend symtab _)
(fold5
(lambda (x out fileaddr memaddr symtab prev-flags)
(let ((type (caar x))
(flags (cdar x))
(objects (cdr x)))
(receive (segment objects symtab)
(alloc-segment type flags objects fileaddr memaddr symtab
(if (and page-aligned?
(not (= flags prev-flags)))
*page-size*
8))
(values
(cons (cons segment objects) out)
(+ (elf-segment-offset segment) (elf-segment-filesz segment))
(if (zero? (elf-segment-memsz segment))
memaddr
(+ (elf-segment-vaddr segment)
(elf-segment-memsz segment)))
symtab
flags))))
seglists '() fileaddr memaddr vlist-null 0)
(let* ((out (reverse! out))
(section-table-offset (+ (align fileend word-size)))
(fileend (+ section-table-offset
(* nsections (elf-section-header-len word-size))))
(bv (make-bytevector fileend 0)))
(write-elf-header bv #:byte-order endianness #:word-size word-size
#:phoff program-headers-offset #:phnum nsegments
#:shoff section-table-offset #:shnum nsections
#:shstrndx (or (assoc-ref sections-by-name ".shstrtab")
SHN_UNDEF))
(write-elf-section-header bv section-table-offset
endianness word-size
(make-elf-section #:type SHT_NULL #:flags 0
#:addralign 0))
(fold2 (lambda (x phidx shidx)
(write-elf-program-header
bv (+ program-headers-offset
(* (elf-program-header-len word-size) phidx))
endianness word-size (car x))
(values
(1+ phidx)
(fold1 (lambda (o shidx)
(write-linker-object bv o symtab endianness)
(write-elf-section-header
bv (+ section-table-offset
(* (elf-section-header-len word-size) shidx))
endianness word-size (linker-object-section o))
(1+ shidx))
(cdr x) shidx)))
out 0 1)
bv))))

View file

@ -0,0 +1,86 @@
;;;; linker.test -*- scheme -*-
;;;;
;;;; Copyright 2013 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
(define-module (test-suite test-linker)
#:use-module (test-suite lib)
#:use-module (rnrs bytevectors)
#:use-module (system base target)
#:use-module (system vm elf)
#:use-module (system vm linker))
(define (link-elf-with-one-main-section name bytes)
(let ((string-table (make-string-table)))
(define (intern-string! string)
(call-with-values
(lambda () (string-table-intern string-table string))
(lambda (table idx)
(set! string-table table)
idx)))
(define (make-object name bv relocs . kwargs)
(let ((name-idx (intern-string! (symbol->string name))))
(make-linker-object (apply make-elf-section
#:name name-idx
#:size (bytevector-length bv)
kwargs)
bv relocs
(list (make-linker-symbol name 0)))))
(define (make-string-table)
(intern-string! ".shstrtab")
(make-object '.shstrtab (link-string-table string-table) '()
#:type SHT_STRTAB #:flags 0))
(let* ((word-size (target-word-size))
(endianness (target-endianness))
(sec (make-object name bytes '()))
;; This needs to be linked last, because linking other
;; sections adds entries to the string table.
(shstrtab (make-string-table)))
(link-elf (list sec shstrtab)
#:endianness endianness #:word-size word-size))))
(with-test-prefix "simple"
(define foo-bytes #vu8(0 1 2 3 4 5))
(define bytes #f)
(define elf #f)
(define (bytevectors-equal? bv-a bv-b start-a start-b size)
(or (zero? size)
(and (equal? (bytevector-u8-ref bv-a start-a)
(bytevector-u8-ref bv-b start-b))
(bytevectors-equal? bv-a bv-b (1+ start-a) (1+ start-b)
(1- size)))))
(pass-if "linking succeeds"
(begin
(set! bytes (link-elf-with-one-main-section '.foo foo-bytes))
#t))
(pass-if "parsing succeeds"
(begin
(set! elf (parse-elf bytes))
(elf? elf)))
;; 3 sections: the initial NULL section, .foo, and .shstrtab.
(pass-if-equal 3 (elf-shnum elf))
(pass-if ".foo section checks out"
(let ((sec (assoc-ref (elf-sections-by-name elf) ".foo")))
(and sec
(= (elf-section-size sec) (bytevector-length foo-bytes))
(bytevectors-equal? bytes foo-bytes
(elf-section-offset sec) 0
(bytevector-length foo-bytes))))))