mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-29 19:30:36 +02:00
These were found with: make GUILE_WARNINGS='-W1 -Wunused-module' * module/ice-9/copy-tree.scm: * module/ice-9/eval-string.scm: * module/ice-9/getopt-long.scm: * module/ice-9/poll.scm: * module/ice-9/popen.scm: * module/ice-9/sandbox.scm: * module/ice-9/threads.scm: * module/sxml/apply-templates.scm: * module/sxml/simple.scm: * module/system/base/types.scm: * module/system/repl/command.scm: * module/system/repl/common.scm: * module/system/repl/coop-server.scm: * module/system/repl/debug.scm: * module/system/repl/error-handling.scm: * module/system/repl/repl.scm: * module/system/repl/server.scm: * module/system/vm/assembler.scm: * module/system/vm/disassembler.scm: * module/system/vm/dwarf.scm: * module/system/vm/elf.scm: * module/system/vm/frame.scm: * module/system/vm/inspect.scm: * module/system/vm/linker.scm: * module/system/vm/program.scm: * module/system/vm/trace.scm: * module/system/vm/trap-state.scm: * module/system/vm/traps.scm: * module/system/xref.scm: * module/texinfo/indexing.scm: * module/texinfo/plain-text.scm: * module/texinfo/reflection.scm: * module/texinfo/string-utils.scm: * module/web/client.scm: * module/web/http.scm: * module/web/request.scm: * module/web/response.scm: Remove imports of unused modules.
407 lines
14 KiB
Scheme
407 lines
14 KiB
Scheme
;;;; (sxml simple) -- a simple interface to the SSAX parser
|
||
;;;;
|
||
;;;; Copyright (C) 2009, 2010, 2013 Free Software Foundation, Inc.
|
||
;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>.
|
||
;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm.
|
||
;;;;
|
||
;;;; This library is free software; you can redistribute it and/or
|
||
;;;; modify it under the terms of the GNU Lesser General Public
|
||
;;;; License as published by the Free Software Foundation; either
|
||
;;;; version 3 of the License, or (at your option) any later version.
|
||
;;;;
|
||
;;;; This library is distributed in the hope that it will be useful,
|
||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
;;;; Lesser General Public License for more details.
|
||
;;;;
|
||
;;;; You should have received a copy of the GNU Lesser General Public
|
||
;;;; License along with this library; if not, write to the Free Software
|
||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||
;;;;
|
||
|
||
;;; Commentary:
|
||
;;
|
||
;;A simple interface to XML parsing and serialization.
|
||
;;
|
||
;;; Code:
|
||
|
||
(define-module (sxml simple)
|
||
#:use-module (sxml ssax input-parse)
|
||
#:use-module (sxml ssax)
|
||
#:use-module (sxml transform)
|
||
#:use-module (ice-9 match)
|
||
#:export (xml->sxml sxml->xml sxml->string))
|
||
|
||
;; Helpers from upstream/SSAX.scm.
|
||
;;
|
||
|
||
; ssax:reverse-collect-str LIST-OF-FRAGS -> LIST-OF-FRAGS
|
||
; given the list of fragments (some of which are text strings)
|
||
; reverse the list and concatenate adjacent text strings.
|
||
; We can prove from the general case below that if LIST-OF-FRAGS
|
||
; has zero or one element, the result of the procedure is equal?
|
||
; to its argument. This fact justifies the shortcut evaluation below.
|
||
(define (ssax:reverse-collect-str fragments)
|
||
(cond
|
||
((null? fragments) '()) ; a shortcut
|
||
((null? (cdr fragments)) fragments) ; see the comment above
|
||
(else
|
||
(let loop ((fragments fragments) (result '()) (strs '()))
|
||
(cond
|
||
((null? fragments)
|
||
(if (null? strs) result
|
||
(cons (string-concatenate/shared strs) result)))
|
||
((string? (car fragments))
|
||
(loop (cdr fragments) result (cons (car fragments) strs)))
|
||
(else
|
||
(loop (cdr fragments)
|
||
(cons
|
||
(car fragments)
|
||
(if (null? strs) result
|
||
(cons (string-concatenate/shared strs) result)))
|
||
'())))))))
|
||
|
||
(define (read-internal-doctype-as-string port)
|
||
(string-concatenate/shared
|
||
(let loop ()
|
||
(let ((fragment
|
||
(next-token '() '(#\]) "reading internal DOCTYPE" port)))
|
||
(if (eqv? #\> (peek-next-char port))
|
||
(begin
|
||
(read-char port)
|
||
(cons fragment '()))
|
||
(cons* fragment "]" (loop)))))))
|
||
|
||
;; Ideas for the future for this interface:
|
||
;;
|
||
;; * Allow doctypes to provide parsed entities
|
||
;;
|
||
;; * Allow validation (the ELEMENTS value from the DOCTYPE handler
|
||
;; below)
|
||
;;
|
||
;; * Parse internal DTDs
|
||
;;
|
||
;; * Parse external DTDs
|
||
;;
|
||
(define* (xml->sxml #:optional (string-or-port (current-input-port)) #:key
|
||
(namespaces '())
|
||
(declare-namespaces? #t)
|
||
(trim-whitespace? #f)
|
||
(entities '())
|
||
(default-entity-handler #f)
|
||
(doctype-handler #f))
|
||
"Use SSAX to parse an XML document into SXML. Takes one optional
|
||
argument, @var{string-or-port}, which defaults to the current input
|
||
port."
|
||
;; NAMESPACES: alist of PREFIX -> URI. Specifies the symbol prefix
|
||
;; that the user wants on elements of a given namespace in the
|
||
;; resulting SXML, regardless of the abbreviated namespaces defined in
|
||
;; the document by xmlns attributes. If DECLARE-NAMESPACES? is true,
|
||
;; these namespaces are treated as if they were declared in the DTD.
|
||
|
||
;; ENTITIES: alist of SYMBOL -> STRING.
|
||
|
||
;; NAMESPACES: list of (DOC-PREFIX . (USER-PREFIX . URI)).
|
||
;; A DOC-PREFIX of #f indicates that it comes from the user.
|
||
;; Otherwise, prefixes are symbols.
|
||
(define (munge-namespaces namespaces)
|
||
(map (lambda (el)
|
||
(match el
|
||
((prefix . uri-string)
|
||
(cons* (and declare-namespaces? prefix)
|
||
prefix
|
||
(ssax:uri-string->symbol uri-string)))))
|
||
namespaces))
|
||
|
||
(define (user-namespaces)
|
||
(munge-namespaces namespaces))
|
||
|
||
(define (user-entities)
|
||
(if (and default-entity-handler
|
||
(not (assq '*DEFAULT* entities)))
|
||
(acons '*DEFAULT* default-entity-handler entities)
|
||
entities))
|
||
|
||
(define (name->sxml name)
|
||
(match name
|
||
((prefix . local-part)
|
||
(symbol-append prefix (string->symbol ":") local-part))
|
||
(_ name)))
|
||
|
||
(define (doctype-continuation seed)
|
||
(lambda* (#:key (entities '()) (namespaces '()))
|
||
(values #f
|
||
(append entities (user-entities))
|
||
(append (munge-namespaces namespaces) (user-namespaces))
|
||
seed)))
|
||
|
||
;; The SEED in this parser is the SXML: initialized to '() at each new
|
||
;; level by the fdown handlers; built in reverse by the fhere parsers;
|
||
;; and reverse-collected by the fup handlers.
|
||
(define parser
|
||
(ssax:make-parser
|
||
NEW-LEVEL-SEED ; fdown
|
||
(lambda (elem-gi attributes namespaces expected-content seed)
|
||
'())
|
||
|
||
FINISH-ELEMENT ; fup
|
||
(lambda (elem-gi attributes namespaces parent-seed seed)
|
||
(let ((seed (if trim-whitespace?
|
||
(ssax:reverse-collect-str-drop-ws seed)
|
||
(ssax:reverse-collect-str seed)))
|
||
(attrs (attlist-fold
|
||
(lambda (attr accum)
|
||
(cons (list (name->sxml (car attr)) (cdr attr))
|
||
accum))
|
||
'() attributes)))
|
||
(acons (name->sxml elem-gi)
|
||
(if (null? attrs)
|
||
seed
|
||
(cons (cons '@ attrs) seed))
|
||
parent-seed)))
|
||
|
||
CHAR-DATA-HANDLER ; fhere
|
||
(lambda (string1 string2 seed)
|
||
(if (string-null? string2)
|
||
(cons string1 seed)
|
||
(cons* string2 string1 seed)))
|
||
|
||
DOCTYPE
|
||
;; -> ELEMS ENTITIES NAMESPACES SEED
|
||
;;
|
||
;; ELEMS is for validation and currently unused.
|
||
;;
|
||
;; ENTITIES is an alist of parsed entities (symbol -> string).
|
||
;;
|
||
;; NAMESPACES is as above.
|
||
;;
|
||
;; SEED builds up the content.
|
||
(lambda (port docname systemid internal-subset? seed)
|
||
(call-with-values
|
||
(lambda ()
|
||
(cond
|
||
(doctype-handler
|
||
(doctype-handler docname systemid
|
||
(and internal-subset?
|
||
(read-internal-doctype-as-string port))))
|
||
(else
|
||
(when internal-subset?
|
||
(ssax:skip-internal-dtd port))
|
||
(values))))
|
||
(doctype-continuation seed)))
|
||
|
||
UNDECL-ROOT
|
||
;; This is like the DOCTYPE handler, but for documents that do not
|
||
;; have a <!DOCTYPE!> entry.
|
||
(lambda (elem-gi seed)
|
||
(call-with-values
|
||
(lambda ()
|
||
(if doctype-handler
|
||
(doctype-handler #f #f #f)
|
||
(values)))
|
||
(doctype-continuation seed)))
|
||
|
||
PI
|
||
((*DEFAULT*
|
||
. (lambda (port pi-tag seed)
|
||
(cons
|
||
(list '*PI* pi-tag (ssax:read-pi-body-as-string port))
|
||
seed))))))
|
||
|
||
(let* ((port (if (string? string-or-port)
|
||
(open-input-string string-or-port)
|
||
string-or-port))
|
||
(elements (reverse (parser port '()))))
|
||
`(*TOP* ,@elements)))
|
||
|
||
(define check-name
|
||
(let ((*good-cache* (make-hash-table)))
|
||
(lambda (name)
|
||
(if (not (hashq-ref *good-cache* name))
|
||
(let* ((str (symbol->string name))
|
||
(i (string-index str #\:))
|
||
(head (or (and i (substring str 0 i)) str))
|
||
(tail (and i (substring str (1+ i)))))
|
||
(and i (string-index (substring str (1+ i)) #\:)
|
||
(error "Invalid QName: more than one colon" name))
|
||
(for-each
|
||
(lambda (s)
|
||
(and s
|
||
(or (char-alphabetic? (string-ref s 0))
|
||
(eq? (string-ref s 0) #\_)
|
||
(error "Invalid name starting character" s name))
|
||
(string-for-each
|
||
(lambda (c)
|
||
(or (char-alphabetic? c) (string-index "0123456789.-_" c)
|
||
(error "Invalid name character" c s name)))
|
||
s)))
|
||
(list head tail))
|
||
(hashq-set! *good-cache* name #t))))))
|
||
|
||
;; The following two functions serialize tags and attributes. They are
|
||
;; being used in the node handlers for the post-order function, see
|
||
;; below.
|
||
|
||
(define (attribute-value->xml value port)
|
||
(cond
|
||
((pair? value)
|
||
(attribute-value->xml (car value) port)
|
||
(attribute-value->xml (cdr value) port))
|
||
((null? value)
|
||
*unspecified*)
|
||
((string? value)
|
||
(string->escaped-xml value port))
|
||
((procedure? value)
|
||
(with-output-to-port port value))
|
||
(else
|
||
(string->escaped-xml
|
||
(call-with-output-string (lambda (port) (display value port)))
|
||
port))))
|
||
|
||
(define (attribute->xml attr value port)
|
||
(check-name attr)
|
||
(display attr port)
|
||
(display "=\"" port)
|
||
(attribute-value->xml value port)
|
||
(display #\" port))
|
||
|
||
(define (element->xml tag attrs body port)
|
||
(check-name tag)
|
||
(display #\< port)
|
||
(display tag port)
|
||
(if attrs
|
||
(let lp ((attrs attrs))
|
||
(if (pair? attrs)
|
||
(let ((attr (car attrs)))
|
||
(display #\space port)
|
||
(if (pair? attr)
|
||
(attribute->xml (car attr) (cdr attr) port)
|
||
(error "bad attribute" tag attr))
|
||
(lp (cdr attrs)))
|
||
(if (not (null? attrs))
|
||
(error "bad attributes" tag attrs)))))
|
||
(if (pair? body)
|
||
(begin
|
||
(display #\> port)
|
||
(let lp ((body body))
|
||
(cond
|
||
((pair? body)
|
||
(sxml->xml (car body) port)
|
||
(lp (cdr body)))
|
||
((null? body)
|
||
(display "</" port)
|
||
(display tag port)
|
||
(display ">" port))
|
||
(else
|
||
(error "bad element body" tag body)))))
|
||
(display " />" port)))
|
||
|
||
;; FIXME: ensure name is valid
|
||
(define (entity->xml name port)
|
||
(display #\& port)
|
||
(display name port)
|
||
(display #\; port))
|
||
|
||
;; FIXME: ensure tag and str are valid
|
||
(define (pi->xml tag str port)
|
||
(display "<?" port)
|
||
(display tag port)
|
||
(display #\space port)
|
||
(display str port)
|
||
(display "?>" port))
|
||
|
||
(define* (sxml->xml tree #:optional (port (current-output-port)))
|
||
"Serialize the sxml tree @var{tree} as XML. The output will be written
|
||
to the current output port, unless the optional argument @var{port} is
|
||
present."
|
||
(cond
|
||
((pair? tree)
|
||
(if (symbol? (car tree))
|
||
;; An element.
|
||
(let ((tag (car tree)))
|
||
(case tag
|
||
((*TOP*)
|
||
(sxml->xml (cdr tree) port))
|
||
((*ENTITY*)
|
||
(if (and (list? (cdr tree)) (= (length (cdr tree)) 1))
|
||
(entity->xml (cadr tree) port)
|
||
(error "bad *ENTITY* args" (cdr tree))))
|
||
((*PI*)
|
||
(if (and (list? (cdr tree)) (= (length (cdr tree)) 2))
|
||
(pi->xml (cadr tree) (caddr tree) port)
|
||
(error "bad *PI* args" (cdr tree))))
|
||
(else
|
||
(let* ((elems (cdr tree))
|
||
(attrs (and (pair? elems) (pair? (car elems))
|
||
(eq? '@ (caar elems))
|
||
(cdar elems))))
|
||
(element->xml tag attrs (if attrs (cdr elems) elems) port)))))
|
||
;; A nodelist.
|
||
(for-each (lambda (x) (sxml->xml x port)) tree)))
|
||
((string? tree)
|
||
(string->escaped-xml tree port))
|
||
((null? tree) *unspecified*)
|
||
((not tree) *unspecified*)
|
||
((eqv? tree #t) *unspecified*)
|
||
((procedure? tree)
|
||
(with-output-to-port port tree))
|
||
(else
|
||
(string->escaped-xml
|
||
(call-with-output-string (lambda (port) (display tree port)))
|
||
port))))
|
||
|
||
(define (sxml->string sxml)
|
||
"Detag an sxml tree @var{sxml} into a string. Does not perform any
|
||
formatting."
|
||
(string-concatenate-reverse
|
||
(foldts
|
||
(lambda (seed tree) ; fdown
|
||
'())
|
||
(lambda (seed kid-seed tree) ; fup
|
||
(append! kid-seed seed))
|
||
(lambda (seed tree) ; fhere
|
||
(if (string? tree) (cons tree seed) seed))
|
||
'()
|
||
sxml)))
|
||
|
||
(define (make-char-quotator char-encoding)
|
||
(let ((bad-chars (list->char-set (map car char-encoding))))
|
||
|
||
;; Check to see if str contains one of the characters in charset,
|
||
;; from the position i onward. If so, return that character's index.
|
||
;; otherwise, return #f
|
||
(define (index-cset str i charset)
|
||
(string-index str charset i))
|
||
|
||
;; The body of the function
|
||
(lambda (str port)
|
||
(let ((bad-pos (index-cset str 0 bad-chars)))
|
||
(if (not bad-pos)
|
||
(display str port) ; str had all good chars
|
||
(let loop ((from 0) (to bad-pos))
|
||
(cond
|
||
((>= from (string-length str)) *unspecified*)
|
||
((not to)
|
||
(display (substring str from (string-length str)) port))
|
||
(else
|
||
(let ((quoted-char
|
||
(cdr (assv (string-ref str to) char-encoding)))
|
||
(new-to
|
||
(index-cset str (+ 1 to) bad-chars)))
|
||
(if (< from to)
|
||
(display (substring str from to) port))
|
||
(display quoted-char port)
|
||
(loop (1+ to) new-to))))))))))
|
||
|
||
;; Given a string, check to make sure it does not contain characters
|
||
;; such as '<' or '&' that require encoding. Return either the original
|
||
;; string, or a list of string fragments with special characters
|
||
;; replaced by appropriate character entities.
|
||
|
||
(define string->escaped-xml
|
||
(make-char-quotator
|
||
'((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """))))
|
||
|
||
;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac
|
||
;;; simple.scm ends here
|
||
|