1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-10 07:50:24 +02:00

sxml->xml writes directly to a port

* module/sxml/simple.scm: Remove "universal-sxslt-rules" -- it was a bad
  interface, and I couldn't find any users of it.
  (sxml->xml): Rewrite so that instead of generating another tree of
  data, we write the data directly to a port.
This commit is contained in:
Andy Wingo 2010-12-02 17:25:46 +01:00
parent 8bf6cfea71
commit d9fff48e4c

View file

@ -30,41 +30,146 @@
#:use-module (sxml transform) #:use-module (sxml transform)
#:use-module (ice-9 optargs) #:use-module (ice-9 optargs)
#:use-module (srfi srfi-13) #:use-module (srfi srfi-13)
#:export (xml->sxml sxml->xml sxml->string universal-sxslt-rules)) #:export (xml->sxml sxml->xml sxml->string))
(define* (xml->sxml #:optional (port (current-input-port))) (define* (xml->sxml #:optional (port (current-input-port)))
"Use SSAX to parse an XML document into SXML. Takes one optional "Use SSAX to parse an XML document into SXML. Takes one optional
argument, @var{port}, which defaults to the current input port." argument, @var{port}, which defaults to the current input port."
(ssax:xml->sxml port '())) (ssax:xml->sxml port '()))
;; Universal transformation rules. Works for all XML. (define check-name
(define universal-sxslt-rules (let ((*good-cache* (make-hash-table)))
#; (lambda (name)
"A set of @code{pre-post-order} rules that transform any SXML tree (if (not (hashq-ref *good-cache* name))
into a form suitable for XML serialization by @code{(sxml transform)}'s (let* ((str (symbol->string name))
@code{SRV:send-reply}. Used internally by @code{sxml->xml}." (i (string-index str #\:))
`((@ (head (or (and i (substring str 0 i)) str))
((*default* . ,(lambda (attr-key . value) ((enattr attr-key) value)))) (tail (and i (substring str (1+ i)))))
. ,(lambda (trigger . value) (list '@ value))) (and i (string-index (substring str (1+ i)) #\:)
(*TOP* . ,(lambda (tag . xml) xml)) (error "Invalid QName: more than one colon" name))
(*ENTITY* . ,(lambda (tag name) (list "&" name ";"))) (for-each
(*PI* . ,(lambda (pi tag str) (list "<?" tag " " str "?>"))) (lambda (s)
;; Is this right for entities? I don't have a reference for (and s
;; public-id/system-id at the moment... (or (char-alphabetic? (string-ref s 0))
(*default* . ,(lambda (tag . elems) (apply (entag tag) elems))) (eq? (string-ref s 0) #\_)
(*text* . ,(lambda (trigger str) (error "Invalid name starting character" s name))
(if (string? str) (string->escaped-xml str) str))))) (string-for-each
(lambda (c)
(or (char-alphabetic? c) (string-index "0123456789.-_" c)
(error "Invalid name character" c s name)))
s)))
(list head tail))
(hashq-set! *good-cache* name #t))))))
;; The following two functions serialize tags and attributes. They are
;; being used in the node handlers for the post-order function, see
;; below.
(define (attribute-value->xml value port)
(cond
((pair? value)
(attribute-value->xml (car value) port)
(attribute-value->xml (cdr value) port))
((string? value)
(string->escaped-xml value port))
((procedure? value)
(with-output-to-port port value))
(else
(string->escaped-xml
(call-with-output-string (lambda (port) (display value port)))
port))))
(define (attribute->xml attr value port)
(check-name attr)
(display attr port)
(display "=\"" port)
(attribute-value->xml value port)
(display #\" port))
(define (element->xml tag attrs body port)
(check-name tag)
(display #\< port)
(display tag port)
(if attrs
(let lp ((attrs attrs))
(if (pair? attrs)
(let ((attr (car attrs)))
(display #\space port)
(if (pair? attr)
(attribute->xml (car attr) (cdr attr) port)
(error "bad attribute" tag attr))
(lp (cdr attrs)))
(if (not (null? attrs))
(error "bad attributes" tag attrs)))))
(if (pair? body)
(begin
(display #\> port)
(let lp ((body body))
(cond
((pair? body)
(sxml->xml (car body) port)
(lp (cdr body)))
((null? body)
(display "</" port)
(display tag port)
(display ">" port))
(else
(error "bad element body" tag body)))))
(display " />" port)))
;; FIXME: ensure name is valid
(define (entity->xml name port)
(display #\& port)
(display name port)
(display #\; port))
;; FIXME: ensure tag and str are valid
(define (pi->xml tag str port)
(display "<?" port)
(display tag port)
(display #\space port)
(display str port)
(display "?>" port))
(define* (sxml->xml tree #:optional (port (current-output-port))) (define* (sxml->xml tree #:optional (port (current-output-port)))
"Serialize the sxml tree @var{tree} as XML. The output will be written "Serialize the sxml tree @var{tree} as XML. The output will be written
to the current output port, unless the optional argument @var{port} is to the current output port, unless the optional argument @var{port} is
present." present."
(with-output-to-port port (cond
(lambda () ((pair? tree)
(SRV:send-reply (if (symbol? (car tree))
(post-order ;; An element.
tree (let ((tag (car tree)))
universal-sxslt-rules))))) (case tag
((*TOP*)
(sxml->xml (cdr tree) port))
((*ENTITY*)
(if (and (list? (cdr tree)) (= (length (cdr tree)) 1))
(entity->xml (cadr tree) port)
(error "bad *ENTITY* args" (cdr tree))))
((*PI*)
(if (and (list? (cdr tree)) (= (length (cdr tree)) 2))
(pi->xml (cadr tree) (caddr tree) port)
(error "bad *PI* args" (cdr tree))))
(else
(let* ((elems (cdr tree))
(attrs (and (pair? elems) (pair? (car elems))
(eq? '@ (caar elems))
(cdar elems))))
(element->xml tag attrs (if attrs (cdr elems) elems) port)))))
;; A nodelist.
(for-each (lambda (x) (sxml->xml x port)) tree)))
((string? tree)
(string->escaped-xml tree port))
((null? tree) *unspecified*)
((not tree) *unspecified*)
((eqv? tree #t) *unspecified*)
((procedure? tree)
(with-output-to-port port tree))
(else
(string->escaped-xml
(call-with-output-string (lambda (port) (display tree port)))
port))))
(define (sxml->string sxml) (define (sxml->string sxml)
"Detag an sxml tree @var{sxml} into a string. Does not perform any "Detag an sxml tree @var{sxml} into a string. Does not perform any
@ -80,81 +185,34 @@ formatting."
'() '()
sxml))) sxml)))
;; The following two functions serialize tags and attributes. They are
;; being used in the node handlers for the post-order function, see
;; above.
(define (check-name name)
(let* ((str (symbol->string name))
(i (string-index str #\:))
(head (or (and i (substring str 0 i)) str))
(tail (and i (substring str (1+ i)))))
(and i (string-index (substring str (1+ i)) #\:)
(error "Invalid QName: more than one colon" name))
(for-each
(lambda (s)
(and s
(or (char-alphabetic? (string-ref s 0))
(eq? (string-ref s 0) #\_)
(error "Invalid name starting character" s name))
(string-for-each
(lambda (c)
(or (char-alphabetic? c) (string-index "0123456789.-_" c)
(error "Invalid name character" c s name)))
s)))
(list head tail))))
(define (entag tag)
(check-name tag)
(lambda elems
(if (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems)))
(list #\< tag (cdar elems)
(if (pair? (cdr elems))
(list #\> (cdr elems) "</" tag #\>)
" />"))
(list #\< tag
(if (pair? elems)
(list #\> elems "</" tag #\>)
" />")))))
(define (enattr attr-key)
(check-name attr-key)
(let ((attr-str (symbol->string attr-key)))
(lambda (value)
(list #\space attr-str
"=\"" (and (not (null? value)) value) #\"))))
(define (make-char-quotator char-encoding) (define (make-char-quotator char-encoding)
(let ((bad-chars (map car char-encoding))) (let ((bad-chars (list->char-set (map car char-encoding))))
;; Check to see if str contains one of the characters in charset, ;; Check to see if str contains one of the characters in charset,
;; from the position i onward. If so, return that character's index. ;; from the position i onward. If so, return that character's index.
;; otherwise, return #f ;; otherwise, return #f
(define (index-cset str i charset) (define (index-cset str i charset)
(let loop ((i i)) (string-index str charset i))
(and (< i (string-length str))
(if (memv (string-ref str i) charset) i
(loop (+ 1 i))))))
;; The body of the function ;; The body of the function
(lambda (str) (lambda (str port)
(let ((bad-pos (index-cset str 0 bad-chars))) (let ((bad-pos (index-cset str 0 bad-chars)))
(if (not bad-pos) str ; str had all good chars (if (not bad-pos)
(string-concatenate-reverse (display str port) ; str had all good chars
(let loop ((from 0) (to bad-pos) (out '())) (let loop ((from 0) (to bad-pos))
(cond (cond
((>= from (string-length str)) out) ((>= from (string-length str)) *unspecified*)
((not to) ((not to)
(cons (substring str from (string-length str)) out)) (display (substring str from (string-length str)) port))
(else (else
(let ((quoted-char (let ((quoted-char
(cdr (assv (string-ref str to) char-encoding))) (cdr (assv (string-ref str to) char-encoding)))
(new-to (new-to
(index-cset str (+ 1 to) bad-chars))) (index-cset str (+ 1 to) bad-chars)))
(loop (1+ to) new-to (if (< from to)
(if (< from to) (display (substring str from to) port))
(cons* quoted-char (substring str from to) out) (display quoted-char port)
(cons quoted-char out))))))))))))) (loop (1+ to) new-to))))))))))
;; Given a string, check to make sure it does not contain characters ;; Given a string, check to make sure it does not contain characters
;; such as '<' or '&' that require encoding. Return either the original ;; such as '<' or '&' that require encoding. Return either the original