diff --git a/module/sxml/simple.scm b/module/sxml/simple.scm index 115098c18..be1dc4e02 100644 --- a/module/sxml/simple.scm +++ b/module/sxml/simple.scm @@ -30,41 +30,146 @@ #:use-module (sxml transform) #:use-module (ice-9 optargs) #:use-module (srfi srfi-13) - #:export (xml->sxml sxml->xml sxml->string universal-sxslt-rules)) + #:export (xml->sxml sxml->xml sxml->string)) (define* (xml->sxml #:optional (port (current-input-port))) "Use SSAX to parse an XML document into SXML. Takes one optional argument, @var{port}, which defaults to the current input port." (ssax:xml->sxml port '())) -;; Universal transformation rules. Works for all XML. -(define universal-sxslt-rules - #; - "A set of @code{pre-post-order} rules that transform any SXML tree -into a form suitable for XML serialization by @code{(sxml transform)}'s -@code{SRV:send-reply}. Used internally by @code{sxml->xml}." - `((@ - ((*default* . ,(lambda (attr-key . value) ((enattr attr-key) value)))) - . ,(lambda (trigger . value) (list '@ value))) - (*TOP* . ,(lambda (tag . xml) xml)) - (*ENTITY* . ,(lambda (tag name) (list "&" name ";"))) - (*PI* . ,(lambda (pi tag str) (list ""))) - ;; Is this right for entities? I don't have a reference for - ;; public-id/system-id at the moment... - (*default* . ,(lambda (tag . elems) (apply (entag tag) elems))) - (*text* . ,(lambda (trigger str) - (if (string? str) (string->escaped-xml str) str))))) +(define check-name + (let ((*good-cache* (make-hash-table))) + (lambda (name) + (if (not (hashq-ref *good-cache* name)) + (let* ((str (symbol->string name)) + (i (string-index str #\:)) + (head (or (and i (substring str 0 i)) str)) + (tail (and i (substring str (1+ i))))) + (and i (string-index (substring str (1+ i)) #\:) + (error "Invalid QName: more than one colon" name)) + (for-each + (lambda (s) + (and s + (or (char-alphabetic? (string-ref s 0)) + (eq? (string-ref s 0) #\_) + (error "Invalid name starting character" s name)) + (string-for-each + (lambda (c) + (or (char-alphabetic? c) (string-index "0123456789.-_" c) + (error "Invalid name character" c s name))) + s))) + (list head tail)) + (hashq-set! *good-cache* name #t)))))) + +;; The following two functions serialize tags and attributes. They are +;; being used in the node handlers for the post-order function, see +;; below. + +(define (attribute-value->xml value port) + (cond + ((pair? value) + (attribute-value->xml (car value) port) + (attribute-value->xml (cdr value) port)) + ((string? value) + (string->escaped-xml value port)) + ((procedure? value) + (with-output-to-port port value)) + (else + (string->escaped-xml + (call-with-output-string (lambda (port) (display value port))) + port)))) + +(define (attribute->xml attr value port) + (check-name attr) + (display attr port) + (display "=\"" port) + (attribute-value->xml value port) + (display #\" port)) + +(define (element->xml tag attrs body port) + (check-name tag) + (display #\< port) + (display tag port) + (if attrs + (let lp ((attrs attrs)) + (if (pair? attrs) + (let ((attr (car attrs))) + (display #\space port) + (if (pair? attr) + (attribute->xml (car attr) (cdr attr) port) + (error "bad attribute" tag attr)) + (lp (cdr attrs))) + (if (not (null? attrs)) + (error "bad attributes" tag attrs))))) + (if (pair? body) + (begin + (display #\> port) + (let lp ((body body)) + (cond + ((pair? body) + (sxml->xml (car body) port) + (lp (cdr body))) + ((null? body) + (display "" port)) + (else + (error "bad element body" tag body))))) + (display " />" port))) + +;; FIXME: ensure name is valid +(define (entity->xml name port) + (display #\& port) + (display name port) + (display #\; port)) + +;; FIXME: ensure tag and str are valid +(define (pi->xml tag str port) + (display "" port)) (define* (sxml->xml tree #:optional (port (current-output-port))) "Serialize the sxml tree @var{tree} as XML. The output will be written to the current output port, unless the optional argument @var{port} is present." - (with-output-to-port port - (lambda () - (SRV:send-reply - (post-order - tree - universal-sxslt-rules))))) + (cond + ((pair? tree) + (if (symbol? (car tree)) + ;; An element. + (let ((tag (car tree))) + (case tag + ((*TOP*) + (sxml->xml (cdr tree) port)) + ((*ENTITY*) + (if (and (list? (cdr tree)) (= (length (cdr tree)) 1)) + (entity->xml (cadr tree) port) + (error "bad *ENTITY* args" (cdr tree)))) + ((*PI*) + (if (and (list? (cdr tree)) (= (length (cdr tree)) 2)) + (pi->xml (cadr tree) (caddr tree) port) + (error "bad *PI* args" (cdr tree)))) + (else + (let* ((elems (cdr tree)) + (attrs (and (pair? elems) (pair? (car elems)) + (eq? '@ (caar elems)) + (cdar elems)))) + (element->xml tag attrs (if attrs (cdr elems) elems) port))))) + ;; A nodelist. + (for-each (lambda (x) (sxml->xml x port)) tree))) + ((string? tree) + (string->escaped-xml tree port)) + ((null? tree) *unspecified*) + ((not tree) *unspecified*) + ((eqv? tree #t) *unspecified*) + ((procedure? tree) + (with-output-to-port port tree)) + (else + (string->escaped-xml + (call-with-output-string (lambda (port) (display tree port))) + port)))) (define (sxml->string sxml) "Detag an sxml tree @var{sxml} into a string. Does not perform any @@ -80,81 +185,34 @@ formatting." '() sxml))) -;; The following two functions serialize tags and attributes. They are -;; being used in the node handlers for the post-order function, see -;; above. - -(define (check-name name) - (let* ((str (symbol->string name)) - (i (string-index str #\:)) - (head (or (and i (substring str 0 i)) str)) - (tail (and i (substring str (1+ i))))) - (and i (string-index (substring str (1+ i)) #\:) - (error "Invalid QName: more than one colon" name)) - (for-each - (lambda (s) - (and s - (or (char-alphabetic? (string-ref s 0)) - (eq? (string-ref s 0) #\_) - (error "Invalid name starting character" s name)) - (string-for-each - (lambda (c) - (or (char-alphabetic? c) (string-index "0123456789.-_" c) - (error "Invalid name character" c s name))) - s))) - (list head tail)))) - -(define (entag tag) - (check-name tag) - (lambda elems - (if (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems))) - (list #\< tag (cdar elems) - (if (pair? (cdr elems)) - (list #\> (cdr elems) ") - " />")) - (list #\< tag - (if (pair? elems) - (list #\> elems ") - " />"))))) - -(define (enattr attr-key) - (check-name attr-key) - (let ((attr-str (symbol->string attr-key))) - (lambda (value) - (list #\space attr-str - "=\"" (and (not (null? value)) value) #\")))) - (define (make-char-quotator char-encoding) - (let ((bad-chars (map car char-encoding))) + (let ((bad-chars (list->char-set (map car char-encoding)))) ;; Check to see if str contains one of the characters in charset, ;; from the position i onward. If so, return that character's index. ;; otherwise, return #f (define (index-cset str i charset) - (let loop ((i i)) - (and (< i (string-length str)) - (if (memv (string-ref str i) charset) i - (loop (+ 1 i)))))) - + (string-index str charset i)) + ;; The body of the function - (lambda (str) + (lambda (str port) (let ((bad-pos (index-cset str 0 bad-chars))) - (if (not bad-pos) str ; str had all good chars - (string-concatenate-reverse - (let loop ((from 0) (to bad-pos) (out '())) - (cond - ((>= from (string-length str)) out) - ((not to) - (cons (substring str from (string-length str)) out)) - (else - (let ((quoted-char - (cdr (assv (string-ref str to) char-encoding))) - (new-to - (index-cset str (+ 1 to) bad-chars))) - (loop (1+ to) new-to - (if (< from to) - (cons* quoted-char (substring str from to) out) - (cons quoted-char out))))))))))))) + (if (not bad-pos) + (display str port) ; str had all good chars + (let loop ((from 0) (to bad-pos)) + (cond + ((>= from (string-length str)) *unspecified*) + ((not to) + (display (substring str from (string-length str)) port)) + (else + (let ((quoted-char + (cdr (assv (string-ref str to) char-encoding))) + (new-to + (index-cset str (+ 1 to) bad-chars))) + (if (< from to) + (display (substring str from to) port)) + (display quoted-char port) + (loop (1+ to) new-to)))))))))) ;; Given a string, check to make sure it does not contain characters ;; such as '<' or '&' that require encoding. Return either the original