diff --git a/NEWS b/NEWS index 8ed1b8de8..80f1cd4de 100644 --- a/NEWS +++ b/NEWS @@ -19,6 +19,13 @@ Guile's reader conform more closely to the R6RS syntax. In particular: - It enables the `square-brackets', `hungry-eol-escapes' and `r6rs-hex-escapes' reader options. +* Bug fixes + +** Don't replace + with space when splitting and decoding URI paths + +See the documentation for `uri-decode' for more on the new +`#:decode-plus-to-space?' keyword argument. + Changes in 2.0.11 (since 2.0.10): diff --git a/doc/ref/web.texi b/doc/ref/web.texi index 9e6e0fd84..bb478e018 100644 --- a/doc/ref/web.texi +++ b/doc/ref/web.texi @@ -245,7 +245,7 @@ serialization. Declare a default port for the given URI scheme. @end deffn -@deffn {Scheme Procedure} uri-decode str [#:encoding=@code{"utf-8"}] +@deffn {Scheme Procedure} uri-decode str [#:encoding=@code{"utf-8"}] [#:decode-plus-to-space?=@code{#t}] Percent-decode the given @var{str}, according to @var{encoding}, which should be the name of a character encoding. @@ -262,6 +262,11 @@ decoded bytes are not valid for the given encoding. Pass @code{#f} for @xref{Ports, @code{set-port-encoding!}}, for more information on character encodings. +If @var{decode-plus-to-space?} is true, which is the default, also +replace instances of the plus character @samp{+} with a space character. +This is needed when parsing @code{application/x-www-form-urlencoded} +data. + Returns a string of the decoded characters, or a bytevector if @var{encoding} was @code{#f}. @end deffn diff --git a/module/web/uri.scm b/module/web/uri.scm index 3ab820d14..179618dfd 100644 --- a/module/web/uri.scm +++ b/module/web/uri.scm @@ -304,7 +304,7 @@ serialization." (define hex-chars (string->char-set "0123456789abcdefABCDEF")) -(define* (uri-decode str #:key (encoding "utf-8")) +(define* (uri-decode str #:key (encoding "utf-8") (decode-plus-to-space? #t)) "Percent-decode the given STR, according to ENCODING, which should be the name of a character encoding. 
@@ -320,6 +320,10 @@ bytes are not valid for the given encoding. Pass ‘#f’ for ENCODING if you want decoded bytes as a bytevector directly. ‘set-port-encoding!’, for more information on character encodings. +If DECODE-PLUS-TO-SPACE? is true, which is the default, also replace +instances of the plus character (+) with a space character. This is +needed when parsing application/x-www-form-urlencoded data. + Returns a string of the decoded characters, or a bytevector if ENCODING was ‘#f’." (let* ((len (string-length str)) @@ -330,7 +334,7 @@ ENCODING was ‘#f’." (if (< i len) (let ((ch (string-ref str i))) (cond - ((eqv? ch #\+) + ((and (eqv? ch #\+) decode-plus-to-space?) (put-u8 port (char->integer #\space)) (lp (1+ i))) ((and (< (+ i 2) len) (eqv? ch #\%) @@ -413,7 +417,8 @@ removing empty components. For example, ‘\"/foo/bar%20baz/\"’ decodes to the two-element list, ‘(\"foo\" \"bar baz\")’." (filter (lambda (x) (not (string-null? x))) - (map uri-decode (string-split path #\/)))) + (map (lambda (s) (uri-decode s #:decode-plus-to-space? #f)) + (string-split path #\/)))) (define (encode-and-join-uri-path parts) "URI-encode each element of PARTS, which should be a list of diff --git a/test-suite/tests/web-uri.test b/test-suite/tests/web-uri.test index 3d14d9d46..e1b6ca3eb 100644 --- a/test-suite/tests/web-uri.test +++ b/test-suite/tests/web-uri.test @@ -255,7 +255,10 @@ (equal? "foo bar" (uri-decode "foo%20bar"))) (pass-if "foo+bar" - (equal? "foo bar" (uri-decode "foo+bar")))) + (equal? "foo bar" (uri-decode "foo+bar"))) + + (pass-if "foo+bar in path" + (equal? '("foo+bar") (split-and-decode-uri-path "foo+bar")))) (with-test-prefix "encode" (pass-if (equal? "foo%20bar" (uri-encode "foo bar")))