From 4dbac5e08b13e4aa4ddb40e16034605757057290 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 7 Jan 2013 23:19:10 +0100 Subject: [PATCH 01/12] Fix partial commit of documentation update * doc/ref/api-procedures.texi (Compiled Procedures): Fix partially-committed doc update. --- doc/ref/api-procedures.texi | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/doc/ref/api-procedures.texi b/doc/ref/api-procedures.texi index d77a2bdcc..e749fdc98 100644 --- a/doc/ref/api-procedures.texi +++ b/doc/ref/api-procedures.texi @@ -274,7 +274,9 @@ sense at certain points in the program, delimited by these Return an association list describing the arguments that @var{program} accepts, or @code{#f} if the information cannot be obtained. -For example: +The alist keys that are currently defined are `required', `optional', +`keyword', `allow-other-keys?', and `rest'. For example: + @example (program-arguments-alist (lambda* (a b #:optional c #:key (d 1) #:rest e) @@ -285,17 +287,19 @@ For example: (allow-other-keys? . #f) (rest . d)) @end example +@end deffn -The alist keys that are currently defined are `required', `optional', -`keyword', `allow-other-keys?', and `rest'. +@deffn {Scheme Procedure} program-lambda-list program [ip] +Return a representation of the arguments of @var{program} as a lambda +list, or @code{#f} if this information is not available. -@deffnx {Scheme Procedure} program-lambda-list program [ip] -Accessors for a representation of the arguments of a program, with both -names and types (ie. either required, optional or keywords) +For example: -@code{program-arguments-alist} returns this information in the form of -an association list while @code{program-lambda-list} returns the same -information in a form similar to a lambda definition. +@example +(program-lambda-alist + (lambda* (a b #:optional c #:key (d 1) #:rest e) + #t)) @result{} +@end example @end deffn @node Optional Arguments From ed3e8b8e06adaaa1df5085a0f730d42efa3f5c30 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 7 Jan 2013 23:21:16 +0100 Subject: [PATCH 02/12] http-get: don't shutdown write end of socket * module/web/http.scm ("Connection"): Write the "close" token in lower-case. * module/web/client.scm (http-get): Don't shutdown the writing side of the pipe if we are not doing a keepalive, as this may prevent the request from being sent at all. Prevented http://friendfeed.com/ from being correctly fetched. --- module/web/client.scm | 2 -- module/web/http.scm | 16 +++++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/module/web/client.scm b/module/web/client.scm index 6aedb751e..d3502cc52 100644 --- a/module/web/client.scm +++ b/module/web/client.scm @@ -129,8 +129,6 @@ Otherwise it will be returned as a bytevector." extra-headers))))) (write-request req port) (force-output port) - (if (not keep-alive?) - (shutdown port 1)) (let* ((res (read-response port)) (body (read-response-body res))) (if (not keep-alive?) diff --git a/module/web/http.scm b/module/web/http.scm index 216fddd3e..976f0fb9e 100644 --- a/module/web/http.scm +++ b/module/web/http.scm @@ -1,6 +1,6 @@ ;;; HTTP messages -;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. +;; Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc. ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -1307,9 +1307,19 @@ treated specially, and is just returned as a plain string." ;; Connection = "Connection" ":" 1#(connection-token) ;; connection-token = token ;; e.g. -;; Connection: close, foo-header +;; Connection: close, Foo-Header ;; -(declare-header-list-header! "Connection") +(declare-header! "Connection" + split-header-names + list-of-header-names? + (lambda (val port) + (write-list val port + (lambda (x port) + (display (if (eq? x 'close) + "close" + (header->string x)) + port)) + ", "))) ;; Date = "Date" ":" HTTP-date ;; e.g. From 03a2f59851ff9e9ae751c92c5608ef2a197c4938 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 8 Jan 2013 12:58:05 +0100 Subject: [PATCH 03/12] slight open-pipe* / open-process refactor * libguile/posix.c (scm_open_process): Return the ports as values instead of calling out to Scheme again to make-rw-port. This function is private to (ice-9 popen). * module/ice-9/popen.scm (open-pipe*): Adapt to change. --- libguile/posix.c | 25 ++++--------------------- module/ice-9/popen.scm | 18 ++++++++++++------ 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/libguile/posix.c b/libguile/posix.c index ce64256db..4d5fdcf96 100644 --- a/libguile/posix.c +++ b/libguile/posix.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - * 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -1351,7 +1351,7 @@ scm_open_process (SCM mode, SCM prog, SCM args) if (pid) /* Parent. */ { - SCM read_port = SCM_BOOL_F, write_port = SCM_BOOL_F, port; + SCM read_port = SCM_BOOL_F, write_port = SCM_BOOL_F; /* There is no sense in catching errors on close(). */ if (reading) @@ -1367,25 +1367,8 @@ scm_open_process (SCM mode, SCM prog, SCM args) scm_setvbuf (write_port, scm_from_int (_IONBF), SCM_UNDEFINED); } - if (reading && writing) - { - static SCM make_rw_port = SCM_BOOL_F; - - if (scm_is_false (make_rw_port)) - make_rw_port = scm_c_private_variable ("ice-9 popen", - "make-rw-port"); - - port = scm_call_2 (scm_variable_ref (make_rw_port), - read_port, write_port); - } - else if (reading) - port = read_port; - else if (writing) - port = write_port; - else - port = scm_sys_make_void_port (mode); - - return scm_cons (port, scm_from_int (pid)); + return scm_values + (scm_list_3 (read_port, write_port, scm_from_int (pid))); } /* The child. */ diff --git a/module/ice-9/popen.scm b/module/ice-9/popen.scm index 7ca486805..7d0549eb9 100644 --- a/module/ice-9/popen.scm +++ b/module/ice-9/popen.scm @@ -1,6 +1,6 @@ ;; popen emulation, for non-stdio based ports. -;;;; Copyright (C) 1998, 1999, 2000, 2001, 2003, 2006, 2010, 2011, 2012 Free Software Foundation, Inc. +;;;; Copyright (C) 1998, 1999, 2000, 2001, 2003, 2006, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -49,11 +49,17 @@ A port to the process (based on pipes) is created and returned. @var{mode} specifies whether an input, an output or an input-output port to the process is created: it should be the value of @code{OPEN_READ}, @code{OPEN_WRITE} or @code{OPEN_BOTH}." - (let* ((port/pid (apply open-process mode command args)) - (port (car port/pid))) - (pipe-guardian port) - (hashq-set! port/pid-table port (cdr port/pid)) - port)) + (call-with-values (lambda () + (apply open-process mode command args)) + (lambda (read-port write-port pid) + (let ((port (or (and read-port write-port + (make-rw-port read-port write-port)) + read-port + write-port + (%make-void-port mode)))) + (pipe-guardian port) + (hashq-set! port/pid-table port pid) + port)))) (define (open-pipe command mode) "Executes the shell command @var{command} (a string) in a subprocess. From 921cd222b992f719dc870239bc196688b8d3d507 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 10 Jan 2013 16:01:06 +0100 Subject: [PATCH 04/12] deprecate SCM_CHAR_CODE_LIMIT and char-code-limit * libguile/__scm.h: * libguile/deprecated.h (SCM_CHAR_CODE_LIMIT): Move declaration here from __scm.h. * libguile/feature.c: * module/ice-9/deprecated.scm (char-code-limit): Move definition here. * test-suite/tests/regexp.test: Update to not use char-code-limit. --- doc/guile-api.alist | 1 - libguile/__scm.h | 8 +------- libguile/deprecated.h | 10 +++++++++- libguile/feature.c | 4 +--- module/ice-9/deprecated.scm | 7 ++++++- test-suite/tests/regexp.test | 23 ++++++++++++----------- 6 files changed, 29 insertions(+), 24 deletions(-) diff --git a/doc/guile-api.alist b/doc/guile-api.alist index 5f73cae3a..5830c917a 100644 --- a/doc/guile-api.alist +++ b/doc/guile-api.alist @@ -466,7 +466,6 @@ (char-ci=? (groups Scheme) (scan-data "#")) (char-ci>=? (groups Scheme) (scan-data "#=?>")) (char-ci>? (groups Scheme) (scan-data "#?>")) -(char-code-limit (groups Scheme) (scan-data "")) (char-downcase (groups Scheme) (scan-data "#")) (char-is-both? (groups Scheme) (scan-data "#")) (char-lower-case? (groups Scheme) (scan-data "#")) diff --git a/libguile/__scm.h b/libguile/__scm.h index 1c20bd71a..47ed12b75 100644 --- a/libguile/__scm.h +++ b/libguile/__scm.h @@ -4,7 +4,7 @@ #define SCM___SCM_H /* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2006, - * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -436,12 +436,6 @@ # define SCM_LONG_BIT (SCM_CHAR_BIT * sizeof (long) / sizeof (char)) #endif -#ifdef UCHAR_MAX -# define SCM_CHAR_CODE_LIMIT (UCHAR_MAX + 1L) -#else -# define SCM_CHAR_CODE_LIMIT 256L -#endif - #define SCM_I_UTYPE_MAX(type) ((type)-1) #define SCM_I_TYPE_MAX(type,umax) ((type)((umax)/2)) #define SCM_I_TYPE_MIN(type,umax) (-((type)((umax)/2))-1) diff --git a/libguile/deprecated.h b/libguile/deprecated.h index ae0891f88..de85c6f4e 100644 --- a/libguile/deprecated.h +++ b/libguile/deprecated.h @@ -5,7 +5,7 @@ #ifndef SCM_DEPRECATED_H #define SCM_DEPRECATED_H -/* Copyright (C) 2003,2004, 2005, 2006, 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. +/* Copyright (C) 2003,2004, 2005, 2006, 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -839,6 +839,14 @@ SCM_DEPRECATED SCM scm_struct_vtable_tag (SCM handle); +#ifdef UCHAR_MAX +# define SCM_CHAR_CODE_LIMIT (UCHAR_MAX + 1L) +#else +# define SCM_CHAR_CODE_LIMIT 256L +#endif + + + void scm_i_init_deprecated (void); #endif diff --git a/libguile/feature.c b/libguile/feature.c index f3bddc788..464697508 100644 --- a/libguile/feature.c +++ b/libguile/feature.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - * 2006, 2007, 2009, 2011 Free Software Foundation, Inc. + * 2006, 2007, 2009, 2011, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -123,8 +123,6 @@ scm_init_feature() scm_add_feature ("threads"); #endif - scm_c_define ("char-code-limit", scm_from_int (SCM_CHAR_CODE_LIMIT)); - #include "libguile/feature.x" } diff --git a/module/ice-9/deprecated.scm b/module/ice-9/deprecated.scm index 9d80cfe65..3d401934b 100644 --- a/module/ice-9/deprecated.scm +++ b/module/ice-9/deprecated.scm @@ -70,7 +70,8 @@ read-hash-procedures process-define-module fluid-let-syntax - set-system-module!)) + set-system-module! + char-code-limit)) ;;;; Deprecated definitions. @@ -896,3 +897,7 @@ it.") (issue-deprecation-warning "`module-eval-closure' is deprecated. Use module-variable or module-define! instead.") (standard-eval-closure m))) + +;; Legacy definition. We can't make it identifier-syntax yet though, +;; because compiled code might rely on it. +(define char-code-limit 256) diff --git a/test-suite/tests/regexp.test b/test-suite/tests/regexp.test index d549df2cb..b5c59f030 100644 --- a/test-suite/tests/regexp.test +++ b/test-suite/tests/regexp.test @@ -2,7 +2,7 @@ ;;;; Jim Blandy --- September 1999 ;;;; ;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010, -;;;; 2012 Free Software Foundation, Inc. +;;;; 2012, 2013 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -171,10 +171,10 @@ (let ((lst `((regexp/basic ,regexp/basic) (regexp/extended ,regexp/extended))) - ;; string of all characters, except #\nul which doesn't work because - ;; it's the usual end-of-string for the underlying C regexec() - (allchars (list->string (map integer->char - (cdr (iota char-code-limit)))))) + ;; String of all latin-1 characters, except #\nul which doesn't + ;; work because it's the usual end-of-string for the underlying + ;; C regexec(). + (allchars (list->string (map integer->char (cdr (iota 256)))))) (for-each (lambda (elem) (let ((name (car elem)) @@ -182,9 +182,9 @@ (with-test-prefix name - ;; try on each individual character, except #\nul + ;; Try on each individual latin-1 character, except #\nul. (do ((i 1 (1+ i))) - ((>= i char-code-limit)) + ((>= i 256)) (let* ((c (integer->char i)) (s (string c))) (pass-if (list "char" i (format #f "~s ~s" c s)) @@ -194,11 +194,12 @@ (and (= 0 (match:start m)) (= 1 (match:end m)))))))) - ;; try on pattern "aX" where X is each character, except #\nul - ;; this exposes things like "?" which are special only when they - ;; follow a pattern to repeat or whatever ("a" in this case) + ;; Try on pattern "aX" where X is each latin-1 character, + ;; except #\nul. This exposes things like "?" which are + ;; special only when they follow a pattern to repeat or + ;; whatever ("a" in this case). (do ((i 1 (1+ i))) - ((>= i char-code-limit)) + ((>= i 256)) (let* ((c (integer->char i)) (s (string #\a c)) (q (with-unicode (regexp-quote s)))) From b194b59fa10574868f7b1663a1f2d447baa18c5e Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 10 Jan 2013 17:30:38 +0100 Subject: [PATCH 05/12] fix ice-9/slib * module/ice-9/slib.scm: Change to just load up slib.init directly. The recently submitted patch to slib-discuss and guile-user should make this work correctly. --- module/ice-9/slib.scm | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/module/ice-9/slib.scm b/module/ice-9/slib.scm index 78c734e2a..766418010 100644 --- a/module/ice-9/slib.scm +++ b/module/ice-9/slib.scm @@ -1,6 +1,6 @@ ;;;; slib.scm --- definitions needed to get SLIB to work with Guile ;;;; -;;;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software Foundation, Inc. +;;;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2013 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -16,27 +16,18 @@ ;;;; License along with this library; if not, write to the Free Software ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;;;; -(define-module (ice-9 slib) - :export (slib:load slib:load-source defmacro:load - implementation-vicinity library-vicinity home-vicinity - scheme-implementation-type scheme-implementation-version - output-port-width output-port-height array-indexes - make-random-state - -1+ ? >=? - require slib:error slib:exit slib:warn slib:eval - defmacro:eval logical:logand logical:logior logical:logxor - logical:lognot logical:ash logical:logcount logical:integer-length - logical:bit-extract logical:integer-expt logical:ipow-by-squaring - slib:eval-load slib:tab slib:form-feed difftime offset-time - software-type) - :no-backtrace) - -;; Initialize SLIB. -(load-from-path "slib/guile.init") +;;; Look for slib.init in the $datadir, in /usr/share, and finally in +;;; the load path. It's not usually in the load path on common distros, +;;; but it could be if the user put it there. The init file takes care +;;; of defining the module. -;; SLIB redefines a few core symbols based on their default definition. -;; Thus, we only replace them at this point so that their previous definition -;; is visible when `guile.init' is loaded. -(module-replace! (current-module) - '(delete-file open-file provide provided? system)) +(let ((try-load (lambda (dir) + (let ((init (string-append dir "/slib/guile.init"))) + (and (file-exists? init) + (begin + (load init) + #t)))))) + (or (try-load (assq-ref %guile-build-info 'datadir)) + (try-load "/usr/share") + (load-from-path "slib/guile.init"))) From f05bb8494c9636cd7a44aaf7d4e08f4b66004b6e Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 10 Jan 2013 22:50:27 +0100 Subject: [PATCH 06/12] add bytevector->string and string->bytevector in new (ice-9 iconv) module * module/Makefile.am: * module/ice-9/iconv.scm: New module implementing procedures to encode and decode representations of strings as bytes. * test-suite/Makefile.am: * test-suite/tests/iconv.test: Add tests. * doc/ref/api-data.texi: Add docs. --- doc/ref/api-data.texi | 80 +++++++++++++++++++++++-- module/Makefile.am | 3 +- module/ice-9/iconv.scm | 82 +++++++++++++++++++++++++ test-suite/Makefile.am | 3 +- test-suite/tests/iconv.test | 115 ++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+), 6 deletions(-) create mode 100644 module/ice-9/iconv.scm create mode 100644 test-suite/tests/iconv.test diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 6d8de2bd6..3bd38d28b 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -1,6 +1,6 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012 +@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 @c Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @@ -2881,6 +2881,7 @@ Guile provides all procedures of SRFI-13 and a few more. * Reversing and Appending Strings:: Appending strings to form a new string. * Mapping Folding and Unfolding:: Iterating over strings. * Miscellaneous String Operations:: Replicating, insertion, parsing, ... +* Representing Strings as Bytes:: Encoding and decoding strings. * Conversion to/from C:: * String Internals:: The storage strategy for strings. @end menu @@ -4163,6 +4164,70 @@ a predicate, if it is a character, it is tested for equality and if it is a character set, it is tested for membership. @end deffn +@node Representing Strings as Bytes +@subsubsection Representing Strings as Bytes + +Out in the cold world outside of Guile, not all strings are treated in +the same way. Out there there are only bytes, and there are many ways +of representing a strings (sequences of characters) as binary data +(sequences of bytes). + +As a user, usually you don't have to think about this very much. When +you type on your keyboard, your system encodes your keystrokes as bytes +according to the locale that you have configured on your computer. +Guile uses the locale to decode those bytes back into characters -- +hopefully the same characters that you typed in. + +All is not so clear when dealing with a system with multiple users, such +as a web server. Your web server might get a request from one user for +data encoded in the ISO-8859-1 character set, and then another request +from a different user for UTF-8 data. + +@cindex iconv +@cindex character encoding +Guile provides an @dfn{iconv} module for converting between strings and +sequences of bytes. @xref{Bytevectors}, for more on how Guile +represents raw byte sequences. This module gets its name from the +common @sc{unix} command of the same name. + +Unlike the rest of the procedures in this section, you have to load the +@code{iconv} module before having access to these procedures: + +@example +(use-modules (ice-9 iconv)) +@end example + +@deffn string->bytevector string encoding [#:conversion-strategy='error] +Encode @var{string} as a sequence of bytes. + +The string will be encoded in the character set specified by the +@var{encoding} string. If the string has characters that cannot be +represented in the encoding, by default this procedure raises an +@code{encoding-error}, though the @code{#:conversion-strategy} keyword +can specify other behaviors. + +The return value is a bytevector. @xref{Bytevectors}, for more on +bytevectors. @xref{Ports}, for more on character encodings and +conversion strategies. +@end deffn + +@deffn bytevector->string bytevector encoding +Decode @var{bytevector} into a string. + +The bytes will be decoded from the character set by the @var{encoding} +string. If the bytes do not form a valid encoding, by default this +procedure raises an @code{decoding-error}, though that may be overridden +with the @code{#:conversion-strategy} keyword. @xref{Ports}, for more +on character encodings and conversion strategies. +@end deffn + +@deffn call-with-output-encoded-string encoding proc [#:conversion-strategy='error] +Like @code{call-with-output-string}, but instead of returning a string, +returns a encoding of the string according to @var{encoding}, as a +bytevector. This procedure can be more efficient than collecting a +string and then converting it via @code{string->bytevector}. +@end deffn + @node Conversion to/from C @subsubsection Conversion to/from C @@ -4172,9 +4237,9 @@ important. In C, a string is just a sequence of bytes, and the character encoding describes the relation between these bytes and the actual characters -that make up the string. For Scheme strings, character encoding is -not an issue (most of the time), since in Scheme you never get to see -the bytes, only the characters. +that make up the string. For Scheme strings, character encoding is not +an issue (most of the time), since in Scheme you usually treat strings +as character sequences, not byte sequences. Converting to C and converting from C each have their own challenges. @@ -4305,6 +4370,9 @@ into @var{encoding}. If @var{lenp} is @code{NULL}, this function will return a null-terminated C string. It will throw an error if the string contains a null character. + +The Scheme interface to this function is @code{encode-string}, from the +@code{ice-9 iconv} module. @xref{Representing Strings as Bytes}. @end deftypefn @deftypefn {C Function} SCM scm_from_stringn (const char *str, size_t len, const char *encoding, scm_t_string_failed_conversion_handler handler) @@ -4313,6 +4381,9 @@ length in bytes of the C string is input as @var{len}. The encoding of the C string is passed as the ASCII, null-terminated C string @code{encoding}. The @var{handler} parameters suggests a strategy for dealing with unconvertable characters. + +The Scheme interface to this function is @code{decode-string}. +@xref{Representing Strings as Bytes}. @end deftypefn The following conversion functions are provided as a convenience for the @@ -4810,6 +4881,7 @@ the host's native endianness. Bytevector contents can also be interpreted as Unicode strings encoded in one of the most commonly available encoding formats. +@xref{Representing Strings as Bytes}, for a more generic interface. @lisp (utf8->string (u8-list->bytevector '(99 97 102 101))) diff --git a/module/Makefile.am b/module/Makefile.am index 3d3eae364..472bc4838 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -1,6 +1,6 @@ ## Process this file with automake to produce Makefile.in. ## -## Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc. +## Copyright (C) 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. ## ## This file is part of GUILE. ## @@ -210,6 +210,7 @@ ICE_9_SOURCES = \ ice-9/getopt-long.scm \ ice-9/hcons.scm \ ice-9/i18n.scm \ + ice-9/iconv.scm \ ice-9/lineio.scm \ ice-9/ls.scm \ ice-9/mapping.scm \ diff --git a/module/ice-9/iconv.scm b/module/ice-9/iconv.scm new file mode 100644 index 000000000..40d595473 --- /dev/null +++ b/module/ice-9/iconv.scm @@ -0,0 +1,82 @@ +;;; Encoding and decoding byte representations of strings + +;; Copyright (C) 2013 Free Software Foundation, Inc. + +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +;;; Code: + +(define-module (ice-9 iconv) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 binary-ports) + #:use-module ((ice-9 rdelim) #:select (read-delimited)) + #:export (string->bytevector + bytevector->string + call-with-encoded-output-string)) + +;; Like call-with-output-string, but actually closes the port. +(define (call-with-output-string* proc) + (let ((port (open-output-string))) + (proc port) + (let ((str (get-output-string port))) + (close-port port) + str))) + +(define (call-with-output-bytevector* proc) + (call-with-values (lambda () (open-bytevector-output-port)) + (lambda (port get-bytevector) + (proc port) + (let ((bv (get-bytevector))) + (close-port port) + bv)))) + +(define* (call-with-encoded-output-string encoding proc + #:key (conversion-strategy 'error)) + (if (string-ci=? encoding "utf-8") + ;; I don't know why, but this appears to be faster; at least for + ;; serving examples/debug-sxml.scm (1464 reqs/s versus 850 + ;; reqs/s). + (string->utf8 (call-with-output-string* proc)) + (call-with-output-bytevector* + (lambda (port) + (set-port-encoding! port encoding) + (if conversion-strategy + (set-port-conversion-strategy! port conversion-strategy)) + (proc port))))) + +;; TODO: Provide C implementations that call scm_from_stringn and +;; friends? + +(define* (string->bytevector str encoding #:key (conversion-strategy 'error)) + (if (string-ci=? encoding "utf-8") + (string->utf8 str) + (call-with-encoded-output-string + encoding + (lambda (port) + (display str port)) + #:conversion-strategy conversion-strategy))) + +(define* (bytevector->string bv encoding #:key (conversion-strategy 'error)) + (if (string-ci=? encoding "utf-8") + (utf8->string bv) + (let ((p (open-bytevector-input-port bv))) + (set-port-encoding! p encoding) + (if conversion-strategy + (set-port-conversion-strategy! p conversion-strategy)) + (let ((res (read-delimited "" p))) + (close-port p) + (if (eof-object? res) + "" + res))))) diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am index a843fcd39..880e1e2cf 100644 --- a/test-suite/Makefile.am +++ b/test-suite/Makefile.am @@ -1,7 +1,7 @@ ## Process this file with automake to produce Makefile.in. ## ## Copyright 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, -## 2010, 2011, 2012 Software Foundation, Inc. +## 2010, 2011, 2012, 2013 Software Foundation, Inc. ## ## This file is part of GUILE. ## @@ -62,6 +62,7 @@ SCM_TESTS = tests/00-initial-env.test \ tests/hash.test \ tests/hooks.test \ tests/i18n.test \ + tests/iconv.test \ tests/import.test \ tests/interp.test \ tests/keywords.test \ diff --git a/test-suite/tests/iconv.test b/test-suite/tests/iconv.test new file mode 100644 index 000000000..e6ee90d1d --- /dev/null +++ b/test-suite/tests/iconv.test @@ -0,0 +1,115 @@ +;;;; iconv.test --- Exercise the iconv API. -*- coding: utf-8; mode: scheme; -*- +;;;; +;;;; Copyright (C) 2013 Free Software Foundation, Inc. +;;;; Andy Wingo +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +(define-module (test-suite iconv) + #:use-module (ice-9 iconv) + #:use-module (rnrs bytevectors) + #:use-module (test-suite lib)) + + +(define exception:encoding-error + '(encoding-error . "")) + +(define exception:decoding-error + '(decoding-error . "")) + + +(with-test-prefix "ascii string" + (let ((s "Hello, World!")) + ;; For ASCII, all of these encodings should be the same. + + (pass-if "to ascii bytevector" + (equal? (string->bytevector s "ASCII") + #vu8(72 101 108 108 111 44 32 87 111 114 108 100 33))) + + (pass-if "to ascii bytevector (length check)" + (equal? (string-length s) + (bytevector-length (string->bytevector s "ascii")))) + + (pass-if "from ascii bytevector" + (equal? s + (bytevector->string (string->bytevector s "ascii") "ascii"))) + + (pass-if "to utf-8 bytevector" + (equal? (string->bytevector s "ASCII") + (string->bytevector s "utf-8"))) + + (pass-if "to UTF-8 bytevector (testing encoding case sensitivity)" + (equal? (string->bytevector s "ascii") + (string->bytevector s "UTF-8"))) + + (pass-if "from utf-8 bytevector" + (equal? s + (bytevector->string (string->bytevector s "utf-8") "utf-8"))) + + (pass-if "to latin1 bytevector" + (equal? (string->bytevector s "ASCII") + (string->bytevector s "latin1"))) + + (pass-if "from latin1 bytevector" + (equal? s + (bytevector->string (string->bytevector s "utf-8") "utf-8"))))) + +(with-test-prefix "narrow non-ascii string" + (let ((s "été")) + (pass-if "to latin1 bytevector" + (equal? (string->bytevector s "latin1") + #vu8(233 116 233))) + + (pass-if "to latin1 bytevector (length check)" + (equal? (string-length s) + (bytevector-length (string->bytevector s "latin1")))) + + (pass-if "from latin1 bytevector" + (equal? s + (bytevector->string (string->bytevector s "latin1") "latin1"))) + + (pass-if "to utf-8 bytevector" + (equal? (string->bytevector s "utf-8") + #vu8(195 169 116 195 169))) + + (pass-if "from utf-8 bytevector" + (equal? s + (bytevector->string (string->bytevector s "utf-8") "utf-8"))) + + (pass-if-exception "encode latin1 as ascii" exception:encoding-error + (string->bytevector s "ascii")) + + (pass-if-exception "misparse latin1 as utf8" exception:decoding-error + (bytevector->string (string->bytevector s "latin1") "utf-8")) + + (pass-if-exception "misparse latin1 as ascii" exception:decoding-error + (bytevector->string (string->bytevector s "latin1") "ascii")))) + + +(with-test-prefix "wide non-ascii string" + (let ((s "ΧΑΟΣ")) + (pass-if "to utf-8 bytevector" + (equal? (string->bytevector s "utf-8") + #vu8(206 167 206 145 206 159 206 163) )) + + (pass-if "from utf-8 bytevector" + (equal? s + (bytevector->string (string->bytevector s "utf-8") "utf-8"))) + + (pass-if-exception "encode as ascii" exception:encoding-error + (string->bytevector s "ascii")) + + (pass-if-exception "encode as latin1" exception:encoding-error + (string->bytevector s "latin1")))) From 8cb9a30c17827bc875516b2abedee36a05f886e0 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 11 Jan 2013 11:10:24 +0100 Subject: [PATCH 07/12] (web server) uses (ice-9 iconv) * module/web/server.scm (sanitize-response): Use the procedures from (ice-9 iconv) to encode the response. --- module/web/server.scm | 40 +++------------------------------------- 1 file changed, 3 insertions(+), 37 deletions(-) diff --git a/module/web/server.scm b/module/web/server.scm index 23f344e56..54ab9e335 100644 --- a/module/web/server.scm +++ b/module/web/server.scm @@ -1,6 +1,6 @@ ;;; Web server -;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. +;; Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc. ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -80,6 +80,7 @@ #:use-module (web response) #:use-module (system repl error-handling) #:use-module (ice-9 control) + #:use-module (ice-9 iconv) #:export (define-server-impl lookup-server-impl open-server @@ -162,41 +163,6 @@ values." #:on-error (if (batch-mode?) 'backtrace 'debug) #:post-error (lambda _ (values #f #f #f)))) -;; like call-with-output-string, but actually closes the port (doh) -(define (call-with-output-string* proc) - (let ((port (open-output-string))) - (proc port) - (let ((str (get-output-string port))) - (close-port port) - str))) - -(define (call-with-output-bytevector* proc) - (call-with-values - (lambda () - (open-bytevector-output-port)) - (lambda (port get-bytevector) - (proc port) - (let ((bv (get-bytevector))) - (close-port port) - bv)))) - -(define (call-with-encoded-output-string charset proc) - (if (string-ci=? charset "utf-8") - ;; I don't know why, but this appears to be faster; at least for - ;; examples/debug-sxml.scm (1464 reqs/s versus 850 reqs/s). - (string->utf8 (call-with-output-string* proc)) - (call-with-output-bytevector* - (lambda (port) - (set-port-encoding! port charset) - (proc port))))) - -(define (encode-string str charset) - (if (string-ci=? charset "utf-8") - (string->utf8 str) - (call-with-encoded-output-string charset - (lambda (port) - (display str port))))) - (define (extend-response r k v . additional) (let ((r (build-response #:version (response-version r) #:code (response-code r) @@ -249,7 +215,7 @@ on the procedure being called at any particular time." response (extend-response response 'content-type `(,@type (charset . ,charset)))) - (encode-string body charset)))) + (string->bytevector body charset)))) ((procedure? body) (let* ((type (response-content-type response '(text/plain))) From 67e5ab8ac64dffe814e1ea3b08eeab679899b924 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 11 Jan 2013 11:10:43 +0100 Subject: [PATCH 08/12] remove (web http) TODO list * module/web/http.scm: Remove outdated TODO list. --- module/web/http.scm | 7 ------- 1 file changed, 7 deletions(-) diff --git a/module/web/http.scm b/module/web/http.scm index 976f0fb9e..c79d57d78 100644 --- a/module/web/http.scm +++ b/module/web/http.scm @@ -69,13 +69,6 @@ make-chunked-output-port)) -;;; TODO -;;; -;;; Look at quality lists with more insight. -;;; Think about `accept' a bit more. -;;; - - (define (string->header name) "Parse NAME to a symbolic header name." (string->symbol (string-downcase name))) From 2ac3c0a590ec93f40b2c1ce34bd24b83f1ae1a5d Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 11 Jan 2013 11:30:29 +0100 Subject: [PATCH 09/12] read-response-body always returns bytevector or #f * module/web/response.scm (read-response-body): Fix to always return either a bytevector or #f. Previously, reading a 0-length body could return the EOF object. --- module/web/response.scm | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/module/web/response.scm b/module/web/response.scm index 5ca727409..7e14f4dc0 100644 --- a/module/web/response.scm +++ b/module/web/response.scm @@ -1,6 +1,6 @@ ;;; HTTP response objects -;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. +;; Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc. ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -294,7 +294,13 @@ response port." (define (read-response-body r) "Reads the response body from R, as a bytevector. Returns ‘#f’ if there was no response body." - (and=> (response-body-port r #:decode? #f) get-bytevector-all)) + (let ((body (and=> (response-body-port r #:decode? #f) + get-bytevector-all))) + ;; Reading a body of length 0 will result in get-bytevector-all + ;; returning the EOF object. + (if (eof-object? body) + #vu8() + body))) (define (write-response-body r bv) "Write BV, a bytevector, to the port corresponding to the HTTP From 990b11c53f8da2a6c14e1190bc4e76939db32d07 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 11 Jan 2013 11:15:28 +0100 Subject: [PATCH 10/12] Add http-post, http-put, et cetera * module/web/client.scm (ensure-uri): New helper. (open-socket-for-uri): Accept a URI as a string or as a URI object. (extend-request, sanitize-request): New helpers, like the corresponding functions in (web server). (decode-response-body): Add a reference to the HTTP/1.1 spec, and use (ice-9 iconv). (request): New helper, factoring all aspects of sending an HTTP request and getting a response. (http-get): Redefine in terms of http-get. Deprecate the #:extra-headers argument in favor of #:headers. Allow a body. Add a #:streaming? argument, subsuming the functionality of http-get*. (http-get*): Deprecate. (http-head, http-post, http-put, http-delete, http-trace) (http-options): Define interfaces for all HTTP verbs. * test-suite/tests/web-client.test: Add tests. * doc/ref/web.texi: Update documentation. Thanks to Gregory Benison for the initial patch. --- doc/ref/web.texi | 66 +++- module/web/client.scm | 323 ++++++++++++++--- test-suite/tests/web-client.test | 577 +++++++++++++++++++++++++++++++ 3 files changed, 894 insertions(+), 72 deletions(-) create mode 100644 test-suite/tests/web-client.test diff --git a/doc/ref/web.texi b/doc/ref/web.texi index e892453e3..0f69089d0 100644 --- a/doc/ref/web.texi +++ b/doc/ref/web.texi @@ -1,6 +1,6 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. +@c Copyright (C) 2010, 2011, 2012, 2013 Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @node Web @@ -1388,23 +1388,59 @@ the lower-level HTTP, request, and response modules. Return an open input/output port for a connection to URI. @end deffn -@deffn {Scheme Procedure} http-get uri [#:port=(open-socket-for-uri uri)] [#:version='(1 . 1)] [#:keep-alive?=#f] [#:extra-headers='()] [#:decode-body?=#t] -Connect to the server corresponding to @var{uri} and ask for the -resource, using the @code{GET} method. If you already have a port open, -pass it as @var{port}. The port will be closed at the end of the -request unless @var{keep-alive?} is true. Any extra headers in the -alist @var{extra-headers} will be added to the request. +@deffn {Scheme Procedure} http-get uri arg... +@deffnx {Scheme Procedure} http-head uri arg... +@deffnx {Scheme Procedure} http-post uri arg... +@deffnx {Scheme Procedure} http-put uri arg... +@deffnx {Scheme Procedure} http-delete uri arg... +@deffnx {Scheme Procedure} http-trace uri arg... +@deffnx {Scheme Procedure} http-options uri arg... + +Connect to the server corresponding to @var{uri} and make a request over +HTTP, using the appropriate method (@code{GET}, @code{HEAD}, etc.). + +All of these procedures have the same prototype: a URI followed by an +optional sequence of keyword arguments. These keyword arguments allow +you to modify the requests in various ways, for example attaching a body +to the request, or setting specific headers. The following table lists +the keyword arguments and their default values. + +@table @code +@item #:body #f +@item #:port (open-socket-for-uri @var{uri})] +@item #:version '(1 . 1) +@item #:keep-alive? #f +@item #:headers '() +@item #:decode-body? #t +@item #:streaming? #f +@end table + +If you already have a port open, pass it as @var{port}. Otherwise, a +connection will be opened to the server corresponding to @var{uri}. Any +extra headers in the alist @var{headers} will be added to the request. + +If @var{body} is not #f, a message body will also be sent with the HTTP +request. If @var{body} is a string, it is encoded according to the +content-type in @var{headers}, defaulting to UTF-8. Otherwise +@var{body} should be a bytevector, or @code{#f} for no body. Although a +message body may be sent with any request, usually only @code{POST} and +@code{PUT} requests have bodies. If @var{decode-body?} is true, as is the default, the body of the response will be decoded to string, if it is a textual content-type. Otherwise it will be returned as a bytevector. -@end deffn -@deffn {Scheme Procedure} http-get* uri [#:port=(open-socket-for-uri uri)] [#:version='(1 . 1)] [#:keep-alive?=#f] [#:extra-headers='()] [#:decode-body?=#t] -Like @code{http-get}, but return an input port from which to read. When -@var{decode-body?} is true, as is the default, the returned port has its -encoding set appropriately if the data at @var{uri} is textual. Closing the -returned port closes @var{port}, unless @var{keep-alive?} is true. +However, if @var{streaming?} is true, instead of eagerly reading the +response body from the server, this function only reads off the headers. +The response body will be returned as a port on which the data may be +read. + +Unless @var{keep-alive?} is true, the port will be closed after the full +response body has been read. + +Returns two values: the response read from the server, and the response +body as a string, bytevector, #f value, or as a port (if +@var{streaming?} is true). @end deffn @code{http-get} is useful for making one-off requests to web sites. If @@ -1415,10 +1451,6 @@ fetcher, similar in structure to the web server (@pxref{Web Server}). Another option, good but not as performant, would be to use threads, possibly via par-map or futures. -More helper procedures for the other common HTTP verbs would be a good -addition to this module. Send your code to -@email{guile-user@@gnu.org}. - @node Web Server @subsection Web Server diff --git a/module/web/client.scm b/module/web/client.scm index d3502cc52..ce93cd841 100644 --- a/module/web/client.scm +++ b/module/web/client.scm @@ -1,6 +1,6 @@ ;;; Web client -;; Copyright (C) 2011, 2012 Free Software Foundation, Inc. +;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -34,6 +34,7 @@ (define-module (web client) #:use-module (rnrs bytevectors) #:use-module (ice-9 binary-ports) + #:use-module (ice-9 iconv) #:use-module (ice-9 rdelim) #:use-module (web request) #:use-module (web response) @@ -41,10 +42,23 @@ #:use-module (srfi srfi-1) #:export (open-socket-for-uri http-get - http-get*)) + http-get* + http-head + http-post + http-put + http-delete + http-trace + http-options)) -(define (open-socket-for-uri uri) +(define (ensure-uri uri-or-string) + (cond + ((string? uri-or-string) (string->uri uri-or-string)) + ((uri? uri-or-string) uri-or-string) + (else (error "Invalid URI" uri-or-string)))) + +(define (open-socket-for-uri uri-or-string) "Return an open input/output port for a connection to URI." + (define uri (ensure-uri uri-or-string)) (define addresses (let ((port (uri-port uri))) (delete-duplicates @@ -78,17 +92,79 @@ (apply throw args) (loop (cdr addresses)))))))) -(define (decode-string bv encoding) - (if (string-ci=? encoding "utf-8") - (utf8->string bv) - (let ((p (open-bytevector-input-port bv))) - (set-port-encoding! p encoding) - (let ((res (read-delimited "" p))) - (close-port p) - res)))) +(define (extend-request r k v . additional) + (let ((r (build-request (request-uri r) #:version (request-version r) + #:headers + (assoc-set! (copy-tree (request-headers r)) + k v) + #:port (request-port r)))) + (if (null? additional) + r + (apply extend-request r additional)))) + +;; -> request body +(define (sanitize-request request body) + "\"Sanitize\" the given request and body, ensuring that they are +complete and coherent. This method is most useful for methods that send +data to the server, like POST, but can be used for any method. Return +two values: a request and a bytevector, possibly the same ones that were +passed as arguments. + +If BODY is a string, encodes the string to a bytevector, in an encoding +appropriate for REQUEST. Adds a ‘content-length’ and ‘content-type’ +header, as necessary. + +If BODY is a procedure, it is called with a port as an argument, and the +output collected as a bytevector. In the future we might try to instead +use a compressing, chunk-encoded port, and call this procedure later. +Authors are advised not to rely on the procedure being called at any +particular time. + +Note that we rely on the request itself already having been validated, +as is the case by default with a request returned by `build-request'." + (cond + ((not body) + (let ((length (request-content-length request))) + (if length + (unless (zero? length) + (error "content-length, but no body")) + (when (assq 'transfer-encoding (request-headers request)) + (error "transfer-encoding not allowed with no body"))) + (values request #vu8()))) + ((string? body) + (let* ((type (request-content-type request '(text/plain))) + (declared-charset (assq-ref (cdr type) 'charset)) + (charset (or declared-charset "utf-8"))) + (sanitize-request + (if declared-charset + request + (extend-request request 'content-type + `(,@type (charset . ,charset)))) + (string->bytevector body charset)))) + ((procedure? body) + (let* ((type (request-content-type request + '(text/plain))) + (declared-charset (assq-ref (cdr type) 'charset)) + (charset (or declared-charset "utf-8"))) + (sanitize-request + (if declared-charset + request + (extend-request request 'content-type + `(,@type (charset . ,charset)))) + (call-with-encoded-output-string charset body)))) + ((not (bytevector? body)) + (error "unexpected body type")) + (else + (values (let ((rlen (request-content-length request)) + (blen (bytevector-length body))) + (cond + (rlen (if (= rlen blen) + request + (error "bad content-length" rlen blen))) + ((zero? blen) request) + (else (extend-request request 'content-length blen)))) + body)))) -;; Logically the inverse of (web server)'s `sanitize-response'. -;; (define (decode-response-body response body) ;; `body' is either #f or a bytevector. (cond @@ -103,59 +179,196 @@ => (lambda (type) (cond ((text-content-type? (car type)) - (decode-string body (or (assq-ref (cdr type) 'charset) - "iso-8859-1"))) + ;; RFC 2616 3.7.1: "When no explicit charset parameter is + ;; provided by the sender, media subtypes of the "text" + ;; type are defined to have a default charset value of + ;; "ISO-8859-1" when received via HTTP." + (bytevector->string body (or (assq-ref (cdr type) 'charset) + "iso-8859-1"))) (else body)))) (else body)))) (else (error "unexpected body type" body)))) -(define* (http-get uri #:key (port (open-socket-for-uri uri)) - (version '(1 . 1)) (keep-alive? #f) (extra-headers '()) - (decode-body? #t)) +;; We could expose this to user code if there is demand. +(define* (request uri #:key + (body #f) + (port (open-socket-for-uri uri)) + (method "GET") + (version '(1 . 1)) + (keep-alive? #f) + (headers '()) + (decode-body? #t) + (streaming? #f) + (request + (build-request + (ensure-uri uri) + #:method method + #:version version + #:headers (if keep-alive? + headers + (cons '(connection close) headers)) + #:port port))) + (call-with-values (lambda () (sanitize-request request body)) + (lambda (request body) + (let ((request (write-request request port))) + (when body + (write-request-body request body)) + (force-output (request-port request)) + (let ((response (read-response port))) + (cond + ((equal? (request-method request) "HEAD") + (unless keep-alive? + (close-port port)) + (values response #f)) + (streaming? + (values response + (response-body-port response + #:keep-alive? keep-alive? + #:decode? decode-body?))) + (else + (let ((body (read-response-body response))) + (unless keep-alive? + (close-port port)) + (values response + (if decode-body? + (decode-response-body response body) + body)))))))))) + +(define* (http-get uri #:key + (body #f) + (port (open-socket-for-uri uri)) + (version '(1 . 1)) (keep-alive? #f) + ;; #:headers is the new name of #:extra-headers. + (extra-headers #f) (headers (or extra-headers '())) + (decode-body? #t) (streaming? #f)) "Connect to the server corresponding to URI and ask for the resource, using the ‘GET’ method. If you already have a port open, pass it as PORT. The port will be closed at the end of the request unless KEEP-ALIVE? is true. Any extra headers in the -alist EXTRA-HEADERS will be added to the request. +alist HEADERS will be added to the request. + +If BODY is not #f, a message body will also be sent with the HTTP +request. If BODY is a string, it is encoded according to the +content-type in HEADERS, defaulting to UTF-8. Otherwise BODY should be +a bytevector, or #f for no body. Although it's allowed to send a +message body along with any request, usually only POST and PUT requests +have bodies. See ‘http-put’ and ‘http-post’ documentation, for more. If DECODE-BODY? is true, as is the default, the body of the response will be decoded to string, if it is a textual content-type. -Otherwise it will be returned as a bytevector." - (let ((req (build-request uri #:version version - #:headers (if keep-alive? - extra-headers - (cons '(connection close) - extra-headers))))) - (write-request req port) - (force-output port) - (let* ((res (read-response port)) - (body (read-response-body res))) - (if (not keep-alive?) - (close-port port)) - (values res - (if decode-body? - (decode-response-body res body) - body))))) +Otherwise it will be returned as a bytevector. -(define* (http-get* uri #:key (port (open-socket-for-uri uri)) - (version '(1 . 1)) (keep-alive? #f) (extra-headers '()) +However, if STREAMING? is true, instead of eagerly reading the response +body from the server, this function only reads off the headers. The +response body will be returned as a port on which the data may be read. +Unless KEEP-ALIVE? is true, the port will be closed after the full +response body has been read. + +Returns two values: the response read from the server, and the response +body as a string, bytevector, #f value, or as a port (if STREAMING? is +true)." + (when extra-headers + (issue-deprecation-warning + "The #:extra-headers argument to http-get has been renamed to #:headers. " + "Please update your code.")) + (request uri #:method "GET" #:body body + #:port port #:version version #:keep-alive? keep-alive? + #:headers headers #:decode-body? decode-body? + #:streaming? streaming?)) + +(define* (http-get* uri #:key + (body #f) + (port (open-socket-for-uri uri)) + (version '(1 . 1)) (keep-alive? #f) + ;; #:headers is the new name of #:extra-headers. + (extra-headers #f) (headers (or extra-headers '())) (decode-body? #t)) - "Like ‘http-get’, but return an input port from which to read. When -DECODE-BODY? is true, as is the default, the returned port has its -encoding set appropriately if the data at URI is textual. Closing the -returned port closes PORT, unless KEEP-ALIVE? is true." - (let ((req (build-request uri #:version version - #:headers (if keep-alive? - extra-headers - (cons '(connection close) - extra-headers))))) - (write-request req port) - (force-output port) - (unless keep-alive? - (shutdown port 1)) - (let* ((res (read-response port)) - (body (response-body-port res - #:keep-alive? keep-alive? - #:decode? decode-body?))) - (values res body)))) + "Deprecated in favor of (http-get #:streaming? #t)." + (when extra-headers + (issue-deprecation-warning + "`http-get*' has been deprecated. " + "Instead, use `http-get' with the #:streaming? #t keyword argument.")) + (http-get uri #:body body + #:port port #:version version #:keep-alive? keep-alive? + #:headers headers #:decode-body? #t #:streaming? #t)) + +(define-syntax-rule (define-http-verb http-verb method doc) + (define* (http-verb uri #:key + (body #f) + (port (open-socket-for-uri uri)) + (version '(1 . 1)) + (keep-alive? #f) + (headers '()) + (decode-body? #t) + (streaming? #f)) + doc + (request uri + #:body body #:method method + #:port port #:version version #:keep-alive? keep-alive? + #:headers headers #:decode-body? decode-body? + #:streaming? streaming?))) + +(define-http-verb http-head + "HEAD" + "Fetch message headers for the given URI using the HTTP \"HEAD\" +method. + +This function is similar to ‘http-get’, except it uses the \"HEAD\" +method. See ‘http-get’ for full documentation on the various keyword +arguments that are accepted by this function. + +Returns two values: the resulting response, and #f. Responses to HEAD +requests do not have a body. The second value is only returned so that +other procedures can treat all of the http-foo verbs identically.") + +(define-http-verb http-post + "POST" + "Post data to the given URI using the HTTP \"POST\" method. + +This function is similar to ‘http-get’, except it uses the \"POST\" +method. See ‘http-get’ for full documentation on the various keyword +arguments that are accepted by this function. + +Returns two values: the resulting response, and the response body.") + +(define-http-verb http-put + "PUT" + "Put data at the given URI using the HTTP \"PUT\" method. + +This function is similar to ‘http-get’, except it uses the \"PUT\" +method. See ‘http-get’ for full documentation on the various keyword +arguments that are accepted by this function. + +Returns two values: the resulting response, and the response body.") + +(define-http-verb http-delete + "DELETE" + "Delete data at the given URI using the HTTP \"DELETE\" method. + +This function is similar to ‘http-get’, except it uses the \"DELETE\" +method. See ‘http-get’ for full documentation on the various keyword +arguments that are accepted by this function. + +Returns two values: the resulting response, and the response body.") + +(define-http-verb http-trace + "TRACE" + "Send an HTTP \"TRACE\" request. + +This function is similar to ‘http-get’, except it uses the \"TRACE\" +method. See ‘http-get’ for full documentation on the various keyword +arguments that are accepted by this function. + +Returns two values: the resulting response, and the response body.") + +(define-http-verb http-options + "OPTIONS" + "Query characteristics of an HTTP resource using the HTTP \"OPTIONS\" +method. + +This function is similar to ‘http-get’, except it uses the \"OPTIONS\" +method. See ‘http-get’ for full documentation on the various keyword +arguments that are accepted by this function. + +Returns two values: the resulting response, and the response body.") diff --git a/test-suite/tests/web-client.test b/test-suite/tests/web-client.test new file mode 100644 index 000000000..3133b73c8 --- /dev/null +++ b/test-suite/tests/web-client.test @@ -0,0 +1,577 @@ +;;;; web-client.test --- HTTP client -*- mode: scheme; coding: utf-8; -*- +;;;; +;;;; Copyright (C) 2013 Free Software Foundation, Inc. +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +(define-module (test-suite web-client) + #:use-module (web client) + #:use-module (web request) + #:use-module (web response) + #:use-module (ice-9 iconv) + #:use-module (ice-9 binary-ports) + #:use-module (test-suite lib)) + + +(define get-request-headers:www.gnu.org/software/guile/ + "GET /software/guile/ HTTP/1.1 +Host: www.gnu.org +Connection: close + +") + +(define get-response-headers:www.gnu.org/software/guile/ + "HTTP/1.1 200 OK +Date: Fri, 11 Jan 2013 10:59:11 GMT +Server: Apache/2.2.14 +Accept-Ranges: bytes +Cache-Control: max-age=0 +Expires: Fri, 11 Jan 2013 10:59:11 GMT +Vary: Accept-Encoding +Content-Length: 8077 +Connection: close +Content-Type: text/html +Content-Language: en + +") + +(define get-response-body:www.gnu.org/software/guile/ + " + + + GNU Guile (About Guile) + + + + + + + + + + + + + + + + +
+\t +\t \"Guile\" +\t + +\t

The GNU extension language

+\t

About Guile

+
+
+ + + + + + + + + +
+\t +\t +\t + +\t +\t +\t + +\t +\t + + + + + +\t +\t
+\t

About Guile
+\t\tWhat is Guile?
+\t\tNews
+\t\tCommunity
+\t

+\t +\t

Documentation
+\t\tManuals
+\t\tFAQ's
+\t

+ +\t

Download
+\t\tReleases
+\t\tRepository
+\t\tSnapshots
+\t

+ +\t

Projects
+\t\tCore
+\t\tGUI
+\t\tFile Formats
+\t\tNetworking
+\t\tTools
+\t\tApplications
+\t

+\t +\t

Development
+\t\tProject summary
+\t\tHelping out
+\t\tCool ideas
+\t

+ +\t

Resources
+\t\tGuile Resources
+\t\tScheme Resources
+\t

+\t
+

GNU Project home page

+

What is Guile?

+

Getting Guile

+
+
+ +\t +

What is Guile? What can it do for you?

+

+ Guile is the GNU Ubiquitous Intelligent Language for Extensions, + the official extension language for the + GNU operating system. +

+ +

+ Guile is a library designed to help programmers create flexible + applications. Using Guile in an application allows the application's + functionality to be extended by users or other programmers with + plug-ins, modules, or scripts. Guile provides what might be described as + \"practical software freedom,\" making it possible for users to customize an + application to meet their needs without digging into the application's + internals. +

+ +

+ There is a long list of proven applications that employ extension languages. + Successful and long-lived examples of Free Software projects that use + Guile are TeXmacs, + LilyPond, and + GnuCash. +

+ +

Guile is a programming language

+ +

+ Guile is an interpreter and compiler for + the Scheme programming language, a clean + and elegant dialect of Lisp. Guile is up to date with recent Scheme + standards, supporting the + Revised5 + and most of the Revised6 language + reports (including hygienic macros), as well as many + SRFIs. It also comes with a library + of modules that offer additional features, like an HTTP server and client, + XML parsing, and object-oriented programming. +

+ +

Guile is an extension language platform

+ +

+ Guile is an efficient virtual machine that executes a portable instruction + set generated by its optimizing compiler, and integrates very easily with C + and C++ application code. In addition to Scheme, Guile includes compiler + front-ends for + ECMAScript + and Emacs Lisp + (support for Lua is underway), which means + your application can be extended in the language (or languages) most + appropriate for your user base. And Guile's tools for parsing and compiling + are exposed as part of its standard module set, so support for additional + languages can be added without writing a single line of C. +

+ +

Guile gives your programs more power

+ +

+ Using Guile with your program makes it more usable. Users don't + need to learn the plumbing of your application to customize it; they just + need to understand Guile, and the access you've provided. They can easily + trade and share features by downloading and creating scripts, instead of + trading complex patches and recompiling their applications. They don't need + to coordinate with you or anyone else. Using Guile, your application has a + full-featured scripting language right from the beginning, so you can focus + on the novel and attention-getting parts of your application. +

+ +

How do I get Guile?

+ +
    +
  • The current stable release is + 2.0.7. +
  • +
+ +

+ See the Download page for additional ways of + getting Guile. +

+ + + +
+ +
+
+ +

+ Please send FSF & GNU inquiries & questions to + gnu@gnu.org. There are also + other ways to contact the FSF. +

+ +

+ Please send comments on these web pages to + bug-guile@gnu.org, send + other questions to gnu@gnu.org. +

+ +

+ Copyright (C) 2012 Free Software Foundation, Inc. +

+ +

+ Verbatim copying and distribution of this entire web page is + permitted in any medium, provided this notice is preserved.

+ Updated: + + + $Date: 2012/11/30 00:16:15 $ $Author: civodul $ + +

+ +
+ + + +") + +(define head-request-headers:www.gnu.org/software/guile/ + "HEAD /software/guile/ HTTP/1.1 +Host: www.gnu.org +Connection: close + +") + +(define head-response-headers:www.gnu.org/software/guile/ + "HTTP/1.1 200 OK +Date: Fri, 11 Jan 2013 11:03:14 GMT +Server: Apache/2.2.14 +Accept-Ranges: bytes +Cache-Control: max-age=0 +Expires: Fri, 11 Jan 2013 11:03:14 GMT +Vary: Accept-Encoding +Content-Length: 8077 +Connection: close +Content-Type: text/html +Content-Language: en + +") + +;; Unfortunately, POST to http://www.gnu.org/software/guile/ succeeds! +(define post-request-headers:www.apache.org/ + "POST / HTTP/1.1 +Host: www.apache.org +Connection: close + +") + +(define post-response-headers:www.apache.org/ + "HTTP/1.1 405 Method Not Allowed +Date: Fri, 11 Jan 2013 11:04:34 GMT +Server: Apache/2.4.3 (Unix) OpenSSL/1.0.0g +Allow: TRACE +Content-Length: 314 +Connection: close +Content-Type: text/html; charset=iso-8859-1 + +") + +(define post-response-body:www.apache.org/ +" + +405 Method Not Allowed + +

Method Not Allowed

+

The requested method POST is not allowed for the URL /.

+
+
Apache/2.4.3 (Unix) OpenSSL/1.0.0g Server at www.apache.org Port 80
+ +") + +(define put-request-headers:www.apache.org/ + "PUT / HTTP/1.1 +Host: www.apache.org +Connection: close + +") + +(define put-response-headers:www.apache.org/ + "HTTP/1.1 405 Method Not Allowed +Date: Fri, 11 Jan 2013 11:04:34 GMT +Server: Apache/2.4.3 (Unix) OpenSSL/1.0.0g +Allow: TRACE +Content-Length: 313 +Connection: close +Content-Type: text/html; charset=iso-8859-1 + +") + +(define put-response-body:www.apache.org/ + " + +405 Method Not Allowed + +

Method Not Allowed

+

The requested method PUT is not allowed for the URL /.

+
+
Apache/2.4.3 (Unix) OpenSSL/1.0.0g Server at www.apache.org Port 80
+ +") + +(define delete-request-headers:www.apache.org/ + "DELETE / HTTP/1.1 +Host: www.apache.org +Connection: close + +") + +(define delete-response-headers:www.apache.org/ + "HTTP/1.1 405 Method Not Allowed +Date: Fri, 11 Jan 2013 11:07:19 GMT +Server: Apache/2.4.3 (Unix) OpenSSL/1.0.0g +Allow: TRACE +Content-Length: 316 +Connection: close +Content-Type: text/html; charset=iso-8859-1 + +") + + + +(define delete-response-body:www.apache.org/ + " + +405 Method Not Allowed + +

Method Not Allowed

+

The requested method DELETE is not allowed for the URL /.

+
+
Apache/2.4.3 (Unix) OpenSSL/1.0.0g Server at www.apache.org Port 80
+ +") + +(define options-request-headers:www.apache.org/ + "OPTIONS / HTTP/1.1 +Host: www.apache.org +Connection: close + +") + +(define options-response-headers:www.apache.org/ + "HTTP/1.1 200 OK +Date: Fri, 11 Jan 2013 11:08:31 GMT +Server: Apache/2.4.3 (Unix) OpenSSL/1.0.0g +Allow: OPTIONS,GET,HEAD,POST,TRACE +Cache-Control: max-age=3600 +Expires: Fri, 11 Jan 2013 12:08:31 GMT +Content-Length: 0 +Connection: close +Content-Type: text/html; charset=utf-8 + +") + +;; This depends on the exact request that we send. I copied this off +;; the console with an "nc" session, so it doesn't include the CR bytes. +;; But that's OK -- we just have to decode the body as an HTTP request +;; and check that it's the same. +(define trace-request-headers:www.apache.org/ + "TRACE / HTTP/1.1\r +Host: www.apache.org\r +Connection: close\r +\r +") + +(define trace-response-headers:www.apache.org/ + "HTTP/1.1 200 OK\r +Date: Fri, 11 Jan 2013 12:36:13 GMT\r +Server: Apache/2.4.3 (Unix) OpenSSL/1.0.0g\r +Connection: close\r +Transfer-Encoding: chunked\r +Content-Type: message/http\r +\r +") + +(define trace-response-body:www.apache.org/ + "3d\r +TRACE / HTTP/1.1\r +Host: www.apache.org\r +Connection: close\r +\r +\r +0\r +\r +") + +(define (requests-equal? r1 r2) + (and (equal? (request-method r1) (request-method r2)) + (equal? (request-uri r1) (request-uri r2)) + (equal? (request-version r1) (request-version r2)) + (equal? (request-headers r1) (request-headers r2)))) + +(define (responses-equal? r1 r2) + (and (equal? (response-code r1) (response-code r2)) + (equal? (response-version r1) (response-version r2)) + (equal? (response-headers r1) (response-headers r2)))) + +(define* (run-with-http-transcript + expected-request expected-request-body request-body-encoding + response response-body response-body-encoding + proc) + (let ((reading? #f) + (writing? #t) + (response-port (open-input-string response)) + (response-body-port (open-bytevector-input-port + (string->bytevector response-body + response-body-encoding)))) + (call-with-values (lambda () (open-bytevector-output-port)) + (lambda (request-port get-bytevector) + (define (put-char c) + (unless writing? + (error "Port closed for writing")) + (put-u8 request-port (char->integer c))) + (define (put-string s) + (string-for-each put-char s)) + (define (flush) + (set! writing? #f) + (set! reading? #t) + (let* ((p (open-bytevector-input-port (get-bytevector))) + (actual-request (read-request p)) + (actual-body (read-request-body actual-request))) + (pass-if "requests equal" + (requests-equal? actual-request + (call-with-input-string expected-request + read-request))) + (pass-if "request bodies equal" + (equal? (or actual-body #vu8()) + (string->bytevector expected-request-body + request-body-encoding))))) + (define (get-char) + (unless reading? + (error "Port closed for reading")) + (let ((c (read-char response-port))) + (if (char? c) + c + (let ((u8 (get-u8 response-body-port))) + (if (eof-object? u8) + u8 + (integer->char u8)))))) + (define (close) + (when writing? + (unless (eof-object? (get-u8 response-body-port)) + (error "Failed to consume all of body")))) + (proc (make-soft-port (vector put-char put-string flush get-char close) + "rw")))))) + +(define* (check-transaction method uri + request-headers request-body request-body-encoding + response-headers response-body response-body-encoding + proc + #:key (response-body-comparison response-body)) + (with-test-prefix (string-append method " " uri) + (run-with-http-transcript + request-headers request-body request-body-encoding + response-headers response-body response-body-encoding + (lambda (port) + (call-with-values (lambda () + (proc uri #:port port)) + (lambda (response body) + (pass-if "response equal" + (responses-equal? + response + (call-with-input-string response-headers read-response))) + (pass-if "response body equal" + (equal? (or body "") response-body-comparison)))))))) + +(check-transaction + "GET" "http://www.gnu.org/software/guile/" + get-request-headers:www.gnu.org/software/guile/ "" "iso-8859-1" + get-response-headers:www.gnu.org/software/guile/ + get-response-body:www.gnu.org/software/guile/ "iso-8859-1" + http-get) + +(check-transaction + "HEAD" "http://www.gnu.org/software/guile/" + head-request-headers:www.gnu.org/software/guile/ "" "iso-8859-1" + head-response-headers:www.gnu.org/software/guile/ "" "iso-8859-1" + http-head) + +(check-transaction + "POST" "http://www.apache.org/" + post-request-headers:www.apache.org/ "" "iso-8859-1" + post-response-headers:www.apache.org/ + post-response-body:www.apache.org/ "iso-8859-1" + http-post) + +(check-transaction + "PUT" "http://www.apache.org/" + put-request-headers:www.apache.org/ "" "iso-8859-1" + put-response-headers:www.apache.org/ + put-response-body:www.apache.org/ "iso-8859-1" + http-put) + +(check-transaction + "DELETE" "http://www.apache.org/" + delete-request-headers:www.apache.org/ "" "iso-8859-1" + delete-response-headers:www.apache.org/ + delete-response-body:www.apache.org/ "iso-8859-1" + http-delete) + +(check-transaction + "OPTIONS" "http://www.apache.org/" + options-request-headers:www.apache.org/ "" "utf-8" + options-response-headers:www.apache.org/ "" "utf-8" + http-options) + +(check-transaction + "TRACE" "http://www.apache.org/" + trace-request-headers:www.apache.org/ "" "iso-8859-1" + trace-response-headers:www.apache.org/ + trace-response-body:www.apache.org/ "iso-8859-1" + http-trace + #:response-body-comparison + ;; The body will be message/http, which is logically a sequence of + ;; bytes, not characters. It happens that iso-8859-1 can encode our + ;; body and is compatible with the headers as well. + (string->bytevector trace-request-headers:www.apache.org/ + "iso-8859-1")) From 5ed4ea90a9abe64c024bbc0c664476b0673556b3 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 11 Jan 2013 21:15:28 +0100 Subject: [PATCH 11/12] Change iconv procedures to take optional instead of keyword arg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * module/ice-9/iconv.scm (call-with-encoded-output-string): (string->bytevector, bytevector->string): Take an optional instead of a keyword argument. * doc/ref/api-data.texi (Representing Strings as Bytes): Adapt docs to change, and fix a number of errors. Thanks to Ludovic Courtès for the pointers. * test-suite/tests/iconv.test ("wide non-ascii string"): Add a test for the 'substitute path. --- doc/ref/api-data.texi | 26 ++++++++++++++++---------- module/ice-9/iconv.scm | 11 +++++++---- test-suite/tests/iconv.test | 7 ++++++- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 3bd38d28b..21398f48d 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -4190,6 +4190,11 @@ sequences of bytes. @xref{Bytevectors}, for more on how Guile represents raw byte sequences. This module gets its name from the common @sc{unix} command of the same name. +Note that often it is sufficient to just read and write strings from +ports instead of using these functions. To do this, specify the port +encoding using @code{set-port-encoding!}. @xref{Ports}, for more on +ports and character encodings. + Unlike the rest of the procedures in this section, you have to load the @code{iconv} module before having access to these procedures: @@ -4197,31 +4202,32 @@ Unlike the rest of the procedures in this section, you have to load the (use-modules (ice-9 iconv)) @end example -@deffn string->bytevector string encoding [#:conversion-strategy='error] +@deffn string->bytevector string encoding [conversion-strategy] Encode @var{string} as a sequence of bytes. The string will be encoded in the character set specified by the @var{encoding} string. If the string has characters that cannot be represented in the encoding, by default this procedure raises an -@code{encoding-error}, though the @code{#:conversion-strategy} keyword -can specify other behaviors. +@code{encoding-error}. Pass a @var{conversion-strategy} argument to +specify other behaviors. The return value is a bytevector. @xref{Bytevectors}, for more on bytevectors. @xref{Ports}, for more on character encodings and conversion strategies. @end deffn -@deffn bytevector->string bytevector encoding +@deffn bytevector->string bytevector encoding [conversion-strategy] Decode @var{bytevector} into a string. The bytes will be decoded from the character set by the @var{encoding} string. If the bytes do not form a valid encoding, by default this -procedure raises an @code{decoding-error}, though that may be overridden -with the @code{#:conversion-strategy} keyword. @xref{Ports}, for more -on character encodings and conversion strategies. +procedure raises an @code{decoding-error}. As with +@code{string->bytevector}, pass the optional @var{conversion-strategy} +argument to modify this behavior. @xref{Ports}, for more on character +encodings and conversion strategies. @end deffn -@deffn call-with-output-encoded-string encoding proc [#:conversion-strategy='error] +@deffn call-with-output-encoded-string encoding proc [conversion-strategy] Like @code{call-with-output-string}, but instead of returning a string, returns a encoding of the string according to @var{encoding}, as a bytevector. This procedure can be more efficient than collecting a @@ -4371,7 +4377,7 @@ If @var{lenp} is @code{NULL}, this function will return a null-terminated C string. It will throw an error if the string contains a null character. -The Scheme interface to this function is @code{encode-string}, from the +The Scheme interface to this function is @code{string->bytevector}, from the @code{ice-9 iconv} module. @xref{Representing Strings as Bytes}. @end deftypefn @@ -4382,7 +4388,7 @@ string is passed as the ASCII, null-terminated C string @code{encoding}. The @var{handler} parameters suggests a strategy for dealing with unconvertable characters. -The Scheme interface to this function is @code{decode-string}. +The Scheme interface to this function is @code{bytevector->string}. @xref{Representing Strings as Bytes}. @end deftypefn diff --git a/module/ice-9/iconv.scm b/module/ice-9/iconv.scm index 40d595473..0f0c1a3cf 100644 --- a/module/ice-9/iconv.scm +++ b/module/ice-9/iconv.scm @@ -43,7 +43,8 @@ bv)))) (define* (call-with-encoded-output-string encoding proc - #:key (conversion-strategy 'error)) + #:optional + (conversion-strategy 'error)) (if (string-ci=? encoding "utf-8") ;; I don't know why, but this appears to be faster; at least for ;; serving examples/debug-sxml.scm (1464 reqs/s versus 850 @@ -59,16 +60,18 @@ ;; TODO: Provide C implementations that call scm_from_stringn and ;; friends? -(define* (string->bytevector str encoding #:key (conversion-strategy 'error)) +(define* (string->bytevector str encoding + #:optional (conversion-strategy 'error)) (if (string-ci=? encoding "utf-8") (string->utf8 str) (call-with-encoded-output-string encoding (lambda (port) (display str port)) - #:conversion-strategy conversion-strategy))) + conversion-strategy))) -(define* (bytevector->string bv encoding #:key (conversion-strategy 'error)) +(define* (bytevector->string bv encoding + #:optional (conversion-strategy 'error)) (if (string-ci=? encoding "utf-8") (utf8->string bv) (let ((p (open-bytevector-input-port bv))) diff --git a/test-suite/tests/iconv.test b/test-suite/tests/iconv.test index e6ee90d1d..9083cd256 100644 --- a/test-suite/tests/iconv.test +++ b/test-suite/tests/iconv.test @@ -112,4 +112,9 @@ (string->bytevector s "ascii")) (pass-if-exception "encode as latin1" exception:encoding-error - (string->bytevector s "latin1")))) + (string->bytevector s "latin1")) + + (pass-if "encode as ascii with substitutions" + (equal? (make-string (string-length s) #\?) + (bytevector->string (string->bytevector s "ascii" 'substitute) + "ascii"))))) From 18c5bffe96947ee82a29b115e758d7357cefbbe9 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 11 Jan 2013 21:28:51 +0100 Subject: [PATCH 12/12] Docstrings in (ice-9 iconv) * module/ice-9/iconv.scm: Add docstrings. --- module/ice-9/iconv.scm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/module/ice-9/iconv.scm b/module/ice-9/iconv.scm index 0f0c1a3cf..a8b745896 100644 --- a/module/ice-9/iconv.scm +++ b/module/ice-9/iconv.scm @@ -45,6 +45,8 @@ (define* (call-with-encoded-output-string encoding proc #:optional (conversion-strategy 'error)) + "Call PROC on a fresh port. Encode the resulting string as a +bytevector according to ENCODING, and return the bytevector." (if (string-ci=? encoding "utf-8") ;; I don't know why, but this appears to be faster; at least for ;; serving examples/debug-sxml.scm (1464 reqs/s versus 850 @@ -62,6 +64,8 @@ (define* (string->bytevector str encoding #:optional (conversion-strategy 'error)) + "Encode STRING according to ENCODING, which should be a string naming +a character encoding, like \"utf-8\"." (if (string-ci=? encoding "utf-8") (string->utf8 str) (call-with-encoded-output-string @@ -72,6 +76,9 @@ (define* (bytevector->string bv encoding #:optional (conversion-strategy 'error)) + "Decode the string represented by BV. The bytes in the bytevector +will be interpreted according to ENCODING, which should be a string +naming a character encoding, like \"utf-8\"." (if (string-ci=? encoding "utf-8") (utf8->string bv) (let ((p (open-bytevector-input-port bv)))