diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 21398f48d..28160c88c 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -4428,6 +4428,19 @@ returned is the number of bytes for @code{scm_to_latin1_stringn} and for @code{scm_to_utf32_stringn}. @end deftypefn +It is not often the case, but sometimes when you are dealing with the +implementation details of a port, you need to encode and decode strings +according to the encoding and conversion strategy of the port. There +are some convenience functions for that purpose as well. + +@deftypefn {C Function} SCM scm_from_port_string (const char *str, SCM port) +@deftypefnx {C Function} SCM scm_from_port_stringn (const char *str, size_t len, SCM port) +@deftypefnx {C Function} char* scm_to_port_string (SCM str, SCM port) +@deftypefnx {C Function} char* scm_to_port_stringn (SCM str, size_t *lenp, SCM port) +Like @code{scm_from_stringn} and friends, except they take their +encoding and conversion strategy from a given port object. +@end deftypefn + @node String Internals @subsubsection String Internals diff --git a/guile-readline/readline.c b/guile-readline/readline.c index 0e4ad2902..1e697eb5d 100644 --- a/guile-readline/readline.c +++ b/guile-readline/readline.c @@ -1,6 +1,6 @@ /* readline.c --- line editing support for Guile */ -/* Copyright (C) 1997,1999,2000,2001, 2002, 2003, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1997,1999,2000,2001, 2002, 2003, 2006, 2007, 2008, 2009, 2010, 2013 Free Software Foundation, Inc. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -257,12 +257,7 @@ internal_readline (SCM text) promptp = 1; s = readline (prompt); if (s) - { - scm_t_port *pt = SCM_PTAB_ENTRY (output_port); - - ret = scm_from_stringn (s, strlen (s), pt->encoding, - SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE); - } + ret = scm_from_port_string (s, output_port); else ret = SCM_EOF_VAL; diff --git a/libguile/read.c b/libguile/read.c index d977cff82..833ec2342 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -704,7 +704,6 @@ scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) SCM result, str = SCM_EOL; char local_buffer[READER_BUFFER_SIZE], *buffer; size_t bytes_read; - scm_t_port *pt = SCM_PTAB_ENTRY (port); /* Need to capture line and column numbers here. */ long line = SCM_LINUM (port); @@ -714,7 +713,7 @@ scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &bytes_read); - str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); + str = scm_from_port_stringn (buffer, bytes_read, port); result = scm_string_to_number (str, SCM_UNDEFINED); if (scm_is_false (result)) @@ -739,7 +738,6 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) size_t bytes_read; int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX); char local_buffer[READER_BUFFER_SIZE], *buffer; - scm_t_port *pt = SCM_PTAB_ENTRY (port); SCM str; scm_ungetc_unlocked (chr, port); @@ -750,8 +748,7 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) if (postfix && ends_with_colon && (bytes_read > 1)) { - str = scm_from_stringn (buffer, bytes_read - 1, - pt->encoding, pt->ilseq_handler); + str = scm_from_port_stringn (buffer, bytes_read - 1, port); if (opts->case_insensitive_p) str = scm_string_downcase_x (str); @@ -759,8 +756,7 @@ 
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) } else { - str = scm_from_stringn (buffer, bytes_read, - pt->encoding, pt->ilseq_handler); + str = scm_from_port_stringn (buffer, bytes_read, port); if (opts->case_insensitive_p) str = scm_string_downcase_x (str); @@ -780,7 +776,6 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) char local_buffer[READER_BUFFER_SIZE], *buffer; unsigned int radix; SCM str; - scm_t_port *pt; switch (chr) { @@ -813,8 +808,7 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer, &read); - pt = SCM_PTAB_ENTRY (port); - str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler); + str = scm_from_port_stringn (buffer, read, port); result = scm_string_to_number (str, scm_from_uint (radix)); @@ -1006,8 +1000,7 @@ scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) /* Otherwise, convert the buffer into a proper scheme string for processing. 
*/ - charname = scm_from_stringn (buffer, bytes_read, pt->encoding, - pt->ilseq_handler); + charname = scm_from_port_stringn (buffer, bytes_read, port); charname_len = scm_i_string_length (charname); SCM_COL (port) += charname_len; cp = scm_i_string_ref (charname, 0); diff --git a/libguile/strings.c b/libguile/strings.c index 1e89e63d6..3db526e92 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1712,6 +1712,26 @@ scm_from_utf32_stringn (const scm_t_wchar *str, size_t len) return result; } +SCM +scm_from_port_string (const char *str, SCM port) +{ /* Convert the C string STR to a Scheme string for PORT; thin wrapper around scm_from_port_stringn. */ + return scm_from_port_stringn (str, -1, port); /* NOTE(review): -1 becomes the largest size_t value; presumably the callee takes that to mean STR is NUL-terminated -- confirm. */ +} + +SCM +scm_from_port_stringn (const char *str, size_t len, SCM port) +{ /* Convert STR (length LEN) to a Scheme string, choosing the converter from the scm_t_port that SCM_PTAB_ENTRY yields for PORT. */ + scm_t_port *pt = SCM_PTAB_ENTRY (port); + + if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1) + return scm_from_latin1_stringn (str, len); /* Latin-1 mode: pt->ilseq_handler is not consulted here. NOTE(review): presumably because every byte is valid Latin-1 -- confirm. */ + else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8 + && pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR) + return scm_from_utf8_stringn (str, len); /* UTF-8 mode, but only when the handler is SCM_FAILED_CONVERSION_ERROR. NOTE(review): presumably the UTF-8 converter can only raise an error on bad input -- confirm. */ + else + return scm_from_stringn (str, len, pt->encoding, pt->ilseq_handler); /* All remaining cases: generic conversion with the port's own encoding and handler. */ +} + /* Create a new scheme string from the C string STR. The memory of STR may be used directly as storage for the new string. 
*/ /* FIXME: GC-wise, the only way to use the memory area pointed to by STR @@ -2097,6 +2117,26 @@ scm_to_utf32_stringn (SCM str, size_t *lenp) } #undef FUNC_NAME +char * +scm_to_port_string (SCM str, SCM port) +{ /* Encode the Scheme string STR for PORT; thin wrapper around scm_to_port_stringn. */ + return scm_to_port_stringn (str, NULL, port); /* NULL is passed for LENP, so no length is handed back to the caller. */ +} + +char * +scm_to_port_stringn (SCM str, size_t *lenp, SCM port) +{ /* Encode STR as a C char buffer, choosing the converter from the scm_t_port that SCM_PTAB_ENTRY yields for PORT; LENP is forwarded unchanged to that converter. */ + scm_t_port *pt = SCM_PTAB_ENTRY (port); + + if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1 + && pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR) + return scm_to_latin1_stringn (str, lenp); /* Latin-1 mode, but only when the handler is SCM_FAILED_CONVERSION_ERROR. NOTE(review): presumably the Latin-1 converter can only raise an error on unencodable characters -- confirm. */ + else if (pt->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8) + return scm_to_utf8_stringn (str, lenp); /* UTF-8 mode: pt->ilseq_handler is not consulted here. NOTE(review): presumably because every character is encodable in UTF-8 -- confirm. */ + else + return scm_to_stringn (str, lenp, pt->encoding, pt->ilseq_handler); /* All remaining cases: generic conversion with the port's own encoding and handler. */ +} + /* Return a malloc(3)-allocated buffer containing the contents of STR encoded according to ENCODING. If LENP is non-NULL, set it to the size in bytes of the returned buffer. If the conversion to ENCODING fails, apply the strategy diff --git a/libguile/strings.h b/libguile/strings.h index 04a976211..445d9c878 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -3,7 +3,7 @@ #ifndef SCM_STRINGS_H #define SCM_STRINGS_H -/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -153,6 +153,11 @@ SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp); SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str); SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len); +SCM_API char *scm_to_port_string (SCM str, SCM port); +SCM_API char *scm_to_port_stringn (SCM str, size_t *lenp, SCM port); +SCM_API SCM scm_from_port_string (const char *str, SCM port); +SCM_API SCM scm_from_port_stringn (const char *str, size_t len, SCM port); + SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding, scm_t_string_failed_conversion_handler handler); SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); diff --git a/libguile/strports.c b/libguile/strports.c index 702022740..425b08929 100644 --- a/libguile/strports.c +++ b/libguile/strports.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2006, - * 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -318,8 +318,7 @@ scm_strport_to_string (SCM port) if (pt->read_buf_size == 0) return scm_nullstr; - return scm_from_stringn ((char *)pt->read_buf, pt->read_buf_size, - pt->encoding, pt->ilseq_handler); + return scm_from_port_stringn ((char *)pt->read_buf, pt->read_buf_size, port); } SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,