1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 20:30:28 +02:00

Port conversion strategies internally are symbols

* libguile/ports.h (scm_t_port): Represent the conversion strategy as a
  symbol, to make things easier for Scheme.  Rename to
  "conversion_strategy".
  (scm_c_make_port_with_encoding): Change to take encoding and
  conversion_strategy arguments as symbols.
  (scm_i_string_failed_conversion_handler): New internal helper, to turn
  a symbol to a scm_t_string_failed_conversion_handler.
  (scm_i_default_port_encoding): Return the default port encoding as a
  symbol.
  (scm_i_default_port_conversion_strategy)
  (scm_i_set_default_port_conversion_strategy): Rename from
  scm_i_default_port_conversion_handler et al.  Take and return Scheme
  symbols.
* libguile/foreign.c (scm_string_to_pointer, scm_pointer_to_string): Use
  scm_i_default_string_failed_conversion_handler instead of
  scm_i_default_port_conversion_handler.
* libguile/print.c (PORT_CONVERSION_HANDLER): Update definition.
  (print_normal_symbol): Use PORT_CONVERSION_HANDLER.
* libguile/r6rs-ports.c (make_bytevector_input_port):
  (make_custom_binary_input_port, make_bytevector_output_port): Adapt to
  changes in scm_c_make_port_with_encoding.
* libguile/strings.h:
* libguile/strings.c (scm_i_default_string_failed_conversion_handler):
  New helper.
  (scm_from_locale_stringn, scm_from_port_stringn):
  (scm_to_locale_stringn, scm_to_port_stringn): Adapt to interface
  changes.
* libguile/strports.c (scm_mkstrport): Adapt to
  scm_c_make_port_with_encoding change.
* libguile/ports.c (scm_c_make_port): Adapt to
  scm_c_make_port_with_encoding change.
  (ascii_toupper, encoding_matches, canonicalize_encoding): Move down in
  the file.
  (peek_codepoint, get_codepoint, scm_ungetc): Adapt to port conversion
  strategy change.  Remove duplicate case in get_codepoint.
  (scm_init_ports): Move symbol initializations to the same place.
This commit is contained in:
Andy Wingo 2016-05-04 10:31:21 +02:00
parent d8711b9759
commit 383df7976f
8 changed files with 162 additions and 175 deletions

View file

@ -370,7 +370,7 @@ SCM_DEFINE (scm_string_to_pointer, "string->pointer", 1, 1, 0,
ret = scm_from_pointer ret = scm_from_pointer
(scm_to_stringn (string, NULL, enc, (scm_to_stringn (string, NULL, enc,
scm_i_default_port_conversion_handler ()), scm_i_default_string_failed_conversion_handler ()),
free); free);
scm_dynwind_end (); scm_dynwind_end ();
@ -415,7 +415,7 @@ SCM_DEFINE (scm_pointer_to_string, "pointer->string", 1, 2, 0,
scm_dynwind_free (enc); scm_dynwind_free (enc);
ret = scm_from_stringn (SCM_POINTER_VALUE (pointer), len, enc, ret = scm_from_stringn (SCM_POINTER_VALUE (pointer), len, enc,
scm_i_default_port_conversion_handler ()); scm_i_default_string_failed_conversion_handler ());
scm_dynwind_end (); scm_dynwind_end ();

View file

@ -103,57 +103,10 @@ static SCM sym_UTF_32;
static SCM sym_UTF_32LE; static SCM sym_UTF_32LE;
static SCM sym_UTF_32BE; static SCM sym_UTF_32BE;
/* Port encodings are case-insensitive ASCII strings. */ /* Port conversion strategies. */
static char static SCM sym_error;
ascii_toupper (char c) static SCM sym_substitute;
{ static SCM sym_escape;
return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
}
/* It is only necessary to use this function on encodings that come from
the user and have not been canonicalized yet. Encodings that are set
on ports or in the default encoding fluid are in upper-case, and can
be compared with strcmp. */
static int
encoding_matches (const char *enc, SCM upper_symbol)
{
const char *upper = scm_i_symbol_chars (upper_symbol);
if (!enc)
enc = "ISO-8859-1";
while (*enc)
if (ascii_toupper (*enc++) != *upper++)
return 0;
return !*upper;
}
static SCM
canonicalize_encoding (const char *enc)
{
char *ret;
int i;
if (!enc || encoding_matches (enc, sym_ISO_8859_1))
return sym_ISO_8859_1;
if (encoding_matches (enc, sym_UTF_8))
return sym_UTF_8;
ret = scm_gc_strdup (enc, "port");
for (i = 0; ret[i]; i++)
{
if (ret[i] > 127)
/* Restrict to ASCII. */
scm_misc_error (NULL, "invalid character encoding ~s",
scm_list_1 (scm_from_latin1_string (enc)));
else
ret[i] = ascii_toupper (ret[i]);
}
return scm_from_latin1_symbol (ret);
}
@ -750,8 +703,7 @@ initialize_port_buffers (SCM port)
SCM SCM
scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits, scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
const char *encoding, SCM encoding, SCM conversion_strategy,
scm_t_string_failed_conversion_handler handler,
scm_t_bits stream) scm_t_bits stream)
{ {
SCM ret; SCM ret;
@ -774,9 +726,8 @@ scm_c_make_port_with_encoding (scm_t_bits tag, unsigned long mode_bits,
entry->rw_random = ptob->seek != NULL; entry->rw_random = ptob->seek != NULL;
entry->port = ret; entry->port = ret;
entry->stream = stream; entry->stream = stream;
entry->encoding = canonicalize_encoding (encoding); entry->encoding = encoding;
entry->conversion_strategy = conversion_strategy;
entry->ilseq_handler = handler;
pti->iconv_descriptors = NULL; pti->iconv_descriptors = NULL;
pti->at_stream_start_for_bom_read = 1; pti->at_stream_start_for_bom_read = 1;
@ -800,7 +751,7 @@ scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, scm_t_bits stream)
{ {
return scm_c_make_port_with_encoding (tag, mode_bits, return scm_c_make_port_with_encoding (tag, mode_bits,
scm_i_default_port_encoding (), scm_i_default_port_encoding (),
scm_i_default_port_conversion_handler (), scm_i_default_port_conversion_strategy (),
stream); stream);
} }
@ -962,6 +913,58 @@ SCM_DEFINE (scm_close_output_port, "close-output-port", 1, 0, 0,
/* Encoding characters to byte streams, and decoding byte streams to /* Encoding characters to byte streams, and decoding byte streams to
characters. */ characters. */
/* Port encodings are case-insensitive ASCII strings. */
static char
ascii_toupper (char c)
{
return (c < 'a' || c > 'z') ? c : ('A' + (c - 'a'));
}
/* It is only necessary to use this function on encodings that come from
the user and have not been canonicalized yet. Encodings that are set
on ports or in the default encoding fluid are in upper-case, and can
be compared with strcmp. */
static int
encoding_matches (const char *enc, SCM upper_symbol)
{
const char *upper = scm_i_symbol_chars (upper_symbol);
if (!enc)
enc = "ISO-8859-1";
while (*enc)
if (ascii_toupper (*enc++) != *upper++)
return 0;
return !*upper;
}
static SCM
canonicalize_encoding (const char *enc)
{
char *ret;
int i;
if (!enc || encoding_matches (enc, sym_ISO_8859_1))
return sym_ISO_8859_1;
if (encoding_matches (enc, sym_UTF_8))
return sym_UTF_8;
ret = scm_gc_strdup (enc, "port");
for (i = 0; ret[i]; i++)
{
if (ret[i] > 127)
/* Restrict to ASCII. */
scm_misc_error (NULL, "invalid character encoding ~s",
scm_list_1 (scm_from_latin1_string (enc)));
else
ret[i] = ascii_toupper (ret[i]);
}
return scm_from_latin1_symbol (ret);
}
/* A fluid specifying the default encoding for newly created ports. If it is /* A fluid specifying the default encoding for newly created ports. If it is
a string, that is the encoding. If it is #f, it is in the "native" a string, that is the encoding. If it is #f, it is in the "native"
(Latin-1) encoding. */ (Latin-1) encoding. */
@ -979,73 +982,50 @@ scm_i_set_default_port_encoding (const char *encoding)
} }
/* Return the name of the default encoding for newly created ports. */ /* Return the name of the default encoding for newly created ports. */
const char * SCM
scm_i_default_port_encoding (void) scm_i_default_port_encoding (void)
{ {
SCM encoding; SCM encoding;
encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var)); encoding = scm_fluid_ref (SCM_VARIABLE_REF (default_port_encoding_var));
if (!scm_is_string (encoding)) if (!scm_is_string (encoding))
return "ISO-8859-1"; return sym_ISO_8859_1;
else else
return scm_i_string_chars (encoding); return canonicalize_encoding (scm_i_string_chars (encoding));
} }
/* A fluid specifying the default conversion handler for newly created /* A fluid specifying the default conversion handler for newly created
ports. Its value should be one of the symbols below. */ ports. Its value should be one of the symbols below. */
static SCM default_conversion_strategy_var; static SCM default_conversion_strategy_var;
/* The possible conversion strategies. */
static SCM sym_error;
static SCM sym_substitute;
static SCM sym_escape;
/* Return the default failed encoding conversion policy for new created /* Return the default failed encoding conversion policy for new created
ports. */ ports. */
scm_t_string_failed_conversion_handler SCM
scm_i_default_port_conversion_handler (void) scm_i_default_port_conversion_strategy (void)
{ {
SCM value; SCM value;
value = scm_fluid_ref (SCM_VARIABLE_REF (default_conversion_strategy_var)); value = scm_fluid_ref (SCM_VARIABLE_REF (default_conversion_strategy_var));
if (scm_is_eq (sym_substitute, value)) if (scm_is_eq (sym_substitute, value) || scm_is_eq (sym_escape, value))
return SCM_FAILED_CONVERSION_QUESTION_MARK; return value;
else if (scm_is_eq (sym_escape, value))
return SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE; /* Default to 'error also when the fluid's value is not one of the
else valid symbols. */
/* Default to 'error also when the fluid's value is not one of return sym_error;
the valid symbols. */
return SCM_FAILED_CONVERSION_ERROR;
} }
/* Use HANDLER as the default conversion strategy for future ports. */ /* Use HANDLER as the default conversion strategy for future ports. */
void void
scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler scm_i_set_default_port_conversion_strategy (SCM sym)
handler)
{ {
SCM strategy; if (!scm_is_eq (sym, sym_error)
&& !scm_is_eq (sym, sym_substitute)
switch (handler) && !scm_is_eq (sym, sym_escape))
{ /* Internal error. */
case SCM_FAILED_CONVERSION_ERROR:
strategy = sym_error;
break;
case SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE:
strategy = sym_escape;
break;
case SCM_FAILED_CONVERSION_QUESTION_MARK:
strategy = sym_substitute;
break;
default:
abort (); abort ();
}
scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), scm_fluid_set_x (SCM_VARIABLE_REF (default_conversion_strategy_var), sym);
strategy);
} }
/* If the next LEN bytes from PORT are equal to those in BYTES, then /* If the next LEN bytes from PORT are equal to those in BYTES, then
@ -1276,6 +1256,18 @@ SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
} }
#undef FUNC_NAME #undef FUNC_NAME
scm_t_string_failed_conversion_handler
scm_i_string_failed_conversion_handler (SCM conversion_strategy)
{
if (scm_is_eq (conversion_strategy, sym_substitute))
return SCM_FAILED_CONVERSION_QUESTION_MARK;
if (scm_is_eq (conversion_strategy, sym_escape))
return SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
/* Default to error. */
return SCM_FAILED_CONVERSION_ERROR;
}
SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy", SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1, 0, 0, (SCM port), 1, 0, 0, (SCM port),
"Returns the behavior of the port when handling a character that\n" "Returns the behavior of the port when handling a character that\n"
@ -1291,10 +1283,8 @@ SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
"when they are created.\n") "when they are created.\n")
#define FUNC_NAME s_scm_port_conversion_strategy #define FUNC_NAME s_scm_port_conversion_strategy
{ {
scm_t_string_failed_conversion_handler h;
if (scm_is_false (port)) if (scm_is_false (port))
h = scm_i_default_port_conversion_handler (); return scm_i_default_port_conversion_strategy ();
else else
{ {
scm_t_port *pt; scm_t_port *pt;
@ -1302,20 +1292,8 @@ SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
SCM_VALIDATE_OPPORT (1, port); SCM_VALIDATE_OPPORT (1, port);
pt = SCM_PTAB_ENTRY (port); pt = SCM_PTAB_ENTRY (port);
h = pt->ilseq_handler; return pt->conversion_strategy;
} }
if (h == SCM_FAILED_CONVERSION_ERROR)
return scm_from_latin1_symbol ("error");
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
return scm_from_latin1_symbol ("substitute");
else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
return scm_from_latin1_symbol ("escape");
else
abort ();
/* Never gets here. */
return SCM_UNDEFINED;
} }
#undef FUNC_NAME #undef FUNC_NAME
@ -1339,23 +1317,17 @@ SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
"this thread.\n") "this thread.\n")
#define FUNC_NAME s_scm_set_port_conversion_strategy_x #define FUNC_NAME s_scm_set_port_conversion_strategy_x
{ {
scm_t_string_failed_conversion_handler handler; if (!scm_is_eq (sym, sym_error)
&& !scm_is_eq (sym, sym_substitute)
if (scm_is_eq (sym, sym_error)) && !scm_is_eq (sym, sym_escape))
handler = SCM_FAILED_CONVERSION_ERROR;
else if (scm_is_eq (sym, sym_substitute))
handler = SCM_FAILED_CONVERSION_QUESTION_MARK;
else if (scm_is_eq (sym, sym_escape))
handler = SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE;
else
SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym)); SCM_MISC_ERROR ("unknown conversion strategy ~s", scm_list_1 (sym));
if (scm_is_false (port)) if (scm_is_false (port))
scm_i_set_default_port_conversion_handler (handler); scm_i_set_default_port_conversion_strategy (sym);
else else
{ {
SCM_VALIDATE_OPPORT (1, port); SCM_VALIDATE_OPPORT (1, port);
SCM_PTAB_ENTRY (port)->ilseq_handler = handler; SCM_PTAB_ENTRY (port)->conversion_strategy = sym;
} }
return SCM_UNSPECIFIED; return SCM_UNSPECIFIED;
@ -1866,7 +1838,7 @@ peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
} }
} }
} }
else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK) else if (scm_is_eq (pt->conversion_strategy, sym_substitute))
{ {
*codepoint = '?'; *codepoint = '?';
err = 0; err = 0;
@ -1884,11 +1856,6 @@ get_codepoint (SCM port, scm_t_wchar *codepoint)
err = peek_codepoint (port, codepoint, &len); err = peek_codepoint (port, codepoint, &len);
scm_port_buffer_did_take (pt->read_buf, len); scm_port_buffer_did_take (pt->read_buf, len);
if (err != 0 && pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
{
*codepoint = '?';
err = 0;
}
if (*codepoint == EOF) if (*codepoint == EOF)
scm_i_clear_pending_eof (port); scm_i_clear_pending_eof (port);
update_port_lf (*codepoint, port); update_port_lf (*codepoint, port);
@ -2028,10 +1995,15 @@ scm_ungetc (scm_t_wchar c, SCM port)
len = 1; len = 1;
} }
else else
{
scm_t_string_failed_conversion_handler handler =
scm_i_string_failed_conversion_handler (pt->conversion_strategy);
result = u32_conv_to_encoding (scm_i_symbol_chars (pt->encoding), result = u32_conv_to_encoding (scm_i_symbol_chars (pt->encoding),
(enum iconv_ilseq_handler) pt->ilseq_handler, (enum iconv_ilseq_handler) handler,
(uint32_t *) &c, 1, NULL, (uint32_t *) &c, 1, NULL,
result_buf, &len); result_buf, &len);
}
if (SCM_UNLIKELY (result == NULL || len == 0)) if (SCM_UNLIKELY (result == NULL || len == 0))
scm_encoding_error (FUNC_NAME, errno, scm_encoding_error (FUNC_NAME, errno,
@ -3152,6 +3124,10 @@ scm_init_ports (void)
sym_UTF_32LE = scm_from_latin1_symbol ("UTF-32LE"); sym_UTF_32LE = scm_from_latin1_symbol ("UTF-32LE");
sym_UTF_32BE = scm_from_latin1_symbol ("UTF-32BE"); sym_UTF_32BE = scm_from_latin1_symbol ("UTF-32BE");
sym_substitute = scm_from_latin1_symbol ("substitute");
sym_escape = scm_from_latin1_symbol ("escape");
sym_error = scm_from_latin1_symbol ("error");
trampoline_to_c_read_subr = trampoline_to_c_read_subr =
scm_c_make_gsubr ("port-read", 4, 0, 0, scm_c_make_gsubr ("port-read", 4, 0, 0,
(scm_t_subr) trampoline_to_c_read); (scm_t_subr) trampoline_to_c_read);
@ -3170,10 +3146,6 @@ scm_init_ports (void)
cur_warnport_fluid = scm_make_fluid (); cur_warnport_fluid = scm_make_fluid ();
cur_loadport_fluid = scm_make_fluid (); cur_loadport_fluid = scm_make_fluid ();
sym_substitute = scm_from_latin1_symbol ("substitute");
sym_escape = scm_from_latin1_symbol ("escape");
sym_error = scm_from_latin1_symbol ("error");
/* Use Latin-1 as the default port encoding. */ /* Use Latin-1 as the default port encoding. */
default_port_encoding_var = default_port_encoding_var =
scm_c_define ("%default-port-encoding", scm_c_define ("%default-port-encoding",

View file

@ -115,7 +115,7 @@ typedef struct
/* Character encoding support. */ /* Character encoding support. */
SCM encoding; /* A symbol of upper-case ASCII. */ SCM encoding; /* A symbol of upper-case ASCII. */
scm_t_string_failed_conversion_handler ilseq_handler; SCM conversion_strategy; /* A symbol; either substitute, error, or escape. */
} scm_t_port; } scm_t_port;
@ -255,11 +255,10 @@ SCM_API long scm_mode_bits (char *modes);
SCM_API SCM scm_port_mode (SCM port); SCM_API SCM scm_port_mode (SCM port);
/* Low-level constructors. */ /* Low-level constructors. */
SCM_API SCM SCM_API SCM scm_c_make_port_with_encoding (scm_t_bits tag,
scm_c_make_port_with_encoding (scm_t_bits tag,
unsigned long mode_bits, unsigned long mode_bits,
const char *encoding, SCM encoding,
scm_t_string_failed_conversion_handler handler, SCM conversion_strategy,
scm_t_bits stream); scm_t_bits stream);
SCM_API SCM scm_c_make_port (scm_t_bits tag, unsigned long mode_bits, SCM_API SCM scm_c_make_port (scm_t_bits tag, unsigned long mode_bits,
scm_t_bits stream); scm_t_bits stream);
@ -279,12 +278,12 @@ SCM_API SCM scm_close_output_port (SCM port);
/* Encoding characters to byte streams, and decoding byte streams to /* Encoding characters to byte streams, and decoding byte streams to
characters. */ characters. */
SCM_INTERNAL const char *scm_i_default_port_encoding (void);
SCM_INTERNAL void scm_i_set_default_port_encoding (const char *);
SCM_INTERNAL scm_t_string_failed_conversion_handler SCM_INTERNAL scm_t_string_failed_conversion_handler
scm_i_default_port_conversion_handler (void); scm_i_string_failed_conversion_handler (SCM conversion_strategy);
SCM_INTERNAL void SCM_INTERNAL SCM scm_i_default_port_encoding (void);
scm_i_set_default_port_conversion_handler (scm_t_string_failed_conversion_handler); SCM_INTERNAL void scm_i_set_default_port_encoding (const char *encoding);
SCM_INTERNAL SCM scm_i_default_port_conversion_strategy (void);
SCM_INTERNAL void scm_i_set_default_port_conversion_strategy (SCM strategy);
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str); SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
SCM_API SCM scm_port_encoding (SCM port); SCM_API SCM scm_port_encoding (SCM port);
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding); SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);

View file

@ -62,7 +62,7 @@
/* Character printers. */ /* Character printers. */
#define PORT_CONVERSION_HANDLER(port) \ #define PORT_CONVERSION_HANDLER(port) \
SCM_PTAB_ENTRY (port)->ilseq_handler scm_i_string_failed_conversion_handler (scm_port_conversion_strategy (port))
SCM_SYMBOL (sym_UTF_8, "UTF-8"); SCM_SYMBOL (sym_UTF_8, "UTF-8");
SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1"); SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
@ -441,7 +441,7 @@ print_normal_symbol (SCM sym, SCM port)
scm_t_string_failed_conversion_handler strategy; scm_t_string_failed_conversion_handler strategy;
len = scm_i_symbol_length (sym); len = scm_i_symbol_length (sym);
strategy = SCM_PTAB_ENTRY (port)->ilseq_handler; strategy = PORT_CONVERSION_HANDLER (port);
if (scm_i_is_narrow_symbol (sym)) if (scm_i_is_narrow_symbol (sym))
display_string (scm_i_symbol_chars (sym), 1, len, port, strategy); display_string (scm_i_symbol_chars (sym), 1, len, port, strategy);

View file

@ -37,6 +37,12 @@
#include "libguile/ports-internal.h" #include "libguile/ports-internal.h"
SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
SCM_SYMBOL (sym_error, "error");
/* Unimplemented features. */ /* Unimplemented features. */
@ -92,10 +98,8 @@ make_bytevector_input_port (SCM bv)
stream = scm_gc_typed_calloc (struct bytevector_input_port); stream = scm_gc_typed_calloc (struct bytevector_input_port);
stream->bytevector = bv; stream->bytevector = bv;
stream->pos = 0; stream->pos = 0;
return scm_c_make_port_with_encoding (bytevector_input_port_type, return scm_c_make_port_with_encoding (bytevector_input_port_type, mode_bits,
mode_bits, sym_ISO_8859_1, sym_error,
NULL, /* encoding */
SCM_FAILED_CONVERSION_ERROR,
(scm_t_bits) stream); (scm_t_bits) stream);
} }
@ -273,8 +277,7 @@ make_custom_binary_input_port (SCM read_proc, SCM get_position_proc,
return scm_c_make_port_with_encoding (custom_binary_input_port_type, return scm_c_make_port_with_encoding (custom_binary_input_port_type,
mode_bits, mode_bits,
NULL, /* encoding */ sym_ISO_8859_1, sym_error,
SCM_FAILED_CONVERSION_ERROR,
(scm_t_bits) stream); (scm_t_bits) stream);
} }
@ -739,8 +742,7 @@ make_bytevector_output_port (void)
port = scm_c_make_port_with_encoding (bytevector_output_port_type, port = scm_c_make_port_with_encoding (bytevector_output_port_type,
mode_bits, mode_bits,
NULL, /* encoding */ sym_ISO_8859_1, sym_error,
SCM_FAILED_CONVERSION_ERROR,
(scm_t_bits)buf); (scm_t_bits)buf);
buf->port = port; buf->port = port;
@ -877,8 +879,7 @@ make_custom_binary_output_port (SCM write_proc, SCM get_position_proc,
return scm_c_make_port_with_encoding (custom_binary_output_port_type, return scm_c_make_port_with_encoding (custom_binary_output_port_type,
mode_bits, mode_bits,
NULL, /* encoding */ sym_ISO_8859_1, sym_error,
SCM_FAILED_CONVERSION_ERROR,
(scm_t_bits) stream); (scm_t_bits) stream);
} }

View file

@ -53,6 +53,7 @@
SCM_SYMBOL (sym_UTF_8, "UTF-8"); SCM_SYMBOL (sym_UTF_8, "UTF-8");
SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1"); SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
SCM_SYMBOL (sym_error, "error");
/* Stringbufs /* Stringbufs
* *
@ -1613,11 +1614,18 @@ scm_from_locale_string (const char *str)
return scm_from_locale_stringn (str, -1); return scm_from_locale_stringn (str, -1);
} }
scm_t_string_failed_conversion_handler
scm_i_default_string_failed_conversion_handler (void)
{
return scm_i_string_failed_conversion_handler
(scm_i_default_port_conversion_strategy ());
}
SCM SCM
scm_from_locale_stringn (const char *str, size_t len) scm_from_locale_stringn (const char *str, size_t len)
{ {
return scm_from_stringn (str, len, locale_charset (), return scm_from_stringn (str, len, locale_charset (),
scm_i_default_port_conversion_handler ()); scm_i_default_string_failed_conversion_handler ());
} }
SCM SCM
@ -1764,12 +1772,13 @@ scm_from_port_stringn (const char *str, size_t len, SCM port)
if (scm_is_eq (pt->encoding, sym_ISO_8859_1)) if (scm_is_eq (pt->encoding, sym_ISO_8859_1))
return scm_from_latin1_stringn (str, len); return scm_from_latin1_stringn (str, len);
else if (scm_is_eq (pt->encoding, sym_UTF_8) else if (scm_is_eq (pt->encoding, sym_UTF_8)
&& (pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR && (scm_is_eq (pt->conversion_strategy, sym_error)
|| (u8_check ((uint8_t *) str, len) == NULL))) || (u8_check ((uint8_t *) str, len) == NULL)))
return scm_from_utf8_stringn (str, len); return scm_from_utf8_stringn (str, len);
else else
return scm_from_stringn (str, len, scm_i_symbol_chars (pt->encoding), return scm_from_stringn (str, len, scm_i_symbol_chars (pt->encoding),
pt->ilseq_handler); scm_i_string_failed_conversion_handler
(scm_port_conversion_strategy (port)));
} }
/* Create a new scheme string from the C string STR. The memory of /* Create a new scheme string from the C string STR. The memory of
@ -1940,7 +1949,7 @@ scm_to_locale_stringn (SCM str, size_t *lenp)
{ {
return scm_to_stringn (str, lenp, return scm_to_stringn (str, lenp,
locale_charset (), locale_charset (),
scm_i_default_port_conversion_handler ()); scm_i_default_string_failed_conversion_handler ());
} }
char * char *
@ -2169,13 +2178,14 @@ scm_to_port_stringn (SCM str, size_t *lenp, SCM port)
scm_t_port *pt = SCM_PTAB_ENTRY (port); scm_t_port *pt = SCM_PTAB_ENTRY (port);
if (scm_is_eq (pt->encoding, sym_ISO_8859_1) if (scm_is_eq (pt->encoding, sym_ISO_8859_1)
&& pt->ilseq_handler == SCM_FAILED_CONVERSION_ERROR) && scm_is_eq (pt->conversion_strategy, sym_error))
return scm_to_latin1_stringn (str, lenp); return scm_to_latin1_stringn (str, lenp);
else if (scm_is_eq (pt->encoding, sym_UTF_8)) else if (scm_is_eq (pt->encoding, sym_UTF_8))
return scm_to_utf8_stringn (str, lenp); return scm_to_utf8_stringn (str, lenp);
else else
return scm_to_stringn (str, lenp, scm_i_symbol_chars (pt->encoding), return scm_to_stringn (str, lenp, scm_i_symbol_chars (pt->encoding),
pt->ilseq_handler); scm_i_string_failed_conversion_handler
(scm_port_conversion_strategy (port)));
} }
/* Return a malloc(3)-allocated buffer containing the contents of STR encoded /* Return a malloc(3)-allocated buffer containing the contents of STR encoded

View file

@ -100,6 +100,9 @@ typedef enum
SCM_INTERNAL SCM scm_nullstr; SCM_INTERNAL SCM scm_nullstr;
SCM_INTERNAL scm_t_string_failed_conversion_handler
scm_i_default_string_failed_conversion_handler (void);
SCM_API SCM scm_string_p (SCM x); SCM_API SCM scm_string_p (SCM x);
SCM_API SCM scm_string (SCM chrs); SCM_API SCM scm_string (SCM chrs);
SCM_API SCM scm_make_string (SCM k, SCM chr); SCM_API SCM scm_make_string (SCM k, SCM chr);

View file

@ -52,6 +52,8 @@
* *
*/ */
SCM_SYMBOL (sym_UTF_8, "UTF-8");
scm_t_bits scm_tc16_strport; scm_t_bits scm_tc16_strport;
struct string_port { struct string_port {
@ -178,9 +180,9 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
stream->pos = byte_pos; stream->pos = byte_pos;
stream->len = len; stream->len = len;
return scm_c_make_port_with_encoding (scm_tc16_strport, modes, return
"UTF-8", scm_c_make_port_with_encoding (scm_tc16_strport, modes, sym_UTF_8,
scm_i_default_port_conversion_handler (), scm_i_default_port_conversion_strategy (),
(scm_t_bits) stream); (scm_t_bits) stream);
} }