mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 20:00:19 +02:00
Thread-safety fixes for iconv and ports
* libguile/ports-internal.h (scm_t_port): Rework to store iconv descriptors inline, so that the port finalizer doesn't race with the iconv descriptor finalizer. Access is serialized through a lock. Fixes a bug whereby if the port finalizer and the descriptor finalizer run on different threads, the close-port run by the port finalizer could try to free the iconv descriptors at the same time as the descriptor finalizer. * libguile/ports.c (iconv_lock): New static variable. (scm_c_make_port_with_encoding): Initialize iconv-related fields. (scm_close_port): Lock while frobbing iconv descriptors. (prepare_iconv_descriptors): Adapt. (scm_specialize_port_encoding_x, scm_i_set_port_encoding_x): Lock while preparing iconv descriptors. (scm_port_acquire_iconv_descriptors) (scm_port_release_iconv_descriptors): New functions, which replace scm_i_port_iconv_descriptors. (scm_port_decode_char): Lock around iconv operations. (port_clear_stream_start_for_bom_write): Acquire iconv descriptors before checking precise_encoding, to make sure precise_encoding is initialized. * libguile/print.c (display_string_using_iconv): Adapt to use the new interface to get iconv descriptors from a port.
This commit is contained in:
parent
c95a19376b
commit
6bf7ec0c9c
3 changed files with 128 additions and 130 deletions
|
@ -23,6 +23,7 @@
|
||||||
#define SCM_PORTS_INTERNAL
|
#define SCM_PORTS_INTERNAL
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
#include <iconv.h>
|
||||||
|
|
||||||
#include "libguile/_scm.h"
|
#include "libguile/_scm.h"
|
||||||
#include "libguile/ports.h"
|
#include "libguile/ports.h"
|
||||||
|
@ -302,24 +303,6 @@ scm_port_buffer_putback (SCM buf, const scm_t_uint8 *src, size_t count)
|
||||||
src, count);
|
src, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This is a separate object so that only those ports that use iconv
|
|
||||||
cause finalizers to be registered. */
|
|
||||||
struct scm_iconv_descriptors
|
|
||||||
{
|
|
||||||
/* This is the same as pt->encoding, except if pt->encoding is UTF-16
|
|
||||||
or UTF-32, in which case this is UTF-16LE or a similar
|
|
||||||
byte-order-specialed version of UTF-16 or UTF-32. We don't re-set
|
|
||||||
pt->encoding because being just plain UTF-16 or UTF-32 has an
|
|
||||||
additional meaning, being that we should consume and produce byte
|
|
||||||
order marker codepoints as appropriate. */
|
|
||||||
SCM precise_encoding;
|
|
||||||
/* input/output iconv conversion descriptors */
|
|
||||||
void *input_cd;
|
|
||||||
void *output_cd;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef struct scm_iconv_descriptors scm_t_iconv_descriptors;
|
|
||||||
|
|
||||||
struct scm_t_port
|
struct scm_t_port
|
||||||
{
|
{
|
||||||
/* Source location information. */
|
/* Source location information. */
|
||||||
|
@ -342,15 +325,26 @@ struct scm_t_port
|
||||||
/* True if the port is random access. Implies that the buffers must
|
/* True if the port is random access. Implies that the buffers must
|
||||||
be flushed before switching between reading and writing, seeking,
|
be flushed before switching between reading and writing, seeking,
|
||||||
and so on. */
|
and so on. */
|
||||||
int rw_random;
|
unsigned rw_random : 1;
|
||||||
|
unsigned at_stream_start_for_bom_read : 1;
|
||||||
|
unsigned at_stream_start_for_bom_write : 1;
|
||||||
|
|
||||||
/* Character encoding support. */
|
/* Character encoding support. */
|
||||||
SCM encoding; /* A symbol of upper-case ASCII. */
|
SCM encoding; /* A symbol of upper-case ASCII. */
|
||||||
SCM conversion_strategy; /* A symbol; either substitute, error, or escape. */
|
SCM conversion_strategy; /* A symbol; either substitute, error, or escape. */
|
||||||
|
|
||||||
unsigned at_stream_start_for_bom_read : 1;
|
/* This is the same as pt->encoding, except if `encoding' is UTF-16 or
|
||||||
unsigned at_stream_start_for_bom_write : 1;
|
UTF-32, in which case this is UTF-16LE or a similar
|
||||||
scm_t_iconv_descriptors *iconv_descriptors;
|
byte-order-specialed version of UTF-16 or UTF-32. This is a
|
||||||
|
separate field from `encoding' because being just plain UTF-16 or
|
||||||
|
UTF-32 has an additional meaning, being that we should consume and
|
||||||
|
produce byte order marker codepoints as appropriate. Set to #f
|
||||||
|
before the iconv descriptors have been opened. */
|
||||||
|
SCM precise_encoding; /* with iconv_lock */
|
||||||
|
iconv_t input_cd; /* with iconv_lock */
|
||||||
|
iconv_t output_cd; /* with iconv_lock */
|
||||||
|
|
||||||
|
/* Port properties. */
|
||||||
SCM alist;
|
SCM alist;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -359,6 +353,9 @@ struct scm_t_port
|
||||||
#define SCM_FILENAME(x) (SCM_PORT (x)->file_name)
|
#define SCM_FILENAME(x) (SCM_PORT (x)->file_name)
|
||||||
#define SCM_SET_FILENAME(x, n) (SCM_PORT (x)->file_name = (n))
|
#define SCM_SET_FILENAME(x, n) (SCM_PORT (x)->file_name = (n))
|
||||||
|
|
||||||
SCM_INTERNAL scm_t_iconv_descriptors * scm_i_port_iconv_descriptors (SCM port);
|
SCM_INTERNAL void scm_port_acquire_iconv_descriptors (SCM port,
|
||||||
|
iconv_t *input_cd,
|
||||||
|
iconv_t *output_cd);
|
||||||
|
SCM_INTERNAL void scm_port_release_iconv_descriptors (SCM port);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
201
libguile/ports.c
201
libguile/ports.c
|
@ -110,6 +110,11 @@ static SCM sym_substitute;
|
||||||
static SCM sym_escape;
|
static SCM sym_escape;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* We have to serialize operations on any given iconv descriptor. */
|
||||||
|
static scm_i_pthread_mutex_t iconv_lock = SCM_I_PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* See Unicode 8.0 section 5.22, "Best Practice for U+FFFD
|
/* See Unicode 8.0 section 5.22, "Best Practice for U+FFFD
|
||||||
|
@ -675,12 +680,15 @@ scm_c_make_port_with_encoding (scm_t_port_type *ptob, unsigned long mode_bits,
|
||||||
pt->encoding = encoding;
|
pt->encoding = encoding;
|
||||||
pt->conversion_strategy = conversion_strategy;
|
pt->conversion_strategy = conversion_strategy;
|
||||||
pt->file_name = SCM_BOOL_F;
|
pt->file_name = SCM_BOOL_F;
|
||||||
pt->iconv_descriptors = NULL;
|
|
||||||
pt->position = scm_cons (SCM_INUM0, SCM_INUM0);
|
pt->position = scm_cons (SCM_INUM0, SCM_INUM0);
|
||||||
|
|
||||||
pt->at_stream_start_for_bom_read = 1;
|
pt->at_stream_start_for_bom_read = 1;
|
||||||
pt->at_stream_start_for_bom_write = 1;
|
pt->at_stream_start_for_bom_write = 1;
|
||||||
|
|
||||||
|
pt->precise_encoding = SCM_BOOL_F;
|
||||||
|
pt->input_cd = (iconv_t) -1;
|
||||||
|
pt->output_cd = (iconv_t) -1;
|
||||||
|
|
||||||
pt->alist = SCM_EOL;
|
pt->alist = SCM_EOL;
|
||||||
|
|
||||||
if (SCM_PORT_TYPE (ret)->flags & SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC)
|
if (SCM_PORT_TYPE (ret)->flags & SCM_PORT_TYPE_NEEDS_CLOSE_ON_GC)
|
||||||
|
@ -770,12 +778,6 @@ SCM_DEFINE (scm_eof_object_p, "eof-object?", 1, 0, 0,
|
||||||
|
|
||||||
/* Closing ports. */
|
/* Closing ports. */
|
||||||
|
|
||||||
static void close_iconv_descriptors (scm_t_iconv_descriptors *id);
|
|
||||||
|
|
||||||
/* scm_close_port
|
|
||||||
* Call the close operation on a port object.
|
|
||||||
* see also scm_close.
|
|
||||||
*/
|
|
||||||
SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
|
SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
|
||||||
(SCM port),
|
(SCM port),
|
||||||
"Close the specified port object. Return @code{#t} if it\n"
|
"Close the specified port object. Return @code{#t} if it\n"
|
||||||
|
@ -809,13 +811,17 @@ SCM_DEFINE (scm_close_port, "close-port", 1, 0, 0,
|
||||||
should be resilient to non-local exits. */
|
should be resilient to non-local exits. */
|
||||||
SCM_PORT_TYPE (port)->close (port);
|
SCM_PORT_TYPE (port)->close (port);
|
||||||
|
|
||||||
if (pt->iconv_descriptors)
|
scm_i_pthread_mutex_lock (&iconv_lock);
|
||||||
|
if (scm_is_true (pt->precise_encoding))
|
||||||
{
|
{
|
||||||
/* If we don't get here, the iconv_descriptors finalizer will
|
if (pt->input_cd != (iconv_t) -1)
|
||||||
clean up. */
|
iconv_close (pt->input_cd);
|
||||||
close_iconv_descriptors (pt->iconv_descriptors);
|
if (pt->output_cd != (iconv_t) -1)
|
||||||
pt->iconv_descriptors = NULL;
|
iconv_close (pt->output_cd);
|
||||||
|
pt->precise_encoding = SCM_BOOL_F;
|
||||||
|
pt->input_cd = pt->output_cd = (iconv_t) -1;
|
||||||
}
|
}
|
||||||
|
scm_i_pthread_mutex_unlock (&iconv_lock);
|
||||||
|
|
||||||
return SCM_BOOL_T;
|
return SCM_BOOL_T;
|
||||||
}
|
}
|
||||||
|
@ -979,51 +985,53 @@ static const unsigned char scm_utf16le_bom[2] = {0xFF, 0xFE};
|
||||||
static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
|
static const unsigned char scm_utf32be_bom[4] = {0x00, 0x00, 0xFE, 0xFF};
|
||||||
static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
|
static const unsigned char scm_utf32le_bom[4] = {0xFF, 0xFE, 0x00, 0x00};
|
||||||
|
|
||||||
|
/* Called with the iconv lock. Will release the lock before throwing
|
||||||
|
any error. */
|
||||||
static void
|
static void
|
||||||
finalize_iconv_descriptors (void *ptr, void *data)
|
prepare_iconv_descriptors (SCM port, SCM precise_encoding)
|
||||||
{
|
{
|
||||||
close_iconv_descriptors (ptr);
|
scm_t_port *pt = SCM_PORT (port);
|
||||||
}
|
|
||||||
|
|
||||||
static scm_t_iconv_descriptors *
|
|
||||||
open_iconv_descriptors (SCM precise_encoding, int reading, int writing)
|
|
||||||
{
|
|
||||||
const char *encoding;
|
|
||||||
scm_t_iconv_descriptors *id;
|
|
||||||
iconv_t input_cd, output_cd;
|
iconv_t input_cd, output_cd;
|
||||||
|
const char *encoding;
|
||||||
size_t i;
|
size_t i;
|
||||||
|
|
||||||
input_cd = (iconv_t) -1;
|
/* If the specified encoding is UTF-16 or UTF-32, then default to
|
||||||
output_cd = (iconv_t) -1;
|
big-endian byte order. This fallback isn't necessary if you read
|
||||||
|
on the port before writing to it, as the read will sniff the BOM if
|
||||||
|
any and specialize the encoding; see the manual. */
|
||||||
|
if (scm_is_eq (precise_encoding, sym_UTF_16))
|
||||||
|
precise_encoding = sym_UTF_16BE;
|
||||||
|
else if (scm_is_eq (precise_encoding, sym_UTF_32))
|
||||||
|
precise_encoding = sym_UTF_32BE;
|
||||||
|
|
||||||
|
if (scm_is_eq (pt->precise_encoding, precise_encoding))
|
||||||
|
return;
|
||||||
|
|
||||||
|
input_cd = output_cd = (iconv_t) -1;
|
||||||
|
|
||||||
|
if (!scm_is_symbol (precise_encoding))
|
||||||
|
goto invalid_encoding;
|
||||||
|
|
||||||
encoding = scm_i_symbol_chars (precise_encoding);
|
encoding = scm_i_symbol_chars (precise_encoding);
|
||||||
for (i = 0; encoding[i]; i++)
|
for (i = 0; encoding[i]; i++)
|
||||||
if (encoding[i] > 127)
|
if (encoding[i] > 127)
|
||||||
goto invalid_encoding;
|
goto invalid_encoding;
|
||||||
|
|
||||||
if (reading)
|
/* Open a iconv conversion descriptors between ENCODING and UTF-8. We
|
||||||
|
choose UTF-8, not UTF-32, because iconv implementations can
|
||||||
|
typically convert from anything to UTF-8, but not to UTF-32 (see
|
||||||
|
http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html,
|
||||||
|
for more details). */
|
||||||
|
|
||||||
|
if (SCM_INPUT_PORT_P (port))
|
||||||
{
|
{
|
||||||
/* Open an input iconv conversion descriptor, from ENCODING
|
|
||||||
to UTF-8. We choose UTF-8, not UTF-32, because iconv
|
|
||||||
implementations can typically convert from anything to
|
|
||||||
UTF-8, but not to UTF-32 (see
|
|
||||||
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
|
|
||||||
|
|
||||||
/* Assume opening an iconv descriptor causes about 16 KB of
|
|
||||||
allocation. */
|
|
||||||
scm_gc_register_allocation (16 * 1024);
|
|
||||||
|
|
||||||
input_cd = iconv_open ("UTF-8", encoding);
|
input_cd = iconv_open ("UTF-8", encoding);
|
||||||
if (input_cd == (iconv_t) -1)
|
if (input_cd == (iconv_t) -1)
|
||||||
goto invalid_encoding;
|
goto invalid_encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (writing)
|
if (SCM_OUTPUT_PORT_P (port))
|
||||||
{
|
{
|
||||||
/* Assume opening an iconv descriptor causes about 16 KB of
|
|
||||||
allocation. */
|
|
||||||
scm_gc_register_allocation (16 * 1024);
|
|
||||||
|
|
||||||
output_cd = iconv_open (encoding, "UTF-8");
|
output_cd = iconv_open (encoding, "UTF-8");
|
||||||
if (output_cd == (iconv_t) -1)
|
if (output_cd == (iconv_t) -1)
|
||||||
{
|
{
|
||||||
|
@ -1033,55 +1041,27 @@ open_iconv_descriptors (SCM precise_encoding, int reading, int writing)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
id = scm_gc_malloc_pointerless (sizeof (*id), "iconv descriptors");
|
if (pt->input_cd != (iconv_t) -1)
|
||||||
id->precise_encoding = precise_encoding;
|
iconv_close (pt->input_cd);
|
||||||
id->input_cd = input_cd;
|
if (pt->output_cd != (iconv_t) -1)
|
||||||
id->output_cd = output_cd;
|
iconv_close (pt->output_cd);
|
||||||
|
|
||||||
/* Register a finalizer to close the descriptors. */
|
pt->precise_encoding = precise_encoding;
|
||||||
scm_i_set_finalizer (id, finalize_iconv_descriptors, NULL);
|
pt->input_cd = input_cd;
|
||||||
|
pt->output_cd = output_cd;
|
||||||
|
|
||||||
return id;
|
/* Make sure this port has a finalizer. */
|
||||||
|
scm_i_set_finalizer (SCM2PTR (port), finalize_port, NULL);
|
||||||
|
|
||||||
|
return;
|
||||||
|
|
||||||
invalid_encoding:
|
invalid_encoding:
|
||||||
|
scm_i_pthread_mutex_unlock (&iconv_lock);
|
||||||
scm_misc_error ("open_iconv_descriptors",
|
scm_misc_error ("open_iconv_descriptors",
|
||||||
"invalid or unknown character encoding ~s",
|
"invalid or unknown character encoding ~s",
|
||||||
scm_list_1 (precise_encoding));
|
scm_list_1 (precise_encoding));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
close_iconv_descriptors (scm_t_iconv_descriptors *id)
|
|
||||||
{
|
|
||||||
if (id->input_cd != (iconv_t) -1)
|
|
||||||
iconv_close (id->input_cd);
|
|
||||||
if (id->output_cd != (iconv_t) -1)
|
|
||||||
iconv_close (id->output_cd);
|
|
||||||
id->input_cd = (void *) -1;
|
|
||||||
id->output_cd = (void *) -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
prepare_iconv_descriptors (SCM port, SCM encoding)
|
|
||||||
{
|
|
||||||
scm_t_port *pt = SCM_PORT (port);
|
|
||||||
scm_t_iconv_descriptors *desc = pt->iconv_descriptors;
|
|
||||||
|
|
||||||
/* If the specified encoding is UTF-16 or UTF-32, then default to
|
|
||||||
big-endian byte order. This fallback isn't necessary if you read
|
|
||||||
on the port before writing to it, as the read will sniff the BOM if
|
|
||||||
any and specialize the encoding; see the manual. */
|
|
||||||
if (scm_is_eq (encoding, sym_UTF_16))
|
|
||||||
encoding = sym_UTF_16BE;
|
|
||||||
else if (scm_is_eq (encoding, sym_UTF_32))
|
|
||||||
encoding = sym_UTF_32BE;
|
|
||||||
|
|
||||||
if (desc && scm_is_eq (desc->precise_encoding, encoding))
|
|
||||||
return;
|
|
||||||
|
|
||||||
pt->iconv_descriptors = open_iconv_descriptors
|
|
||||||
(encoding, SCM_INPUT_PORT_P (port), SCM_OUTPUT_PORT_P (port));
|
|
||||||
}
|
|
||||||
|
|
||||||
SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
|
SCM_INTERNAL SCM scm_specialize_port_encoding_x (SCM port, SCM encoding);
|
||||||
SCM_DEFINE (scm_specialize_port_encoding_x,
|
SCM_DEFINE (scm_specialize_port_encoding_x,
|
||||||
"specialize-port-encoding!", 2, 0, 0,
|
"specialize-port-encoding!", 2, 0, 0,
|
||||||
|
@ -1107,33 +1087,41 @@ SCM_DEFINE (scm_specialize_port_encoding_x,
|
||||||
else
|
else
|
||||||
SCM_OUT_OF_RANGE (2, encoding);
|
SCM_OUT_OF_RANGE (2, encoding);
|
||||||
|
|
||||||
|
scm_i_pthread_mutex_lock (&iconv_lock);
|
||||||
prepare_iconv_descriptors (port, encoding);
|
prepare_iconv_descriptors (port, encoding);
|
||||||
|
scm_i_pthread_mutex_unlock (&iconv_lock);
|
||||||
|
|
||||||
return SCM_UNSPECIFIED;
|
return SCM_UNSPECIFIED;
|
||||||
}
|
}
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
scm_t_iconv_descriptors *
|
/* Acquire the iconv lock and fill in *INPUT_CD and/or *OUTPUT_CD. */
|
||||||
scm_i_port_iconv_descriptors (SCM port)
|
void
|
||||||
|
scm_port_acquire_iconv_descriptors (SCM port, iconv_t *input_cd,
|
||||||
|
iconv_t *output_cd)
|
||||||
{
|
{
|
||||||
scm_t_port *pt = SCM_PORT (port);
|
scm_t_port *pt = SCM_PORT (port);
|
||||||
|
|
||||||
if (!pt->iconv_descriptors)
|
scm_i_pthread_mutex_lock (&iconv_lock);
|
||||||
|
if (scm_is_false (pt->precise_encoding))
|
||||||
prepare_iconv_descriptors (port, pt->encoding);
|
prepare_iconv_descriptors (port, pt->encoding);
|
||||||
|
if (input_cd)
|
||||||
|
*input_cd = pt->input_cd;
|
||||||
|
if (output_cd)
|
||||||
|
*output_cd = pt->output_cd;
|
||||||
|
}
|
||||||
|
|
||||||
return pt->iconv_descriptors;
|
void
|
||||||
|
scm_port_release_iconv_descriptors (SCM port)
|
||||||
|
{
|
||||||
|
scm_i_pthread_mutex_unlock (&iconv_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The name of the encoding is itself encoded in ASCII. */
|
/* The name of the encoding is itself encoded in ASCII. */
|
||||||
void
|
void
|
||||||
scm_i_set_port_encoding_x (SCM port, const char *encoding)
|
scm_i_set_port_encoding_x (SCM port, const char *encoding)
|
||||||
{
|
{
|
||||||
scm_t_port *pt;
|
scm_t_port *pt = SCM_PORT (port);
|
||||||
scm_t_iconv_descriptors *prev;
|
|
||||||
|
|
||||||
/* Set the character encoding for this port. */
|
|
||||||
pt = SCM_PORT (port);
|
|
||||||
prev = pt->iconv_descriptors;
|
|
||||||
|
|
||||||
/* In order to handle cases where the encoding changes mid-stream
|
/* In order to handle cases where the encoding changes mid-stream
|
||||||
(e.g. within an HTTP stream, or within a file that is composed of
|
(e.g. within an HTTP stream, or within a file that is composed of
|
||||||
|
@ -1144,9 +1132,14 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
|
||||||
pt->at_stream_start_for_bom_write = 1;
|
pt->at_stream_start_for_bom_write = 1;
|
||||||
pt->encoding = canonicalize_encoding (encoding);
|
pt->encoding = canonicalize_encoding (encoding);
|
||||||
|
|
||||||
pt->iconv_descriptors = NULL;
|
scm_i_pthread_mutex_lock (&iconv_lock);
|
||||||
if (prev)
|
if (pt->input_cd != (iconv_t) -1)
|
||||||
close_iconv_descriptors (prev);
|
iconv_close (pt->input_cd);
|
||||||
|
if (pt->output_cd != (iconv_t) -1)
|
||||||
|
iconv_close (pt->output_cd);
|
||||||
|
pt->precise_encoding = SCM_BOOL_F;
|
||||||
|
pt->input_cd = pt->output_cd = (iconv_t) -1;
|
||||||
|
scm_i_pthread_mutex_unlock (&iconv_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
|
SCM_DEFINE (scm_sys_port_encoding, "%port-encoding", 1, 0, 0,
|
||||||
|
@ -1783,7 +1776,7 @@ SCM_DEFINE (scm_port_decode_char, "port-decode-char", 4, 0, 0,
|
||||||
{
|
{
|
||||||
char *input, *output;
|
char *input, *output;
|
||||||
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
|
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
|
||||||
scm_t_iconv_descriptors *id;
|
iconv_t input_cd;
|
||||||
size_t c_start, c_count;
|
size_t c_start, c_count;
|
||||||
size_t input_left, output_left, done;
|
size_t input_left, output_left, done;
|
||||||
|
|
||||||
|
@ -1794,14 +1787,15 @@ SCM_DEFINE (scm_port_decode_char, "port-decode-char", 4, 0, 0,
|
||||||
SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (bv));
|
SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (bv));
|
||||||
SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (bv) - c_start);
|
SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (bv) - c_start);
|
||||||
|
|
||||||
id = scm_i_port_iconv_descriptors (port);
|
|
||||||
input = (char *) SCM_BYTEVECTOR_CONTENTS (bv) + c_start;
|
input = (char *) SCM_BYTEVECTOR_CONTENTS (bv) + c_start;
|
||||||
input_left = c_count;
|
input_left = c_count;
|
||||||
output = (char *) utf8_buf;
|
output = (char *) utf8_buf;
|
||||||
output_left = sizeof (utf8_buf);
|
output_left = sizeof (utf8_buf);
|
||||||
|
|
||||||
/* FIXME: locking! */
|
/* FIXME: locking! */
|
||||||
done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
|
scm_port_acquire_iconv_descriptors (port, &input_cd, NULL);
|
||||||
|
done = iconv (input_cd, &input, &input_left, &output, &output_left);
|
||||||
|
scm_port_release_iconv_descriptors (port);
|
||||||
|
|
||||||
if (done == (size_t) -1)
|
if (done == (size_t) -1)
|
||||||
{
|
{
|
||||||
|
@ -1856,7 +1850,8 @@ peek_iconv_codepoint (SCM port, size_t *len)
|
||||||
/* Make sure iconv descriptors have been opened even if
|
/* Make sure iconv descriptors have been opened even if
|
||||||
there were no bytes, to be sure that a decoding error
|
there were no bytes, to be sure that a decoding error
|
||||||
is signalled if the encoding itself was invalid. */
|
is signalled if the encoding itself was invalid. */
|
||||||
scm_i_port_iconv_descriptors (port);
|
scm_port_acquire_iconv_descriptors (port, NULL, NULL);
|
||||||
|
scm_port_release_iconv_descriptors (port);
|
||||||
return EOF;
|
return EOF;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2469,16 +2464,22 @@ port_clear_stream_start_for_bom_write (SCM port, enum bom_io_mode io_mode)
|
||||||
/* Write a BOM if appropriate. */
|
/* Write a BOM if appropriate. */
|
||||||
if (scm_is_eq (pt->encoding, sym_UTF_16))
|
if (scm_is_eq (pt->encoding, sym_UTF_16))
|
||||||
{
|
{
|
||||||
scm_t_iconv_descriptors *id = scm_i_port_iconv_descriptors (port);
|
SCM precise_encoding;
|
||||||
if (scm_is_eq (id->precise_encoding, sym_UTF_16LE))
|
scm_port_acquire_iconv_descriptors (port, NULL, NULL);
|
||||||
|
precise_encoding = pt->precise_encoding;
|
||||||
|
scm_port_release_iconv_descriptors (port);
|
||||||
|
if (scm_is_eq (precise_encoding, sym_UTF_16LE))
|
||||||
scm_c_write (port, scm_utf16le_bom, sizeof (scm_utf16le_bom));
|
scm_c_write (port, scm_utf16le_bom, sizeof (scm_utf16le_bom));
|
||||||
else
|
else
|
||||||
scm_c_write (port, scm_utf16be_bom, sizeof (scm_utf16be_bom));
|
scm_c_write (port, scm_utf16be_bom, sizeof (scm_utf16be_bom));
|
||||||
}
|
}
|
||||||
else if (scm_is_eq (pt->encoding, sym_UTF_32))
|
else if (scm_is_eq (pt->encoding, sym_UTF_32))
|
||||||
{
|
{
|
||||||
scm_t_iconv_descriptors *id = scm_i_port_iconv_descriptors (port);
|
SCM precise_encoding;
|
||||||
if (scm_is_eq (id->precise_encoding, sym_UTF_32LE))
|
scm_port_acquire_iconv_descriptors (port, NULL, NULL);
|
||||||
|
precise_encoding = pt->precise_encoding;
|
||||||
|
scm_port_release_iconv_descriptors (port);
|
||||||
|
if (scm_is_eq (precise_encoding, sym_UTF_32LE))
|
||||||
scm_c_write (port, scm_utf32le_bom, sizeof (scm_utf32le_bom));
|
scm_c_write (port, scm_utf32le_bom, sizeof (scm_utf32le_bom));
|
||||||
else
|
else
|
||||||
scm_c_write (port, scm_utf32be_bom, sizeof (scm_utf32be_bom));
|
scm_c_write (port, scm_utf32be_bom, sizeof (scm_utf32be_bom));
|
||||||
|
|
|
@ -24,7 +24,6 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <iconv.h>
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
|
@ -1026,9 +1025,7 @@ display_string_using_iconv (const void *str, int narrow_p, size_t len,
|
||||||
scm_t_string_failed_conversion_handler strategy)
|
scm_t_string_failed_conversion_handler strategy)
|
||||||
{
|
{
|
||||||
size_t printed;
|
size_t printed;
|
||||||
scm_t_iconv_descriptors *id;
|
iconv_t output_cd;
|
||||||
|
|
||||||
id = scm_i_port_iconv_descriptors (port);
|
|
||||||
|
|
||||||
printed = 0;
|
printed = 0;
|
||||||
|
|
||||||
|
@ -1057,8 +1054,9 @@ display_string_using_iconv (const void *str, int narrow_p, size_t len,
|
||||||
output = encoded_output;
|
output = encoded_output;
|
||||||
output_left = sizeof (encoded_output);
|
output_left = sizeof (encoded_output);
|
||||||
|
|
||||||
done = iconv (id->output_cd, &input, &input_left,
|
scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
|
||||||
&output, &output_left);
|
done = iconv (output_cd, &input, &input_left, &output, &output_left);
|
||||||
|
scm_port_release_iconv_descriptors (port);
|
||||||
|
|
||||||
output_len = sizeof (encoded_output) - output_left;
|
output_len = sizeof (encoded_output) - output_left;
|
||||||
|
|
||||||
|
@ -1067,7 +1065,9 @@ display_string_using_iconv (const void *str, int narrow_p, size_t len,
|
||||||
int errno_save = errno;
|
int errno_save = errno;
|
||||||
|
|
||||||
/* Reset the `iconv' state. */
|
/* Reset the `iconv' state. */
|
||||||
iconv (id->output_cd, NULL, NULL, NULL, NULL);
|
scm_port_acquire_iconv_descriptors (port, NULL, &output_cd);
|
||||||
|
iconv (output_cd, NULL, NULL, NULL, NULL);
|
||||||
|
scm_port_release_iconv_descriptors (port);
|
||||||
|
|
||||||
/* Print the OUTPUT_LEN bytes successfully converted. */
|
/* Print the OUTPUT_LEN bytes successfully converted. */
|
||||||
scm_lfwrite (encoded_output, output_len, port);
|
scm_lfwrite (encoded_output, output_len, port);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue