mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-30 00:40:20 +02:00
Char readers peek into read buffer
* libguile/ports.c (scm_i_set_pending_eof): Remove now-unused helper.
  (peek_utf8_codepoint, peek_latin1_codepoint, peek_iconv_codepoint):
  (peek_codepoint): Refactor the fundamental character readers in Guile
  to peek into the read buffer instead of reading then unreading. This
  will allow Scheme to use the port buffer to convert, when we port this
  to Scheme.
  (get_codepoint): Use peek_codepoint.
  (scm_getc): Adapt.
  (scm_peek_char): Use peek_codepoint.
This commit is contained in:
parent
56c48d14ac
commit
1309ab8093
1 changed file with 153 additions and 222 deletions
375
libguile/ports.c
375
libguile/ports.c
|
@ -372,12 +372,6 @@ scm_set_port_get_natural_buffer_sizes
|
||||||
ptob->get_natural_buffer_sizes = get_natural_buffer_sizes;
|
ptob->get_natural_buffer_sizes = get_natural_buffer_sizes;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
scm_i_set_pending_eof (SCM port)
|
|
||||||
{
|
|
||||||
scm_port_buffer_set_has_eof_p (SCM_PTAB_ENTRY (port)->read_buf, SCM_BOOL_T);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
scm_i_clear_pending_eof (SCM port)
|
scm_i_clear_pending_eof (SCM port)
|
||||||
{
|
{
|
||||||
|
@ -1664,166 +1658,128 @@ utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
|
||||||
return codepoint;
|
return codepoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read a UTF-8 sequence from PORT. On success, return 0 and set
|
/* Peek a UTF-8 sequence from PORT. On success, return 0, set
|
||||||
*CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
|
*CODEPOINT to the codepoint that was read, and set *LEN to the length
|
||||||
representation, and set *LEN to the length in bytes. Return
|
in bytes. Return `EILSEQ' on error, setting *LEN to the shortest
|
||||||
`EILSEQ' on error. */
|
prefix that cannot begin a valid UTF-8 sequence. */
|
||||||
static int
|
static int
|
||||||
get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
|
peek_utf8_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
|
||||||
scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
|
||||||
{
|
{
|
||||||
#define ASSERT_NOT_EOF(b) \
|
int first_byte;
|
||||||
if (SCM_UNLIKELY ((b) == EOF)) \
|
|
||||||
goto invalid_seq
|
|
||||||
#define CONSUME_PEEKED_BYTE() \
|
|
||||||
scm_port_buffer_did_take (pt->read_buf, 1)
|
|
||||||
|
|
||||||
int byte;
|
first_byte = peek_byte_or_eof (port);
|
||||||
scm_t_port *pt;
|
if (first_byte == EOF)
|
||||||
|
|
||||||
*len = 0;
|
|
||||||
pt = SCM_PTAB_ENTRY (port);
|
|
||||||
|
|
||||||
byte = get_byte_or_eof (port);
|
|
||||||
if (byte == EOF)
|
|
||||||
{
|
{
|
||||||
*codepoint = EOF;
|
*codepoint = EOF;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
else if (first_byte < 0x80)
|
||||||
buf[0] = (scm_t_uint8) byte;
|
|
||||||
*len = 1;
|
|
||||||
|
|
||||||
if (buf[0] <= 0x7f)
|
|
||||||
/* 1-byte form. */
|
|
||||||
*codepoint = buf[0];
|
|
||||||
else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
|
|
||||||
{
|
{
|
||||||
/* 2-byte form. */
|
*codepoint = first_byte;
|
||||||
byte = peek_byte_or_eof (port);
|
*len = 1;
|
||||||
ASSERT_NOT_EOF (byte);
|
return 0;
|
||||||
|
|
||||||
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
|
|
||||||
goto invalid_seq;
|
|
||||||
|
|
||||||
CONSUME_PEEKED_BYTE ();
|
|
||||||
buf[1] = (scm_t_uint8) byte;
|
|
||||||
*len = 2;
|
|
||||||
|
|
||||||
*codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
|
|
||||||
| (buf[1] & 0x3f);
|
|
||||||
}
|
}
|
||||||
else if ((buf[0] & 0xf0) == 0xe0)
|
else if (first_byte >= 0xc2 && first_byte <= 0xdf)
|
||||||
{
|
{
|
||||||
/* 3-byte form. */
|
SCM read_buf = scm_fill_input (port, 2);
|
||||||
byte = peek_byte_or_eof (port);
|
size_t can_take = scm_port_buffer_can_take (read_buf);
|
||||||
ASSERT_NOT_EOF (byte);
|
const scm_t_uint8 *ptr = scm_port_buffer_take_pointer (read_buf);
|
||||||
|
|
||||||
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
|
if (can_take < 2 || (ptr[1] & 0xc0) != 0x80)
|
||||||
|| (buf[0] == 0xe0 && byte < 0xa0)
|
{
|
||||||
|| (buf[0] == 0xed && byte > 0x9f)))
|
*len = 1;
|
||||||
goto invalid_seq;
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
CONSUME_PEEKED_BYTE ();
|
*codepoint = (first_byte & 0x1f) << 6UL | (ptr[1] & 0x3f);
|
||||||
buf[1] = (scm_t_uint8) byte;
|
|
||||||
*len = 2;
|
*len = 2;
|
||||||
|
return 0;
|
||||||
byte = peek_byte_or_eof (port);
|
|
||||||
ASSERT_NOT_EOF (byte);
|
|
||||||
|
|
||||||
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
|
|
||||||
goto invalid_seq;
|
|
||||||
|
|
||||||
CONSUME_PEEKED_BYTE ();
|
|
||||||
buf[2] = (scm_t_uint8) byte;
|
|
||||||
*len = 3;
|
|
||||||
|
|
||||||
*codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
|
|
||||||
| ((scm_t_wchar) buf[1] & 0x3f) << 6UL
|
|
||||||
| (buf[2] & 0x3f);
|
|
||||||
}
|
}
|
||||||
else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
|
else if ((first_byte & 0xf0) == 0xe0)
|
||||||
{
|
{
|
||||||
/* 4-byte form. */
|
SCM read_buf = scm_fill_input (port, 3);
|
||||||
byte = peek_byte_or_eof (port);
|
size_t can_take = scm_port_buffer_can_take (read_buf);
|
||||||
ASSERT_NOT_EOF (byte);
|
const scm_t_uint8 *ptr = scm_port_buffer_take_pointer (read_buf);
|
||||||
|
|
||||||
if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
|
if (can_take < 2 || (ptr[1] & 0xc0) != 0x80
|
||||||
|| (buf[0] == 0xf0 && byte < 0x90)
|
|| (ptr[0] == 0xe0 && ptr[1] < 0xa0)
|
||||||
|| (buf[0] == 0xf4 && byte > 0x8f)))
|
|| (ptr[0] == 0xed && ptr[1] > 0x9f))
|
||||||
goto invalid_seq;
|
{
|
||||||
|
*len = 1;
|
||||||
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
CONSUME_PEEKED_BYTE ();
|
if (can_take < 3 || (ptr[2] & 0xc0) != 0x80)
|
||||||
buf[1] = (scm_t_uint8) byte;
|
{
|
||||||
*len = 2;
|
*len = 2;
|
||||||
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
byte = peek_byte_or_eof (port);
|
*codepoint = ((scm_t_wchar) ptr[0] & 0x0f) << 12UL
|
||||||
ASSERT_NOT_EOF (byte);
|
| ((scm_t_wchar) ptr[1] & 0x3f) << 6UL
|
||||||
|
| (ptr[2] & 0x3f);
|
||||||
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
|
|
||||||
goto invalid_seq;
|
|
||||||
|
|
||||||
CONSUME_PEEKED_BYTE ();
|
|
||||||
buf[2] = (scm_t_uint8) byte;
|
|
||||||
*len = 3;
|
*len = 3;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else if (first_byte >= 0xf0 && first_byte <= 0xf4)
|
||||||
|
{
|
||||||
|
SCM read_buf = scm_fill_input (port, 4);
|
||||||
|
size_t can_take = scm_port_buffer_can_take (read_buf);
|
||||||
|
const scm_t_uint8 *ptr = scm_port_buffer_take_pointer (read_buf);
|
||||||
|
|
||||||
byte = peek_byte_or_eof (port);
|
if (can_take < 2 || (ptr[1] & 0xc0) != 0x80
|
||||||
ASSERT_NOT_EOF (byte);
|
|| (ptr[0] == 0xf0 && ptr[1] < 0x90)
|
||||||
|
|| (ptr[0] == 0xf4 && ptr[1] > 0x8f))
|
||||||
|
{
|
||||||
|
*len = 1;
|
||||||
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
|
if (can_take < 3 || (ptr[2] & 0xc0) != 0x80)
|
||||||
goto invalid_seq;
|
{
|
||||||
|
*len = 2;
|
||||||
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
CONSUME_PEEKED_BYTE ();
|
if (can_take < 4 || (ptr[3] & 0xc0) != 0x80)
|
||||||
buf[3] = (scm_t_uint8) byte;
|
{
|
||||||
|
*len = 3;
|
||||||
|
return EILSEQ;
|
||||||
|
}
|
||||||
|
|
||||||
|
*codepoint = ((scm_t_wchar) ptr[0] & 0x07) << 18UL
|
||||||
|
| ((scm_t_wchar) ptr[1] & 0x3f) << 12UL
|
||||||
|
| ((scm_t_wchar) ptr[2] & 0x3f) << 6UL
|
||||||
|
| (ptr[3] & 0x3f);
|
||||||
*len = 4;
|
*len = 4;
|
||||||
|
return 0;
|
||||||
*codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
|
|
||||||
| ((scm_t_wchar) buf[1] & 0x3f) << 12UL
|
|
||||||
| ((scm_t_wchar) buf[2] & 0x3f) << 6UL
|
|
||||||
| (buf[3] & 0x3f);
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
goto invalid_seq;
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
invalid_seq:
|
|
||||||
/* Here we could choose to consume the faulty byte when it's not a
|
|
||||||
valid starting byte, but it's not a requirement. What Section 3.9
|
|
||||||
of Unicode 6.0.0 mandates, though, is to not consume a byte that
|
|
||||||
would otherwise be a valid starting byte. */
|
|
||||||
|
|
||||||
return EILSEQ;
|
|
||||||
|
|
||||||
#undef CONSUME_PEEKED_BYTE
|
|
||||||
#undef ASSERT_NOT_EOF
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Read an ISO-8859-1 codepoint (a byte) from PORT. On success, return
|
|
||||||
0 and set *CODEPOINT to the codepoint that was read, fill BUF with
|
|
||||||
its UTF-8 representation, and set *LEN to the length in bytes.
|
|
||||||
Return `EILSEQ' on error. */
|
|
||||||
static int
|
|
||||||
get_latin1_codepoint (SCM port, scm_t_wchar *codepoint,
|
|
||||||
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
|
||||||
{
|
|
||||||
*codepoint = get_byte_or_eof (port);
|
|
||||||
|
|
||||||
if (*codepoint == EOF)
|
|
||||||
*len = 0;
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*len = 1;
|
*len = 1;
|
||||||
buf[0] = *codepoint;
|
return EILSEQ;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Peek an ISO-8859-1 codepoint (a byte) from PORT. On success, return
|
||||||
|
0, set *CODEPOINT to the codepoint that was peeked, and set *LEN to
|
||||||
|
the length in bytes. No encoding error is possible. */
|
||||||
|
static int
|
||||||
|
peek_latin1_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
|
||||||
|
{
|
||||||
|
*codepoint = peek_byte_or_eof (port);
|
||||||
|
if (*codepoint == EOF)
|
||||||
|
*len = 0;
|
||||||
|
else
|
||||||
|
*len = 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Likewise, read a byte sequence from PORT, passing it through its
|
/* Peek a codepoint from PORT, decoding it through iconv. On success,
|
||||||
input conversion descriptor. */
|
return 0, set *CODEPOINT to the codepoint that was peeked, and set
|
||||||
|
*LEN to the length in bytes. Return `EILSEQ' on decoding error. */
|
||||||
static int
|
static int
|
||||||
get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
|
peek_iconv_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
|
||||||
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
|
||||||
{
|
{
|
||||||
scm_t_iconv_descriptors *id;
|
scm_t_iconv_descriptors *id;
|
||||||
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
|
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
|
||||||
|
@ -1833,40 +1789,38 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
int byte_read;
|
SCM read_buf;
|
||||||
char *input, *output;
|
char *input, *output;
|
||||||
size_t input_left, output_left, done;
|
size_t input_left, output_left, done;
|
||||||
|
|
||||||
byte_read = get_byte_or_eof (port);
|
read_buf = scm_fill_input (port, input_size + 1);
|
||||||
if (SCM_UNLIKELY (byte_read == EOF))
|
if (scm_port_buffer_can_take (read_buf) <= input_size)
|
||||||
{
|
{
|
||||||
if (SCM_LIKELY (input_size == 0))
|
if (input_size == 0)
|
||||||
|
/* Normal EOF. */
|
||||||
{
|
{
|
||||||
*codepoint = (scm_t_wchar) EOF;
|
*codepoint = (scm_t_wchar) EOF;
|
||||||
*len = input_size;
|
*len = 0;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
/* EOF found in the middle of a multibyte character. */
|
||||||
/* EOF found in the middle of a multibyte character. */
|
return EILSEQ;
|
||||||
scm_i_set_pending_eof (port);
|
|
||||||
return EILSEQ;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
buf[input_size++] = byte_read;
|
input_size++;
|
||||||
|
input = (char *) scm_port_buffer_take_pointer (read_buf);
|
||||||
input = buf;
|
|
||||||
input_left = input_size;
|
input_left = input_size;
|
||||||
output = (char *) utf8_buf;
|
output = (char *) utf8_buf;
|
||||||
output_left = sizeof (utf8_buf);
|
output_left = sizeof (utf8_buf);
|
||||||
|
|
||||||
|
/* FIXME: locking! */
|
||||||
done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
|
done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
|
||||||
|
|
||||||
if (done == (size_t) -1)
|
if (done == (size_t) -1)
|
||||||
{
|
{
|
||||||
int err = errno;
|
int err = errno;
|
||||||
if (SCM_LIKELY (err == EINVAL))
|
if (err == EINVAL)
|
||||||
/* The input byte sequence did not form a complete
|
/* The input byte sequence did not form a complete
|
||||||
character. Read another byte and try again. */
|
character. Read another byte and try again. */
|
||||||
continue;
|
continue;
|
||||||
|
@ -1876,47 +1830,38 @@ get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
size_t output_size = sizeof (utf8_buf) - output_left;
|
size_t output_size = sizeof (utf8_buf) - output_left;
|
||||||
if (SCM_LIKELY (output_size > 0))
|
if (output_size == 0)
|
||||||
{
|
/* iconv consumed some bytes without producing any output.
|
||||||
/* iconv generated output. Convert the UTF8_BUF sequence
|
Most likely this means that a Unicode byte-order mark
|
||||||
to a Unicode code point. */
|
(BOM) was consumed. In any case, keep going until we get
|
||||||
*codepoint = utf8_to_codepoint (utf8_buf, output_size);
|
output. */
|
||||||
*len = input_size;
|
continue;
|
||||||
return 0;
|
|
||||||
}
|
/* iconv generated output. Convert the UTF8_BUF sequence
|
||||||
else
|
to a Unicode code point. */
|
||||||
{
|
*codepoint = utf8_to_codepoint (utf8_buf, output_size);
|
||||||
/* iconv consumed some bytes without producing any output.
|
*len = input_size;
|
||||||
Most likely this means that a Unicode byte-order mark
|
return 0;
|
||||||
(BOM) was consumed, which should not be included in the
|
|
||||||
returned buf. Shift any remaining bytes to the beginning
|
|
||||||
of buf, and continue the loop. */
|
|
||||||
memmove (buf, input, input_left);
|
|
||||||
input_size = input_left;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
|
/* Peek a codepoint from PORT and return it in *CODEPOINT. Set *LEN to
|
||||||
with the byte representation of the codepoint in PORT's encoding, and
|
the length in bytes of that representation. Return 0 on success and
|
||||||
set *LEN to the length in bytes of that representation. Return 0 on
|
an errno value on error. */
|
||||||
success and an errno value on error. */
|
|
||||||
static SCM_C_INLINE int
|
static SCM_C_INLINE int
|
||||||
get_codepoint (SCM port, scm_t_wchar *codepoint,
|
peek_codepoint (SCM port, scm_t_wchar *codepoint, size_t *len)
|
||||||
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||||
scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
|
scm_t_port_internal *pti = SCM_PORT_GET_INTERNAL (port);
|
||||||
|
|
||||||
if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
|
if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8)
|
||||||
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
|
err = peek_utf8_codepoint (port, codepoint, len);
|
||||||
else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
|
else if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
|
||||||
err = get_latin1_codepoint (port, codepoint, buf, len);
|
err = peek_latin1_codepoint (port, codepoint, len);
|
||||||
else
|
else
|
||||||
err = get_iconv_codepoint (port, codepoint, buf, len);
|
err = peek_iconv_codepoint (port, codepoint, len);
|
||||||
|
|
||||||
if (SCM_LIKELY (err == 0))
|
if (SCM_LIKELY (err == 0))
|
||||||
{
|
{
|
||||||
|
@ -1934,31 +1879,50 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
|
||||||
&& (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
|
&& (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8
|
||||||
|| strcmp (pt->encoding, "UTF-16") == 0
|
|| strcmp (pt->encoding, "UTF-16") == 0
|
||||||
|| strcmp (pt->encoding, "UTF-32") == 0)))
|
|| strcmp (pt->encoding, "UTF-32") == 0)))
|
||||||
return get_codepoint (port, codepoint, buf, len);
|
{
|
||||||
|
scm_port_buffer_did_take (pt->read_buf, *len);
|
||||||
|
return peek_codepoint (port, codepoint, len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
update_port_lf (*codepoint, port);
|
|
||||||
}
|
}
|
||||||
else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
|
else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
|
||||||
{
|
{
|
||||||
*codepoint = '?';
|
*codepoint = '?';
|
||||||
err = 0;
|
err = 0;
|
||||||
update_port_lf (*codepoint, port);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SCM_C_INLINE int
|
||||||
|
get_codepoint (SCM port, scm_t_wchar *codepoint)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
size_t len = 0;
|
||||||
|
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||||
|
|
||||||
|
err = peek_codepoint (port, codepoint, &len);
|
||||||
|
scm_port_buffer_did_take (pt->read_buf, len);
|
||||||
|
if (err != 0 && pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
|
||||||
|
{
|
||||||
|
*codepoint = '?';
|
||||||
|
err = 0;
|
||||||
|
}
|
||||||
|
if (*codepoint == EOF)
|
||||||
|
scm_i_clear_pending_eof (port);
|
||||||
|
update_port_lf (*codepoint, port);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
/* Read a codepoint from PORT and return it. */
|
/* Read a codepoint from PORT and return it. */
|
||||||
scm_t_wchar
|
scm_t_wchar
|
||||||
scm_getc (SCM port)
|
scm_getc (SCM port)
|
||||||
#define FUNC_NAME "scm_getc"
|
#define FUNC_NAME "scm_getc"
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
size_t len;
|
|
||||||
scm_t_wchar codepoint;
|
scm_t_wchar codepoint;
|
||||||
char buf[SCM_MBCHAR_BUF_SIZE];
|
|
||||||
|
|
||||||
err = get_codepoint (port, &codepoint, buf, &len);
|
err = get_codepoint (port, &codepoint);
|
||||||
if (SCM_UNLIKELY (err != 0))
|
if (SCM_UNLIKELY (err != 0))
|
||||||
/* At this point PORT should point past the invalid encoding, as per
|
/* At this point PORT should point past the invalid encoding, as per
|
||||||
R6RS-lib Section 8.2.4. */
|
R6RS-lib Section 8.2.4. */
|
||||||
|
@ -2141,55 +2105,22 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
|
||||||
"sequence when the error is raised.\n")
|
"sequence when the error is raised.\n")
|
||||||
#define FUNC_NAME s_scm_peek_char
|
#define FUNC_NAME s_scm_peek_char
|
||||||
{
|
{
|
||||||
int first_byte, err;
|
int err;
|
||||||
SCM result;
|
|
||||||
scm_t_wchar c;
|
scm_t_wchar c;
|
||||||
char bytes[SCM_MBCHAR_BUF_SIZE];
|
|
||||||
long column, line;
|
|
||||||
size_t len = 0;
|
size_t len = 0;
|
||||||
scm_t_port_internal *pti;
|
|
||||||
|
|
||||||
if (SCM_UNBNDP (port))
|
if (SCM_UNBNDP (port))
|
||||||
port = scm_current_input_port ();
|
port = scm_current_input_port ();
|
||||||
SCM_VALIDATE_OPINPORT (1, port);
|
SCM_VALIDATE_OPINPORT (1, port);
|
||||||
pti = SCM_PORT_GET_INTERNAL (port);
|
|
||||||
|
|
||||||
/* First, a couple fast paths. */
|
err = peek_codepoint (port, &c, &len);
|
||||||
first_byte = peek_byte_or_eof (port);
|
|
||||||
if (first_byte == EOF)
|
|
||||||
return SCM_EOF_VAL;
|
|
||||||
if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1)
|
|
||||||
return SCM_MAKE_CHAR (first_byte);
|
|
||||||
if (pti->encoding_mode == SCM_PORT_ENCODING_MODE_UTF8 && first_byte < 0x80)
|
|
||||||
return SCM_MAKE_CHAR (first_byte);
|
|
||||||
|
|
||||||
/* Now the slow paths. */
|
if (err == 0)
|
||||||
column = SCM_COL (port);
|
return c == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (c);
|
||||||
line = SCM_LINUM (port);
|
|
||||||
|
|
||||||
err = get_codepoint (port, &c, bytes, &len);
|
scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
|
||||||
|
/* Not reached. */
|
||||||
scm_unget_bytes ((unsigned char *) bytes, len, port);
|
return SCM_BOOL_F;
|
||||||
|
|
||||||
SCM_COL (port) = column;
|
|
||||||
SCM_LINUM (port) = line;
|
|
||||||
|
|
||||||
if (SCM_UNLIKELY (err != 0))
|
|
||||||
{
|
|
||||||
scm_decoding_error (FUNC_NAME, err, "input decoding error", port);
|
|
||||||
|
|
||||||
/* Shouldn't happen since `catch' always aborts to prompt. */
|
|
||||||
result = SCM_BOOL_F;
|
|
||||||
}
|
|
||||||
else if (c == EOF)
|
|
||||||
{
|
|
||||||
scm_i_set_pending_eof (port);
|
|
||||||
result = SCM_EOF_VAL;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
result = SCM_MAKE_CHAR (c);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue