1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-22 03:30:22 +02:00

Merge remote-tracking branch 'origin/stable-2.0'

This commit is contained in:
Andy Wingo 2011-05-09 00:13:04 +02:00
commit e690a3cbf2
20 changed files with 1726 additions and 359 deletions

View file

@ -965,9 +965,12 @@ unpack (const ffi_type *type, void *loc, SCM x)
}
#undef FUNC_NAME
/* Return a Scheme representation of the foreign value at LOC of type TYPE. */
/* Return a Scheme representation of the foreign value at LOC of type
TYPE. When RETURN_VALUE_P is true, LOC is assumed to point to a
return value buffer; otherwise LOC is assumed to point to an
argument buffer. */
static SCM
pack (const ffi_type * type, const void *loc)
pack (const ffi_type * type, const void *loc, int return_value_p)
{
switch (type->type)
{
@ -977,22 +980,48 @@ pack (const ffi_type * type, const void *loc)
return scm_from_double (*(float *) loc);
case FFI_TYPE_DOUBLE:
return scm_from_double (*(double *) loc);
/* For integer return values smaller than `int', libffi stores the
result in an `ffi_arg'-long buffer, of which only the
significant bits must be kept---hence the pair of casts below.
See <http://thread.gmane.org/gmane.comp.lib.ffi.general/406>
for details. */
case FFI_TYPE_UINT8:
return scm_from_uint8 (*(scm_t_uint8 *) loc);
if (return_value_p)
return scm_from_uint8 ((scm_t_uint8) *(ffi_arg *) loc);
else
return scm_from_uint8 (* (scm_t_uint8 *) loc);
case FFI_TYPE_SINT8:
return scm_from_int8 (*(scm_t_int8 *) loc);
if (return_value_p)
return scm_from_int8 ((scm_t_int8) *(ffi_arg *) loc);
else
return scm_from_int8 (* (scm_t_int8 *) loc);
case FFI_TYPE_UINT16:
return scm_from_uint16 (*(scm_t_uint16 *) loc);
if (return_value_p)
return scm_from_uint16 ((scm_t_uint16) *(ffi_arg *) loc);
else
return scm_from_uint16 (* (scm_t_uint16 *) loc);
case FFI_TYPE_SINT16:
return scm_from_int16 (*(scm_t_int16 *) loc);
if (return_value_p)
return scm_from_int16 ((scm_t_int16) *(ffi_arg *) loc);
else
return scm_from_int16 (* (scm_t_int16 *) loc);
case FFI_TYPE_UINT32:
return scm_from_uint32 (*(scm_t_uint32 *) loc);
if (return_value_p)
return scm_from_uint32 ((scm_t_uint32) *(ffi_arg *) loc);
else
return scm_from_uint32 (* (scm_t_uint32 *) loc);
case FFI_TYPE_SINT32:
return scm_from_int32 (*(scm_t_int32 *) loc);
if (return_value_p)
return scm_from_int32 ((scm_t_int32) *(ffi_arg *) loc);
else
return scm_from_int32 (* (scm_t_int32 *) loc);
case FFI_TYPE_UINT64:
return scm_from_uint64 (*(scm_t_uint64 *) loc);
case FFI_TYPE_SINT64:
return scm_from_int64 (*(scm_t_int64 *) loc);
case FFI_TYPE_STRUCT:
{
void *mem = scm_gc_malloc_pointerless (type->size, "foreign");
@ -1060,7 +1089,7 @@ scm_i_foreign_call (SCM foreign, const SCM *argv)
/* off we go! */
ffi_call (cif, func, rvalue, args);
return pack (cif->rtype, rvalue);
return pack (cif->rtype, rvalue, 1);
}
@ -1082,7 +1111,7 @@ invoke_closure (ffi_cif *cif, void *ret, void **args, void *data)
/* Pack ARGS to SCM values, setting ARGV pointers. */
for (i = 0; i < cif->nargs; i++)
argv[i] = pack (cif->arg_types[i], args[i]);
argv[i] = pack (cif->arg_types[i], args[i], 0);
result = scm_call_n (proc, argv, cif->nargs);

View file

@ -26,6 +26,7 @@
#include <wchar.h>
#endif
#include <math.h>
#include <unistr.h>
#include "libguile/_scm.h"
@ -192,7 +193,7 @@ scm_hasher(SCM obj, unsigned long n, size_t d)
case scm_tc16_real:
{
double r = SCM_REAL_VALUE (obj);
if (floor (r) == r)
if (floor (r) == r && !isinf (r) && !isnan (r))
{
obj = scm_inexact_to_exact (obj);
return scm_to_ulong (scm_modulo (obj, scm_from_ulong (n)));

View file

@ -3,7 +3,8 @@
#ifndef SCM_INLINE_H
#define SCM_INLINE_H
/* Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
/* Copyright (C) 2001, 2002, 2003, 2004, 2006, 2008, 2009, 2010,
* 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@ -98,6 +99,7 @@ SCM_API int scm_is_pair (SCM x);
SCM_API int scm_is_string (SCM x);
SCM_API int scm_get_byte_or_eof (SCM port);
SCM_API int scm_peek_byte_or_eof (SCM port);
SCM_API void scm_putc (char c, SCM port);
SCM_API void scm_puts (const char *str_data, SCM port);
@ -362,7 +364,7 @@ scm_get_byte_or_eof (SCM port)
if (pt->read_pos >= pt->read_end)
{
if (scm_fill_input (port) == EOF)
if (SCM_UNLIKELY (scm_fill_input (port) == EOF))
return EOF;
}
@ -371,6 +373,34 @@ scm_get_byte_or_eof (SCM port)
return c;
}
/* Like `scm_get_byte_or_eof' but does not change PORT's `read_pos'. */
#ifndef SCM_INLINE_C_INCLUDING_INLINE_H
SCM_C_EXTERN_INLINE
#endif
int
scm_peek_byte_or_eof (SCM port)
{
int c;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
if (pt->rw_active == SCM_PORT_WRITE)
/* may be marginally faster than calling scm_flush. */
scm_ptobs[SCM_PTOBNUM (port)].flush (port);
if (pt->rw_random)
pt->rw_active = SCM_PORT_READ;
if (pt->read_pos >= pt->read_end)
{
if (SCM_UNLIKELY (scm_fill_input (port) == EOF))
return EOF;
}
c = *pt->read_pos;
return c;
}
#ifndef SCM_INLINE_C_INCLUDING_INLINE_H
SCM_C_EXTERN_INLINE
#endif

View file

@ -1057,6 +1057,7 @@ update_port_lf (scm_t_wchar c, SCM port)
switch (c)
{
case '\a':
case EOF:
break;
case '\b':
SCM_DECCOL (port);
@ -1115,23 +1116,154 @@ utf8_to_codepoint (const scm_t_uint8 *utf8_buf, size_t size)
return codepoint;
}
/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
with the byte representation of the codepoint in PORT's encoding, and
set *LEN to the length in bytes of that representation. Return 0 on
success and an errno value on error. */
/* Read a UTF-8 sequence from PORT. On success, return 0 and set
*CODEPOINT to the codepoint that was read, fill BUF with its UTF-8
representation, and set *LEN to the length in bytes. Return
`EILSEQ' on error. */
static int
get_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
get_utf8_codepoint (SCM port, scm_t_wchar *codepoint,
scm_t_uint8 buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
#define ASSERT_NOT_EOF(b) \
if (SCM_UNLIKELY ((b) == EOF)) \
goto invalid_seq
#define CONSUME_PEEKED_BYTE() \
pt->read_pos++
int byte;
scm_t_port *pt;
*len = 0;
pt = SCM_PTAB_ENTRY (port);
byte = scm_get_byte_or_eof (port);
if (byte == EOF)
{
*codepoint = EOF;
return 0;
}
buf[0] = (scm_t_uint8) byte;
*len = 1;
if (buf[0] <= 0x7f)
/* 1-byte form. */
*codepoint = buf[0];
else if (buf[0] >= 0xc2 && buf[0] <= 0xdf)
{
/* 2-byte form. */
byte = scm_peek_byte_or_eof (port);
ASSERT_NOT_EOF (byte);
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
goto invalid_seq;
CONSUME_PEEKED_BYTE ();
buf[1] = (scm_t_uint8) byte;
*len = 2;
*codepoint = ((scm_t_wchar) buf[0] & 0x1f) << 6UL
| (buf[1] & 0x3f);
}
else if ((buf[0] & 0xf0) == 0xe0)
{
/* 3-byte form. */
byte = scm_peek_byte_or_eof (port);
ASSERT_NOT_EOF (byte);
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80
|| (buf[0] == 0xe0 && byte < 0xa0)
|| (buf[0] == 0xed && byte > 0x9f)))
goto invalid_seq;
CONSUME_PEEKED_BYTE ();
buf[1] = (scm_t_uint8) byte;
*len = 2;
byte = scm_peek_byte_or_eof (port);
ASSERT_NOT_EOF (byte);
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
goto invalid_seq;
CONSUME_PEEKED_BYTE ();
buf[2] = (scm_t_uint8) byte;
*len = 3;
*codepoint = ((scm_t_wchar) buf[0] & 0x0f) << 12UL
| ((scm_t_wchar) buf[1] & 0x3f) << 6UL
| (buf[2] & 0x3f);
}
else if (buf[0] >= 0xf0 && buf[0] <= 0xf4)
{
/* 4-byte form. */
byte = scm_peek_byte_or_eof (port);
ASSERT_NOT_EOF (byte);
if (SCM_UNLIKELY (((byte & 0xc0) != 0x80)
|| (buf[0] == 0xf0 && byte < 0x90)
|| (buf[0] == 0xf4 && byte > 0x8f)))
goto invalid_seq;
CONSUME_PEEKED_BYTE ();
buf[1] = (scm_t_uint8) byte;
*len = 2;
byte = scm_peek_byte_or_eof (port);
ASSERT_NOT_EOF (byte);
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
goto invalid_seq;
CONSUME_PEEKED_BYTE ();
buf[2] = (scm_t_uint8) byte;
*len = 3;
byte = scm_peek_byte_or_eof (port);
ASSERT_NOT_EOF (byte);
if (SCM_UNLIKELY ((byte & 0xc0) != 0x80))
goto invalid_seq;
CONSUME_PEEKED_BYTE ();
buf[3] = (scm_t_uint8) byte;
*len = 4;
*codepoint = ((scm_t_wchar) buf[0] & 0x07) << 18UL
| ((scm_t_wchar) buf[1] & 0x3f) << 12UL
| ((scm_t_wchar) buf[2] & 0x3f) << 6UL
| (buf[3] & 0x3f);
}
else
goto invalid_seq;
return 0;
invalid_seq:
/* Here we could choose the consume the faulty byte when it's not a
valid starting byte, but it's not a requirement. What Section 3.9
of Unicode 6.0.0 mandates, though, is to not consume a byte that
would otherwise be a valid starting byte. */
return EILSEQ;
#undef CONSUME_PEEKED_BYTE
#undef ASSERT_NOT_EOF
}
/* Likewise, read a byte sequence from PORT, passing it through its
input conversion descriptor. */
static int
get_iconv_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
scm_t_port *pt;
int err, byte_read;
size_t bytes_consumed, output_size;
char *output;
scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
scm_t_port *pt = SCM_PTAB_ENTRY (port);
if (SCM_UNLIKELY (pt->input_cd == (iconv_t) -1))
/* Initialize the conversion descriptors. */
scm_i_set_port_encoding_x (port, pt->encoding);
pt = SCM_PTAB_ENTRY (port);
for (output_size = 0, output = (char *) utf8_buf,
bytes_consumed = 0, err = 0;
@ -1177,30 +1309,45 @@ get_codepoint (SCM port, scm_t_wchar *codepoint,
if (SCM_UNLIKELY (output_size == 0))
/* An unterminated sequence. */
err = EILSEQ;
if (SCM_UNLIKELY (err != 0))
{
/* Reset the `iconv' state. */
iconv (pt->input_cd, NULL, NULL, NULL, NULL);
if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
{
*codepoint = '?';
err = 0;
}
/* Fail when the strategy is SCM_ICONVEH_ERROR or
SCM_ICONVEH_ESCAPE_SEQUENCE (the latter doesn't make sense for
input encoding errors.) */
}
else
else if (SCM_LIKELY (err == 0))
{
/* Convert the UTF8_BUF sequence to a Unicode code point. */
*codepoint = utf8_to_codepoint (utf8_buf, output_size);
update_port_lf (*codepoint, port);
*len = bytes_consumed;
}
*len = bytes_consumed;
return err;
}
/* Read a codepoint from PORT and return it in *CODEPOINT. Fill BUF
with the byte representation of the codepoint in PORT's encoding, and
set *LEN to the length in bytes of that representation. Return 0 on
success and an errno value on error. */
static int
get_codepoint (SCM port, scm_t_wchar *codepoint,
char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
{
int err;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
if (pt->input_cd == (iconv_t) -1)
/* Initialize the conversion descriptors, if needed. */
scm_i_set_port_encoding_x (port, pt->encoding);
/* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
if (pt->input_cd == (iconv_t) -1)
err = get_utf8_codepoint (port, codepoint, (scm_t_uint8 *) buf, len);
else
err = get_iconv_codepoint (port, codepoint, buf, len);
if (SCM_LIKELY (err == 0))
update_port_lf (*codepoint, port);
else if (pt->ilseq_handler == SCM_ICONVEH_QUESTION_MARK)
{
*codepoint = '?';
err = 0;
update_port_lf (*codepoint, port);
}
return err;
}
@ -2031,28 +2178,35 @@ scm_i_set_port_encoding_x (SCM port, const char *encoding)
if (encoding == NULL)
encoding = "ISO-8859-1";
pt->encoding = scm_gc_strdup (encoding, "port");
if (pt->encoding != encoding)
pt->encoding = scm_gc_strdup (encoding, "port");
if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
/* If ENCODING is UTF-8, then no conversion descriptor is opened
because we do I/O ourselves. This saves 100+ KiB for each
descriptor. */
if (strcmp (encoding, "UTF-8"))
{
/* Open an input iconv conversion descriptor, from ENCODING
to UTF-8. We choose UTF-8, not UTF-32, because iconv
implementations can typically convert from anything to
UTF-8, but not to UTF-32 (see
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
new_input_cd = iconv_open ("UTF-8", encoding);
if (new_input_cd == (iconv_t) -1)
goto invalid_encoding;
}
if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
{
new_output_cd = iconv_open (encoding, "UTF-8");
if (new_output_cd == (iconv_t) -1)
if (SCM_CELL_WORD_0 (port) & SCM_RDNG)
{
if (new_input_cd != (iconv_t) -1)
iconv_close (new_input_cd);
goto invalid_encoding;
/* Open an input iconv conversion descriptor, from ENCODING
to UTF-8. We choose UTF-8, not UTF-32, because iconv
implementations can typically convert from anything to
UTF-8, but not to UTF-32 (see
<http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00007.html>). */
new_input_cd = iconv_open ("UTF-8", encoding);
if (new_input_cd == (iconv_t) -1)
goto invalid_encoding;
}
if (SCM_CELL_WORD_0 (port) & SCM_WRTNG)
{
new_output_cd = iconv_open (encoding, "UTF-8");
if (new_output_cd == (iconv_t) -1)
{
if (new_input_cd != (iconv_t) -1)
iconv_close (new_input_cd);
goto invalid_encoding;
}
}
}

View file

@ -821,31 +821,57 @@ codepoint_to_utf8 (scm_t_wchar ch, scm_t_uint8 utf8[4])
return len;
}
/* Display the LEN codepoints in STR to PORT according to STRATEGY;
return the number of codepoints successfully displayed. If NARROW_P,
then STR is interpreted as a sequence of `char', denoting a Latin-1
string; otherwise it's interpreted as a sequence of
`scm_t_wchar'. */
static size_t
display_string (const void *str, int narrow_p,
size_t len, SCM port,
scm_t_string_failed_conversion_handler strategy)
{
#define STR_REF(s, x) \
(narrow_p \
? (scm_t_wchar) ((unsigned char *) (s))[x] \
: ((scm_t_wchar *) (s))[x])
/* Write STR to PORT as UTF-8. STR is a LEN-codepoint string; it is
narrow if NARROW_P is true, wide otherwise. Return LEN. */
static size_t
display_string_as_utf8 (const void *str, int narrow_p, size_t len,
SCM port)
{
size_t printed = 0;
while (len > printed)
{
size_t utf8_len, i;
char *input, utf8_buf[256];
/* Convert STR to UTF-8. */
for (i = printed, utf8_len = 0, input = utf8_buf;
i < len && utf8_len + 4 < sizeof (utf8_buf);
i++)
{
utf8_len += codepoint_to_utf8 (STR_REF (str, i),
(scm_t_uint8 *) input);
input = utf8_buf + utf8_len;
}
/* INPUT was successfully converted, entirely; print the
result. */
scm_lfwrite (utf8_buf, utf8_len, port);
printed += i - printed;
}
assert (printed == len);
return len;
}
/* Convert STR through PORT's output conversion descriptor and write the
output to PORT. Return the number of codepoints written. */
static size_t
display_string_using_iconv (const void *str, int narrow_p, size_t len,
SCM port,
scm_t_string_failed_conversion_handler strategy)
{
size_t printed;
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
if (SCM_UNLIKELY (pt->output_cd == (iconv_t) -1))
/* Initialize the conversion descriptors. */
scm_i_set_port_encoding_x (port, pt->encoding);
printed = 0;
while (len > printed)
@ -928,7 +954,35 @@ display_string (const void *str, int narrow_p,
}
return printed;
}
#undef STR_REF
/* Display the LEN codepoints in STR to PORT according to STRATEGY;
return the number of codepoints successfully displayed. If NARROW_P,
then STR is interpreted as a sequence of `char', denoting a Latin-1
string; otherwise it's interpreted as a sequence of
`scm_t_wchar'. */
static size_t
display_string (const void *str, int narrow_p,
size_t len, SCM port,
scm_t_string_failed_conversion_handler strategy)
{
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
if (pt->output_cd == (iconv_t) -1)
/* Initialize the conversion descriptors, if needed. */
scm_i_set_port_encoding_x (port, pt->encoding);
/* FIXME: In 2.1, add a flag to determine whether a port is UTF-8. */
if (pt->output_cd == (iconv_t) -1)
return display_string_as_utf8 (str, narrow_p, len, port);
else
return display_string_using_iconv (str, narrow_p, len,
port, strategy);
}
/* Attempt to display CH to PORT according to STRATEGY. Return non-zero

View file

@ -460,14 +460,11 @@ SCM_DEFINE (scm_lookahead_u8, "lookahead-u8", 1, 0, 0,
SCM_VALIDATE_BINARY_INPUT_PORT (1, port);
u8 = scm_get_byte_or_eof (port);
u8 = scm_peek_byte_or_eof (port);
if (u8 == EOF)
result = SCM_EOF_VAL;
else
{
scm_unget_byte (u8, port);
result = SCM_I_MAKINUM ((scm_t_uint8) u8);
}
result = SCM_I_MAKINUM ((scm_t_uint8) u8);
return result;
}

View file

@ -1135,7 +1135,7 @@ scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
return SCM_UNSPECIFIED;
}
static inline SCM
static SCM
scm_read_shebang (scm_t_wchar chr, SCM port)
{
int c = 0;