1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-15 16:20:17 +02:00

Merge remote-tracking branch 'origin/stable-2.0'

Conflicts:
	libguile/read.c
	test-suite/tests/tree-il.test
This commit is contained in:
Andy Wingo 2012-02-11 18:14:48 +01:00
commit a41bed83ab
10 changed files with 426 additions and 205 deletions

View file

@ -1,4 +1,4 @@
/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@ -1971,33 +1971,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
#define FUNC_NAME s_scm_string_to_utf8
{
SCM utf;
uint8_t *c_utf;
size_t c_strlen, c_utf_len = 0;
scm_t_uint8 *c_utf;
size_t c_utf_len = 0;
SCM_VALIDATE_STRING (1, str);
c_strlen = scm_i_string_length (str);
if (scm_i_is_narrow_string (str))
c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark,
scm_i_string_chars (str), c_strlen,
NULL, NULL, &c_utf_len);
else
{
const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);
c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len);
}
if (SCM_UNLIKELY (c_utf == NULL))
scm_syserror (FUNC_NAME);
else
{
scm_dynwind_begin (0);
scm_dynwind_free (c_utf);
utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
scm_dynwind_end ();
}
c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
free (c_utf);
return (utf);
}
@ -2014,6 +1996,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
}
#undef FUNC_NAME
static void
swap_u32 (scm_t_wchar *vals, size_t len)
{
size_t n;
for (n = 0; n < len; n++)
vals[n] = bswap_32 (vals[n]);
}
SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1, 1, 0,
(SCM str, SCM endianness),
@ -2021,7 +2011,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
"encoding of @var{str}.")
#define FUNC_NAME s_scm_string_to_utf32
{
STRING_TO_UTF (32);
SCM bv;
scm_t_wchar *wchars;
size_t wchar_len, bytes_len;
wchars = scm_to_utf32_stringn (str, &wchar_len);
bytes_len = wchar_len * sizeof (scm_t_wchar);
if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
scm_i_native_endianness))
swap_u32 (wchars, wchar_len);
bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
free (wchars);
return bv;
}
#undef FUNC_NAME

View file

@ -1,5 +1,5 @@
/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
* Foundation, Inc.
/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
* 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@ -142,13 +142,13 @@ SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
characters to procedures. */
static SCM *scm_i_read_hash_procedures;
static inline SCM
static SCM
scm_i_read_hash_procedures_ref (void)
{
return scm_fluid_ref (*scm_i_read_hash_procedures);
}
static inline void
static void
scm_i_read_hash_procedures_set_x (SCM value)
{
scm_fluid_set_x (*scm_i_read_hash_procedures, value);
@ -197,7 +197,7 @@ scm_i_read_hash_procedures_set_x (SCM value)
|| ((_chr) == 'd') || ((_chr) == 'l'))
/* Read an SCSH block comment. */
static inline SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
static SCM scm_read_commented_expression (scm_t_wchar, SCM);
static SCM scm_read_shebang (scm_t_wchar, SCM);
@ -207,7 +207,7 @@ static SCM scm_get_hash_procedure (int);
result in the pre-allocated buffer BUF. Return zero if the whole token has
fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
bytes actually read. */
static inline int
static int
read_token (SCM port, char *buf, const size_t buf_size, size_t *read)
{
*read = 0;
@ -286,7 +286,7 @@ read_complete_token (SCM port, char *buffer, const size_t buffer_size,
static int
flush_ws (SCM port, const char *eoferr)
{
register scm_t_wchar c;
scm_t_wchar c;
while (1)
switch (c = scm_getc_unlocked (port))
{
@ -356,9 +356,17 @@ flush_ws (SCM port, const char *eoferr)
/* Token readers. */
static SCM scm_read_expression (SCM port);
static SCM scm_read_sharp (int chr, SCM port);
static SCM scm_read_sharp (int chr, SCM port, long line, int column);
static SCM
maybe_annotate_source (SCM x, SCM port, long line, int column)
{
if (SCM_RECORD_POSITIONS_P)
scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
return x;
}
static SCM
scm_read_sexp (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_i_lreadparen"
@ -423,10 +431,7 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
}
exit:
if (SCM_RECORD_POSITIONS_P)
scm_i_set_source_properties_x (ans, line, column, SCM_FILENAME (port));
return ans;
return maybe_annotate_source (ans, port, line, column);
}
#undef FUNC_NAME
@ -492,6 +497,10 @@ scm_read_string (int chr, SCM port)
unsigned c_str_len = 0;
scm_t_wchar c;
/* Need to capture line and column numbers here. */
long line = SCM_LINUM (port);
int column = SCM_COL (port) - 1;
str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
while ('"' != (c = scm_getc_unlocked (port)))
{
@ -575,13 +584,8 @@ scm_read_string (int chr, SCM port)
scm_i_string_set_x (str, c_str_len++, c);
scm_i_string_stop_writing ();
}
if (c_str_len > 0)
{
return scm_i_substring_copy (str, 0, c_str_len);
}
return scm_nullstr;
return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len),
port, line, column);
}
#undef FUNC_NAME
@ -780,10 +784,7 @@ scm_read_quote (int chr, SCM port)
}
p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
if (SCM_RECORD_POSITIONS_P)
scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port));
return p;
return maybe_annotate_source (p, port, line, column);
}
SCM_SYMBOL (sym_syntax, "syntax");
@ -830,13 +831,10 @@ scm_read_syntax (int chr, SCM port)
}
p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
if (SCM_RECORD_POSITIONS_P)
scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port));
return p;
return maybe_annotate_source (p, port, line, column);
}
static inline SCM
static SCM
scm_read_nil (int chr, SCM port)
{
SCM id = scm_read_mixed_case_symbol (chr, port);
@ -849,7 +847,7 @@ scm_read_nil (int chr, SCM port)
return SCM_ELISP_NIL;
}
static inline SCM
static SCM
scm_read_semicolon_comment (int chr, SCM port)
{
int c;
@ -990,7 +988,7 @@ scm_read_character (scm_t_wchar chr, SCM port)
}
#undef FUNC_NAME
static inline SCM
static SCM
scm_read_keyword (int chr, SCM port)
{
SCM symbol;
@ -1009,24 +1007,35 @@ scm_read_keyword (int chr, SCM port)
return (scm_symbol_to_keyword (symbol));
}
static inline SCM
scm_read_vector (int chr, SCM port)
static SCM
scm_read_vector (int chr, SCM port, long line, int column)
{
/* Note: We call `scm_read_sexp ()' rather than READER here in order to
guarantee that it's going to do what we want. After all, this is an
implementation detail of `scm_read_vector ()', not a desirable
property. */
return (scm_vector (scm_read_sexp (chr, port)));
}
static inline SCM
scm_read_srfi4_vector (int chr, SCM port)
{
return scm_i_read_array (port, chr);
return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)),
port, line, column);
}
static SCM
scm_read_bytevector (scm_t_wchar chr, SCM port)
scm_read_array (int chr, SCM port, long line, int column)
{
SCM result = scm_i_read_array (port, chr);
if (scm_is_false (result))
return result;
else
return maybe_annotate_source (result, port, line, column);
}
static SCM
scm_read_srfi4_vector (int chr, SCM port, long line, int column)
{
return scm_read_array (chr, port, line, column);
}
static SCM
scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column)
{
chr = scm_getc_unlocked (port);
if (chr != 'u')
@ -1040,7 +1049,9 @@ scm_read_bytevector (scm_t_wchar chr, SCM port)
if (chr != '(')
goto syntax;
return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
return maybe_annotate_source
(scm_u8_list_to_bytevector (scm_read_sexp (chr, port)),
port, line, column);
syntax:
scm_i_input_error ("read_bytevector", port,
@ -1050,7 +1061,7 @@ scm_read_bytevector (scm_t_wchar chr, SCM port)
}
static SCM
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column)
{
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
terribly inefficient but who cares? */
@ -1066,10 +1077,12 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
if (chr != EOF)
scm_ungetc_unlocked (chr, port);
return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
return maybe_annotate_source
(scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
port, line, column);
}
static inline SCM
static SCM
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
{
int bang_seen = 0;
@ -1302,7 +1315,7 @@ scm_read_sharp_extension (int chr, SCM port)
/* The reader for the sharp `#' character. It basically dispatches reads
among the above token readers. */
static SCM
scm_read_sharp (scm_t_wchar chr, SCM port)
scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column)
#define FUNC_NAME "scm_lreadr"
{
SCM result;
@ -1318,28 +1331,27 @@ scm_read_sharp (scm_t_wchar chr, SCM port)
case '\\':
return (scm_read_character (chr, port));
case '(':
return (scm_read_vector (chr, port));
return (scm_read_vector (chr, port, line, column));
case 's':
case 'u':
case 'f':
case 'c':
/* This one may return either a boolean or an SRFI-4 vector. */
return (scm_read_srfi4_vector (chr, port));
return (scm_read_srfi4_vector (chr, port, line, column));
case 'v':
return (scm_read_bytevector (chr, port));
return (scm_read_bytevector (chr, port, line, column));
case '*':
return (scm_read_guile_bit_vector (chr, port));
return (scm_read_guile_bit_vector (chr, port, line, column));
case 't':
case 'T':
case 'F':
/* This one may return either a boolean or an SRFI-4 vector. */
return (scm_read_boolean (chr, port));
case ':':
return (scm_read_keyword (chr, port));
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
case '@':
return (scm_i_read_array (port, chr));
return (scm_read_array (chr, port, line, column));
case 'i':
case 'e':
@ -1396,7 +1408,7 @@ scm_read_expression (SCM port)
{
while (1)
{
register scm_t_wchar chr;
scm_t_wchar chr;
chr = scm_getc_unlocked (port);
@ -1422,8 +1434,9 @@ scm_read_expression (SCM port)
return (scm_read_quote (chr, port));
case '#':
{
SCM result;
result = scm_read_sharp (chr, port);
long line = SCM_LINUM (port);
int column = SCM_COL (port) - 1;
SCM result = scm_read_sharp (chr, port, line, column);
if (scm_is_eq (result, SCM_UNSPECIFIED))
/* We read a comment or some such. */
break;

View file

@ -372,31 +372,36 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end)
SCM
scm_i_substring_copy (SCM str, size_t start, size_t end)
{
size_t len = end - start;
SCM buf, my_buf, substr;
size_t str_start;
int wide = 0;
get_str_buf_start (&str, &buf, &str_start);
if (scm_i_is_narrow_string (str))
{
my_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (my_buf),
STRINGBUF_CHARS (buf) + str_start + start, len);
}
if (start == end)
return scm_i_make_string (0, NULL, 0);
else
{
my_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
+ start), len);
wide = 1;
size_t len = end - start;
SCM buf, my_buf, substr;
size_t str_start;
int wide = 0;
get_str_buf_start (&str, &buf, &str_start);
if (scm_i_is_narrow_string (str))
{
my_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (my_buf),
STRINGBUF_CHARS (buf) + str_start + start, len);
}
else
{
my_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
+ start), len);
wide = 1;
}
scm_remember_upto_here_1 (buf);
substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
(scm_t_bits) 0, (scm_t_bits) len);
if (wide)
scm_i_try_narrow_string (substr);
return substr;
}
scm_remember_upto_here_1 (buf);
substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
(scm_t_bits) 0, (scm_t_bits) len);
if (wide)
scm_i_try_narrow_string (substr);
return substr;
}
SCM
@ -1918,10 +1923,47 @@ scm_to_utf8_string (SCM str)
return scm_to_utf8_stringn (str, NULL);
}
static size_t
latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
{
size_t ret, i;
for (i = 0, ret = 0; i < len; i++)
ret += (str[i] < 128) ? 1 : 2;
return ret;
}
static scm_t_uint8*
latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
scm_t_uint8 *u8_result, size_t *u8_lenp)
{
size_t i, n;
size_t u8_len = latin1_u8_strlen (str, latin_len);
if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
u8_result = scm_malloc (u8_len + 1);
if (u8_lenp)
*u8_lenp = u8_len;
for (i = 0, n = 0; i < latin_len; i++)
n += u8_uctomb (u8_result + n, str[i], u8_len - n);
if (n != u8_len)
abort ();
u8_result[n] = 0;
return u8_result;
}
char *
scm_to_utf8_stringn (SCM str, size_t *lenp)
{
return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
if (scm_i_is_narrow_string (str))
return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
scm_i_string_length (str),
NULL, lenp);
else
return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
scm_i_string_length (str),
NULL, lenp);
}
scm_t_wchar *
@ -1939,9 +1981,20 @@ scm_to_utf32_stringn (SCM str, size_t *lenp)
SCM_VALIDATE_STRING (1, str);
if (scm_i_is_narrow_string (str))
result = (scm_t_wchar *)
scm_to_stringn (str, lenp, "UTF-32",
SCM_FAILED_CONVERSION_ERROR);
{
scm_t_uint8 *codepoints;
size_t i, len;
codepoints = (scm_t_uint8*) scm_i_string_chars (str);
len = scm_i_string_length (str);
if (lenp)
*lenp = len;
result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
for (i = 0; i < len; i++)
result[i] = codepoints[i];
result[len] = 0;
}
else
{
size_t len;