mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-15 16:20:17 +02:00
Merge remote-tracking branch 'origin/stable-2.0'
Conflicts: libguile/read.c test-suite/tests/tree-il.test
This commit is contained in:
commit
a41bed83ab
10 changed files with 426 additions and 205 deletions
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -1971,33 +1971,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
|
|||
#define FUNC_NAME s_scm_string_to_utf8
|
||||
{
|
||||
SCM utf;
|
||||
uint8_t *c_utf;
|
||||
size_t c_strlen, c_utf_len = 0;
|
||||
scm_t_uint8 *c_utf;
|
||||
size_t c_utf_len = 0;
|
||||
|
||||
SCM_VALIDATE_STRING (1, str);
|
||||
|
||||
c_strlen = scm_i_string_length (str);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark,
|
||||
scm_i_string_chars (str), c_strlen,
|
||||
NULL, NULL, &c_utf_len);
|
||||
else
|
||||
{
|
||||
const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);
|
||||
c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len);
|
||||
}
|
||||
if (SCM_UNLIKELY (c_utf == NULL))
|
||||
scm_syserror (FUNC_NAME);
|
||||
else
|
||||
{
|
||||
scm_dynwind_begin (0);
|
||||
scm_dynwind_free (c_utf);
|
||||
|
||||
utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
|
||||
memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
|
||||
|
||||
scm_dynwind_end ();
|
||||
}
|
||||
c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
|
||||
utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
|
||||
memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
|
||||
free (c_utf);
|
||||
|
||||
return (utf);
|
||||
}
|
||||
|
@ -2014,6 +1996,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
static void
|
||||
swap_u32 (scm_t_wchar *vals, size_t len)
|
||||
{
|
||||
size_t n;
|
||||
for (n = 0; n < len; n++)
|
||||
vals[n] = bswap_32 (vals[n]);
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_string_to_utf32, "string->utf32",
|
||||
1, 1, 0,
|
||||
(SCM str, SCM endianness),
|
||||
|
@ -2021,7 +2011,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
|
|||
"encoding of @var{str}.")
|
||||
#define FUNC_NAME s_scm_string_to_utf32
|
||||
{
|
||||
STRING_TO_UTF (32);
|
||||
SCM bv;
|
||||
scm_t_wchar *wchars;
|
||||
size_t wchar_len, bytes_len;
|
||||
|
||||
wchars = scm_to_utf32_stringn (str, &wchar_len);
|
||||
bytes_len = wchar_len * sizeof (scm_t_wchar);
|
||||
if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
|
||||
scm_i_native_endianness))
|
||||
swap_u32 (wchars, wchar_len);
|
||||
|
||||
bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
|
||||
memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
|
||||
free (wchars);
|
||||
|
||||
return bv;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
|
121
libguile/read.c
121
libguile/read.c
|
@ -1,5 +1,5 @@
|
|||
/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
|
||||
* Foundation, Inc.
|
||||
/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
|
||||
* 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -142,13 +142,13 @@ SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
|
|||
characters to procedures. */
|
||||
static SCM *scm_i_read_hash_procedures;
|
||||
|
||||
static inline SCM
|
||||
static SCM
|
||||
scm_i_read_hash_procedures_ref (void)
|
||||
{
|
||||
return scm_fluid_ref (*scm_i_read_hash_procedures);
|
||||
}
|
||||
|
||||
static inline void
|
||||
static void
|
||||
scm_i_read_hash_procedures_set_x (SCM value)
|
||||
{
|
||||
scm_fluid_set_x (*scm_i_read_hash_procedures, value);
|
||||
|
@ -197,7 +197,7 @@ scm_i_read_hash_procedures_set_x (SCM value)
|
|||
|| ((_chr) == 'd') || ((_chr) == 'l'))
|
||||
|
||||
/* Read an SCSH block comment. */
|
||||
static inline SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
|
||||
static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
|
||||
static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
|
||||
static SCM scm_read_commented_expression (scm_t_wchar, SCM);
|
||||
static SCM scm_read_shebang (scm_t_wchar, SCM);
|
||||
|
@ -207,7 +207,7 @@ static SCM scm_get_hash_procedure (int);
|
|||
result in the pre-allocated buffer BUF. Return zero if the whole token has
|
||||
fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
|
||||
bytes actually read. */
|
||||
static inline int
|
||||
static int
|
||||
read_token (SCM port, char *buf, const size_t buf_size, size_t *read)
|
||||
{
|
||||
*read = 0;
|
||||
|
@ -286,7 +286,7 @@ read_complete_token (SCM port, char *buffer, const size_t buffer_size,
|
|||
static int
|
||||
flush_ws (SCM port, const char *eoferr)
|
||||
{
|
||||
register scm_t_wchar c;
|
||||
scm_t_wchar c;
|
||||
while (1)
|
||||
switch (c = scm_getc_unlocked (port))
|
||||
{
|
||||
|
@ -356,9 +356,17 @@ flush_ws (SCM port, const char *eoferr)
|
|||
/* Token readers. */
|
||||
|
||||
static SCM scm_read_expression (SCM port);
|
||||
static SCM scm_read_sharp (int chr, SCM port);
|
||||
static SCM scm_read_sharp (int chr, SCM port, long line, int column);
|
||||
|
||||
|
||||
static SCM
|
||||
maybe_annotate_source (SCM x, SCM port, long line, int column)
|
||||
{
|
||||
if (SCM_RECORD_POSITIONS_P)
|
||||
scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
|
||||
return x;
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_sexp (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_i_lreadparen"
|
||||
|
@ -423,10 +431,7 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
|
|||
}
|
||||
|
||||
exit:
|
||||
if (SCM_RECORD_POSITIONS_P)
|
||||
scm_i_set_source_properties_x (ans, line, column, SCM_FILENAME (port));
|
||||
|
||||
return ans;
|
||||
return maybe_annotate_source (ans, port, line, column);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -492,6 +497,10 @@ scm_read_string (int chr, SCM port)
|
|||
unsigned c_str_len = 0;
|
||||
scm_t_wchar c;
|
||||
|
||||
/* Need to capture line and column numbers here. */
|
||||
long line = SCM_LINUM (port);
|
||||
int column = SCM_COL (port) - 1;
|
||||
|
||||
str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
|
||||
while ('"' != (c = scm_getc_unlocked (port)))
|
||||
{
|
||||
|
@ -575,13 +584,8 @@ scm_read_string (int chr, SCM port)
|
|||
scm_i_string_set_x (str, c_str_len++, c);
|
||||
scm_i_string_stop_writing ();
|
||||
}
|
||||
|
||||
if (c_str_len > 0)
|
||||
{
|
||||
return scm_i_substring_copy (str, 0, c_str_len);
|
||||
}
|
||||
|
||||
return scm_nullstr;
|
||||
return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len),
|
||||
port, line, column);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -780,10 +784,7 @@ scm_read_quote (int chr, SCM port)
|
|||
}
|
||||
|
||||
p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
|
||||
if (SCM_RECORD_POSITIONS_P)
|
||||
scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port));
|
||||
|
||||
return p;
|
||||
return maybe_annotate_source (p, port, line, column);
|
||||
}
|
||||
|
||||
SCM_SYMBOL (sym_syntax, "syntax");
|
||||
|
@ -830,13 +831,10 @@ scm_read_syntax (int chr, SCM port)
|
|||
}
|
||||
|
||||
p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
|
||||
if (SCM_RECORD_POSITIONS_P)
|
||||
scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port));
|
||||
|
||||
return p;
|
||||
return maybe_annotate_source (p, port, line, column);
|
||||
}
|
||||
|
||||
static inline SCM
|
||||
static SCM
|
||||
scm_read_nil (int chr, SCM port)
|
||||
{
|
||||
SCM id = scm_read_mixed_case_symbol (chr, port);
|
||||
|
@ -849,7 +847,7 @@ scm_read_nil (int chr, SCM port)
|
|||
return SCM_ELISP_NIL;
|
||||
}
|
||||
|
||||
static inline SCM
|
||||
static SCM
|
||||
scm_read_semicolon_comment (int chr, SCM port)
|
||||
{
|
||||
int c;
|
||||
|
@ -990,7 +988,7 @@ scm_read_character (scm_t_wchar chr, SCM port)
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
static inline SCM
|
||||
static SCM
|
||||
scm_read_keyword (int chr, SCM port)
|
||||
{
|
||||
SCM symbol;
|
||||
|
@ -1009,24 +1007,35 @@ scm_read_keyword (int chr, SCM port)
|
|||
return (scm_symbol_to_keyword (symbol));
|
||||
}
|
||||
|
||||
static inline SCM
|
||||
scm_read_vector (int chr, SCM port)
|
||||
static SCM
|
||||
scm_read_vector (int chr, SCM port, long line, int column)
|
||||
{
|
||||
/* Note: We call `scm_read_sexp ()' rather than READER here in order to
|
||||
guarantee that it's going to do what we want. After all, this is an
|
||||
implementation detail of `scm_read_vector ()', not a desirable
|
||||
property. */
|
||||
return (scm_vector (scm_read_sexp (chr, port)));
|
||||
}
|
||||
|
||||
static inline SCM
|
||||
scm_read_srfi4_vector (int chr, SCM port)
|
||||
{
|
||||
return scm_i_read_array (port, chr);
|
||||
return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)),
|
||||
port, line, column);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_bytevector (scm_t_wchar chr, SCM port)
|
||||
scm_read_array (int chr, SCM port, long line, int column)
|
||||
{
|
||||
SCM result = scm_i_read_array (port, chr);
|
||||
if (scm_is_false (result))
|
||||
return result;
|
||||
else
|
||||
return maybe_annotate_source (result, port, line, column);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_srfi4_vector (int chr, SCM port, long line, int column)
|
||||
{
|
||||
return scm_read_array (chr, port, line, column);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column)
|
||||
{
|
||||
chr = scm_getc_unlocked (port);
|
||||
if (chr != 'u')
|
||||
|
@ -1040,7 +1049,9 @@ scm_read_bytevector (scm_t_wchar chr, SCM port)
|
|||
if (chr != '(')
|
||||
goto syntax;
|
||||
|
||||
return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
|
||||
return maybe_annotate_source
|
||||
(scm_u8_list_to_bytevector (scm_read_sexp (chr, port)),
|
||||
port, line, column);
|
||||
|
||||
syntax:
|
||||
scm_i_input_error ("read_bytevector", port,
|
||||
|
@ -1050,7 +1061,7 @@ scm_read_bytevector (scm_t_wchar chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
|
||||
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column)
|
||||
{
|
||||
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
|
||||
terribly inefficient but who cares? */
|
||||
|
@ -1066,10 +1077,12 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
|
|||
if (chr != EOF)
|
||||
scm_ungetc_unlocked (chr, port);
|
||||
|
||||
return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
|
||||
return maybe_annotate_source
|
||||
(scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
|
||||
port, line, column);
|
||||
}
|
||||
|
||||
static inline SCM
|
||||
static SCM
|
||||
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
int bang_seen = 0;
|
||||
|
@ -1302,7 +1315,7 @@ scm_read_sharp_extension (int chr, SCM port)
|
|||
/* The reader for the sharp `#' character. It basically dispatches reads
|
||||
among the above token readers. */
|
||||
static SCM
|
||||
scm_read_sharp (scm_t_wchar chr, SCM port)
|
||||
scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM result;
|
||||
|
@ -1318,28 +1331,27 @@ scm_read_sharp (scm_t_wchar chr, SCM port)
|
|||
case '\\':
|
||||
return (scm_read_character (chr, port));
|
||||
case '(':
|
||||
return (scm_read_vector (chr, port));
|
||||
return (scm_read_vector (chr, port, line, column));
|
||||
case 's':
|
||||
case 'u':
|
||||
case 'f':
|
||||
case 'c':
|
||||
/* This one may return either a boolean or an SRFI-4 vector. */
|
||||
return (scm_read_srfi4_vector (chr, port));
|
||||
return (scm_read_srfi4_vector (chr, port, line, column));
|
||||
case 'v':
|
||||
return (scm_read_bytevector (chr, port));
|
||||
return (scm_read_bytevector (chr, port, line, column));
|
||||
case '*':
|
||||
return (scm_read_guile_bit_vector (chr, port));
|
||||
return (scm_read_guile_bit_vector (chr, port, line, column));
|
||||
case 't':
|
||||
case 'T':
|
||||
case 'F':
|
||||
/* This one may return either a boolean or an SRFI-4 vector. */
|
||||
return (scm_read_boolean (chr, port));
|
||||
case ':':
|
||||
return (scm_read_keyword (chr, port));
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7': case '8': case '9':
|
||||
case '@':
|
||||
return (scm_i_read_array (port, chr));
|
||||
return (scm_read_array (chr, port, line, column));
|
||||
|
||||
case 'i':
|
||||
case 'e':
|
||||
|
@ -1396,7 +1408,7 @@ scm_read_expression (SCM port)
|
|||
{
|
||||
while (1)
|
||||
{
|
||||
register scm_t_wchar chr;
|
||||
scm_t_wchar chr;
|
||||
|
||||
chr = scm_getc_unlocked (port);
|
||||
|
||||
|
@ -1422,8 +1434,9 @@ scm_read_expression (SCM port)
|
|||
return (scm_read_quote (chr, port));
|
||||
case '#':
|
||||
{
|
||||
SCM result;
|
||||
result = scm_read_sharp (chr, port);
|
||||
long line = SCM_LINUM (port);
|
||||
int column = SCM_COL (port) - 1;
|
||||
SCM result = scm_read_sharp (chr, port, line, column);
|
||||
if (scm_is_eq (result, SCM_UNSPECIFIED))
|
||||
/* We read a comment or some such. */
|
||||
break;
|
||||
|
|
|
@ -372,31 +372,36 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end)
|
|||
SCM
|
||||
scm_i_substring_copy (SCM str, size_t start, size_t end)
|
||||
{
|
||||
size_t len = end - start;
|
||||
SCM buf, my_buf, substr;
|
||||
size_t str_start;
|
||||
int wide = 0;
|
||||
get_str_buf_start (&str, &buf, &str_start);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
my_buf = make_stringbuf (len);
|
||||
memcpy (STRINGBUF_CHARS (my_buf),
|
||||
STRINGBUF_CHARS (buf) + str_start + start, len);
|
||||
}
|
||||
if (start == end)
|
||||
return scm_i_make_string (0, NULL, 0);
|
||||
else
|
||||
{
|
||||
my_buf = make_wide_stringbuf (len);
|
||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
|
||||
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
|
||||
+ start), len);
|
||||
wide = 1;
|
||||
size_t len = end - start;
|
||||
SCM buf, my_buf, substr;
|
||||
size_t str_start;
|
||||
int wide = 0;
|
||||
get_str_buf_start (&str, &buf, &str_start);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
my_buf = make_stringbuf (len);
|
||||
memcpy (STRINGBUF_CHARS (my_buf),
|
||||
STRINGBUF_CHARS (buf) + str_start + start, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
my_buf = make_wide_stringbuf (len);
|
||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
|
||||
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
|
||||
+ start), len);
|
||||
wide = 1;
|
||||
}
|
||||
scm_remember_upto_here_1 (buf);
|
||||
substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
|
||||
(scm_t_bits) 0, (scm_t_bits) len);
|
||||
if (wide)
|
||||
scm_i_try_narrow_string (substr);
|
||||
return substr;
|
||||
}
|
||||
scm_remember_upto_here_1 (buf);
|
||||
substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
|
||||
(scm_t_bits) 0, (scm_t_bits) len);
|
||||
if (wide)
|
||||
scm_i_try_narrow_string (substr);
|
||||
return substr;
|
||||
}
|
||||
|
||||
SCM
|
||||
|
@ -1918,10 +1923,47 @@ scm_to_utf8_string (SCM str)
|
|||
return scm_to_utf8_stringn (str, NULL);
|
||||
}
|
||||
|
||||
static size_t
|
||||
latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
|
||||
{
|
||||
size_t ret, i;
|
||||
for (i = 0, ret = 0; i < len; i++)
|
||||
ret += (str[i] < 128) ? 1 : 2;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static scm_t_uint8*
|
||||
latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
|
||||
scm_t_uint8 *u8_result, size_t *u8_lenp)
|
||||
{
|
||||
size_t i, n;
|
||||
size_t u8_len = latin1_u8_strlen (str, latin_len);
|
||||
|
||||
if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
|
||||
u8_result = scm_malloc (u8_len + 1);
|
||||
if (u8_lenp)
|
||||
*u8_lenp = u8_len;
|
||||
|
||||
for (i = 0, n = 0; i < latin_len; i++)
|
||||
n += u8_uctomb (u8_result + n, str[i], u8_len - n);
|
||||
if (n != u8_len)
|
||||
abort ();
|
||||
u8_result[n] = 0;
|
||||
|
||||
return u8_result;
|
||||
}
|
||||
|
||||
char *
|
||||
scm_to_utf8_stringn (SCM str, size_t *lenp)
|
||||
{
|
||||
return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
|
||||
scm_i_string_length (str),
|
||||
NULL, lenp);
|
||||
else
|
||||
return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
|
||||
scm_i_string_length (str),
|
||||
NULL, lenp);
|
||||
}
|
||||
|
||||
scm_t_wchar *
|
||||
|
@ -1939,9 +1981,20 @@ scm_to_utf32_stringn (SCM str, size_t *lenp)
|
|||
SCM_VALIDATE_STRING (1, str);
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
result = (scm_t_wchar *)
|
||||
scm_to_stringn (str, lenp, "UTF-32",
|
||||
SCM_FAILED_CONVERSION_ERROR);
|
||||
{
|
||||
scm_t_uint8 *codepoints;
|
||||
size_t i, len;
|
||||
|
||||
codepoints = (scm_t_uint8*) scm_i_string_chars (str);
|
||||
len = scm_i_string_length (str);
|
||||
if (lenp)
|
||||
*lenp = len;
|
||||
|
||||
result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
|
||||
for (i = 0; i < len; i++)
|
||||
result[i] = codepoints[i];
|
||||
result[len] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t len;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue