Merge remote-tracking branch 'origin/stable-2.0'

Conflicts: libguile/read.c test-suite/tests/tree-il.test
2025-06-15 16:20:17 +02:00 · 2012-02-11 18:14:48 +01:00 · 2012-02-11 18:14:48 +01:00 · a41bed83ab
commit a41bed83ab
parent bbabae997d c2c3bddb1d
10 changed files with 426 additions and 205 deletions
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@ -1,4 +1,4 @@
-/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
@ -1971,33 +1971,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
 #define FUNC_NAME s_scm_string_to_utf8
 {
  SCM utf;
-  uint8_t *c_utf;
-  size_t c_strlen, c_utf_len = 0;
+  scm_t_uint8 *c_utf;
+  size_t c_utf_len = 0;

  SCM_VALIDATE_STRING (1, str);

-  c_strlen = scm_i_string_length (str);
-  if (scm_i_is_narrow_string (str))
-    c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark,
-                                   scm_i_string_chars (str), c_strlen,
-                                   NULL, NULL, &c_utf_len);
-  else
-    {
-      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);
-      c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len);
-    }
-  if (SCM_UNLIKELY (c_utf == NULL))
-    scm_syserror (FUNC_NAME);
-  else
-    {
-      scm_dynwind_begin (0);
-      scm_dynwind_free (c_utf);
-
-      utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
-
-      scm_dynwind_end ();
-    }
+  c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
+  free (c_utf);

  return (utf);
 }
@ -2014,6 +1996,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
 }
 #undef FUNC_NAME

+static void
+swap_u32 (scm_t_wchar *vals, size_t len)
+{
+  size_t n;
+  for (n = 0; n < len; n++)
+    vals[n] = bswap_32 (vals[n]);
+}
+
 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
 	    1, 1, 0,
 	    (SCM str, SCM endianness),
@ -2021,7 +2011,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
 	    "encoding of @var{str}.")
 #define FUNC_NAME s_scm_string_to_utf32
 {
-  STRING_TO_UTF (32);
+  SCM bv;
+  scm_t_wchar *wchars;
+  size_t wchar_len, bytes_len;
+
+  wchars = scm_to_utf32_stringn (str, &wchar_len);
+  bytes_len = wchar_len * sizeof (scm_t_wchar);
+  if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
+                  scm_i_native_endianness))
+    swap_u32 (wchars, wchar_len);
+  
+  bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
+  free (wchars);
+
+  return bv;
 }
 #undef FUNC_NAME

--- a/libguile/read.c
+++ b/libguile/read.c
@ -1,5 +1,5 @@
-/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
- * Foundation, Inc.
+/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
+ *   2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
@ -142,13 +142,13 @@ SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
   characters to procedures.  */
 static SCM *scm_i_read_hash_procedures;

-static inline SCM
+static SCM
 scm_i_read_hash_procedures_ref (void)
 {
  return scm_fluid_ref (*scm_i_read_hash_procedures);
 }

-static inline void
+static void
 scm_i_read_hash_procedures_set_x (SCM value)
 {
  scm_fluid_set_x (*scm_i_read_hash_procedures, value);
@ -197,7 +197,7 @@ scm_i_read_hash_procedures_set_x (SCM value)
   || ((_chr) == 'd') || ((_chr) == 'l'))

 /* Read an SCSH block comment.  */
-static inline SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
+static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
 static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
 static SCM scm_read_commented_expression (scm_t_wchar, SCM);
 static SCM scm_read_shebang (scm_t_wchar, SCM);
@ -207,7 +207,7 @@ static SCM scm_get_hash_procedure (int);
   result in the pre-allocated buffer BUF.  Return zero if the whole token has
   fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
   bytes actually read.  */
-static inline int
+static int
 read_token (SCM port, char *buf, const size_t buf_size, size_t *read)
 {
   *read = 0;
@ -286,7 +286,7 @@ read_complete_token (SCM port, char *buffer, const size_t buffer_size,
 static int
 flush_ws (SCM port, const char *eoferr)
 {
-  register scm_t_wchar c;
+  scm_t_wchar c;
  while (1)
    switch (c = scm_getc_unlocked (port))
      {
@ -356,9 +356,17 @@ flush_ws (SCM port, const char *eoferr)
 /* Token readers.  */

 static SCM scm_read_expression (SCM port);
-static SCM scm_read_sharp (int chr, SCM port);
+static SCM scm_read_sharp (int chr, SCM port, long line, int column);


+static SCM
+maybe_annotate_source (SCM x, SCM port, long line, int column)
+{
+  if (SCM_RECORD_POSITIONS_P)
+    scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
+  return x;
+}
+
 static SCM
 scm_read_sexp (scm_t_wchar chr, SCM port)
 #define FUNC_NAME "scm_i_lreadparen"
@ -423,10 +431,7 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
    }

 exit:
-  if (SCM_RECORD_POSITIONS_P)
-    scm_i_set_source_properties_x (ans, line, column, SCM_FILENAME (port));
-
-  return ans;
+  return maybe_annotate_source (ans, port, line, column);
 }
 #undef FUNC_NAME

@ -492,6 +497,10 @@ scm_read_string (int chr, SCM port)
  unsigned c_str_len = 0;
  scm_t_wchar c;

+  /* Need to capture line and column numbers here. */
+  long line = SCM_LINUM (port);
+  int column = SCM_COL (port) - 1;
+
  str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
  while ('"' != (c = scm_getc_unlocked (port)))
    {
@ -575,13 +584,8 @@ scm_read_string (int chr, SCM port)
      scm_i_string_set_x (str, c_str_len++, c);
      scm_i_string_stop_writing ();
    }
-
-  if (c_str_len > 0)
-    {
-      return scm_i_substring_copy (str, 0, c_str_len);
-    }
-
-  return scm_nullstr;
+  return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len),
+                                port, line, column);
 }
 #undef FUNC_NAME

@ -780,10 +784,7 @@ scm_read_quote (int chr, SCM port)
    }

  p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
-  if (SCM_RECORD_POSITIONS_P)
-    scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port));
-
-  return p;
+  return maybe_annotate_source (p, port, line, column);
 }

 SCM_SYMBOL (sym_syntax, "syntax");
@ -830,13 +831,10 @@ scm_read_syntax (int chr, SCM port)
    }

  p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
-  if (SCM_RECORD_POSITIONS_P)
-    scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port));
-
-  return p;
+  return maybe_annotate_source (p, port, line, column);
 }

-static inline SCM
+static SCM
 scm_read_nil (int chr, SCM port)
 {
  SCM id = scm_read_mixed_case_symbol (chr, port);
@ -849,7 +847,7 @@ scm_read_nil (int chr, SCM port)
  return SCM_ELISP_NIL;
 }
  
-static inline SCM
+static SCM
 scm_read_semicolon_comment (int chr, SCM port)
 {
  int c;
@ -990,7 +988,7 @@ scm_read_character (scm_t_wchar chr, SCM port)
 }
 #undef FUNC_NAME

-static inline SCM
+static SCM
 scm_read_keyword (int chr, SCM port)
 {
  SCM symbol;
@ -1009,24 +1007,35 @@ scm_read_keyword (int chr, SCM port)
  return (scm_symbol_to_keyword (symbol));
 }

-static inline SCM
-scm_read_vector (int chr, SCM port)
+static SCM
+scm_read_vector (int chr, SCM port, long line, int column)
 {
  /* Note: We call `scm_read_sexp ()' rather than READER here in order to
     guarantee that it's going to do what we want.  After all, this is an
     implementation detail of `scm_read_vector ()', not a desirable
     property.  */
-  return (scm_vector (scm_read_sexp (chr, port)));
-}
-
-static inline SCM
-scm_read_srfi4_vector (int chr, SCM port)
-{
-  return scm_i_read_array (port, chr);
+  return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)),
+                                port, line, column);
 }

 static SCM
-scm_read_bytevector (scm_t_wchar chr, SCM port)
+scm_read_array (int chr, SCM port, long line, int column)
+{
+  SCM result = scm_i_read_array (port, chr);
+  if (scm_is_false (result))
+    return result;
+  else
+    return maybe_annotate_source (result, port, line, column);
+}
+
+static SCM
+scm_read_srfi4_vector (int chr, SCM port, long line, int column)
+{
+  return scm_read_array (chr, port, line, column);
+}
+
+static SCM
+scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column)
 {
  chr = scm_getc_unlocked (port);
  if (chr != 'u')
@ -1040,7 +1049,9 @@ scm_read_bytevector (scm_t_wchar chr, SCM port)
  if (chr != '(')
    goto syntax;

-  return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
+  return maybe_annotate_source
+    (scm_u8_list_to_bytevector (scm_read_sexp (chr, port)),
+     port, line, column);

 syntax:
  scm_i_input_error ("read_bytevector", port,
@ -1050,7 +1061,7 @@ scm_read_bytevector (scm_t_wchar chr, SCM port)
 }

 static SCM
-scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
+scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column)
 {
  /* Read the `#*10101'-style read syntax for bit vectors in Guile.  This is
     terribly inefficient but who cares?  */
@ -1066,10 +1077,12 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
  if (chr != EOF)
    scm_ungetc_unlocked (chr, port);

-  return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
+  return maybe_annotate_source
+    (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
+     port, line, column);
 }

-static inline SCM
+static SCM
 scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
 {
  int bang_seen = 0;
@ -1302,7 +1315,7 @@ scm_read_sharp_extension (int chr, SCM port)
 /* The reader for the sharp `#' character.  It basically dispatches reads
   among the above token readers.   */
 static SCM
-scm_read_sharp (scm_t_wchar chr, SCM port)
+scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column)
 #define FUNC_NAME "scm_lreadr"
 {
  SCM result;
@ -1318,28 +1331,27 @@ scm_read_sharp (scm_t_wchar chr, SCM port)
    case '\\':
      return (scm_read_character (chr, port));
    case '(':
-      return (scm_read_vector (chr, port));
+      return (scm_read_vector (chr, port, line, column));
    case 's':
    case 'u':
    case 'f':
    case 'c':
      /* This one may return either a boolean or an SRFI-4 vector.  */
-      return (scm_read_srfi4_vector (chr, port));
+      return (scm_read_srfi4_vector (chr, port, line, column));
    case 'v':
-      return (scm_read_bytevector (chr, port));
+      return (scm_read_bytevector (chr, port, line, column));
    case '*':
-      return (scm_read_guile_bit_vector (chr, port));
+      return (scm_read_guile_bit_vector (chr, port, line, column));
    case 't':
    case 'T':
    case 'F':
-      /* This one may return either a boolean or an SRFI-4 vector.  */
      return (scm_read_boolean (chr, port));
    case ':':
      return (scm_read_keyword (chr, port));
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '@':
-      return (scm_i_read_array (port, chr));
+        return (scm_read_array (chr, port, line, column));

    case 'i':
    case 'e':
@ -1396,7 +1408,7 @@ scm_read_expression (SCM port)
 {
  while (1)
    {
-      register scm_t_wchar chr;
+      scm_t_wchar chr;

      chr = scm_getc_unlocked (port);

@ -1422,8 +1434,9 @@ scm_read_expression (SCM port)
 	  return (scm_read_quote (chr, port));
 	case '#':
 	  {
-	    SCM result;
-	    result = scm_read_sharp (chr, port);
+            long line  = SCM_LINUM (port);
+            int column = SCM_COL (port) - 1;
+	    SCM result = scm_read_sharp (chr, port, line, column);
 	    if (scm_is_eq (result, SCM_UNSPECIFIED))
 	      /* We read a comment or some such.  */
 	      break;
--- a/libguile/strings.c
+++ b/libguile/strings.c
@ -372,31 +372,36 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end)
 SCM
 scm_i_substring_copy (SCM str, size_t start, size_t end)
 {
-  size_t len = end - start;
-  SCM buf, my_buf, substr;
-  size_t str_start;
-  int wide = 0;
-  get_str_buf_start (&str, &buf, &str_start);
-  if (scm_i_is_narrow_string (str))
-    {
-      my_buf = make_stringbuf (len);
-      memcpy (STRINGBUF_CHARS (my_buf),
-              STRINGBUF_CHARS (buf) + str_start + start, len);
-    }
+  if (start == end)
+    return scm_i_make_string (0, NULL, 0);
  else
    {
-      my_buf = make_wide_stringbuf (len);
-      u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
-               (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start 
-                                 + start), len);
-      wide = 1;
+      size_t len = end - start;
+      SCM buf, my_buf, substr;
+      size_t str_start;
+      int wide = 0;
+      get_str_buf_start (&str, &buf, &str_start);
+      if (scm_i_is_narrow_string (str))
+        {
+          my_buf = make_stringbuf (len);
+          memcpy (STRINGBUF_CHARS (my_buf),
+                  STRINGBUF_CHARS (buf) + str_start + start, len);
+        }
+      else
+        {
+          my_buf = make_wide_stringbuf (len);
+          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
+                   (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start 
+                                     + start), len);
+          wide = 1;
+        }
+      scm_remember_upto_here_1 (buf);
+      substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
+                                (scm_t_bits) 0, (scm_t_bits) len);
+      if (wide)
+        scm_i_try_narrow_string (substr);
+      return substr;
    }
-  scm_remember_upto_here_1 (buf);
-  substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
-                            (scm_t_bits) 0, (scm_t_bits) len);
-  if (wide)
-    scm_i_try_narrow_string (substr);
-  return substr;
 }

 SCM
@ -1918,10 +1923,47 @@ scm_to_utf8_string (SCM str)
  return scm_to_utf8_stringn (str, NULL);
 }

+static size_t
+latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
+{
+  size_t ret, i;
+  for (i = 0, ret = 0; i < len; i++)
+    ret += (str[i] < 128) ? 1 : 2;
+  return ret;
+}
+
+static scm_t_uint8*
+latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
+              scm_t_uint8 *u8_result, size_t *u8_lenp)
+{
+  size_t i, n;
+  size_t u8_len = latin1_u8_strlen (str, latin_len);
+
+  if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
+    u8_result = scm_malloc (u8_len + 1);
+  if (u8_lenp)
+    *u8_lenp = u8_len;
+
+  for (i = 0, n = 0; i < latin_len; i++)
+    n += u8_uctomb (u8_result + n, str[i], u8_len - n);
+  if (n != u8_len)
+    abort ();
+  u8_result[n] = 0;
+
+  return u8_result;
+}
+
 char *
 scm_to_utf8_stringn (SCM str, size_t *lenp)
 {
-  return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
+  if (scm_i_is_narrow_string (str))
+    return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
+                                  scm_i_string_length (str),
+                                  NULL, lenp);
+  else
+    return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
+                               scm_i_string_length (str),
+                               NULL, lenp);
 }

 scm_t_wchar *
@ -1939,9 +1981,20 @@ scm_to_utf32_stringn (SCM str, size_t *lenp)
  SCM_VALIDATE_STRING (1, str);

  if (scm_i_is_narrow_string (str))
-    result = (scm_t_wchar *)
-      scm_to_stringn (str, lenp, "UTF-32",
-		      SCM_FAILED_CONVERSION_ERROR);
+    {
+      scm_t_uint8 *codepoints;
+      size_t i, len;
+
+      codepoints = (scm_t_uint8*) scm_i_string_chars (str);
+      len = scm_i_string_length (str);
+      if (lenp)
+	*lenp = len;
+
+      result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+      for (i = 0; i < len; i++)
+        result[i] = codepoints[i];
+      result[len] = 0;
+    }
  else
    {
      size_t len;