fix bug where scm_from_utf8_stringn would not detect bad utf-8

* libguile/strings.c (scm_from_utf8_stringn):
* libguile/symbols.c (utf8_string_equals_wide_string): The "bad UTF8"
  return from u8_mbtouc is a 0xfffd character, not a negative byte
  length.  Fixes a bug in which invalid UTF-8 would not be caught.

* libguile/bytevectors.c (scm_utf8_to_string): Use
  scm_from_utf8_stringn directly.  Just a little cleanup.

* test-suite/tests/iconv.test ("narrow non-ascii string"): Add test for
  parsing bad utf-8 with substitution.
2025-06-14 07:30:32 +02:00 · 2013-01-15 11:01:10 +01:00 · 2013-01-15 11:01:10 +01:00 · 8c76a8971b
commit 8c76a8971b
parent b4fa6cc909
4 changed files with 13 additions and 8 deletions
--- a/libguile/strings.c
+++ b/libguile/strings.c
@ -1,4 +1,4 @@
-/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
@ -1526,7 +1526,8 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,

  if (encoding == NULL || len == 0)
    return scm_from_latin1_stringn (str, len);
-  else if (strcmp (encoding, "UTF-8") == 0)
+  else if (strcmp (encoding, "UTF-8") == 0
+           && handler == SCM_FAILED_CONVERSION_ERROR)
    return scm_from_utf8_stringn (str, len);

  u32len = 0;
@ -1639,7 +1640,7 @@ scm_from_utf8_stringn (const char *str, size_t len)

          nbytes = u8_mbtouc (&c, ustr + i, len - i);

-          if (nbytes < 0)
+          if (c == 0xfffd)
            /* Bad UTF-8.  */
            decoding_error (__func__, errno, str, len);