mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
fix bug where scm_from_utf8_stringn would not detect bad utf-8
* libguile/strings.c (scm_from_utf8_stringn):
* libguile/symbols.c (utf8_string_equals_wide_string): The "bad UTF8" return from u8_mbtouc is a 0xfffd character, not a negative byte length. Fixes a bug in which invalid UTF-8 would not be caught.
* libguile/bytevectors.c (scm_utf8_to_string): Use scm_from_utf8_stringn directly. Just a little cleanup.
* test-suite/tests/iconv.test ("narrow non-ascii string"): Add test for parsing bad utf-8 with substitution.
This commit is contained in:
parent
b4fa6cc909
commit
8c76a8971b
4 changed files with 13 additions and 8 deletions
|
@@ -1,4 +1,4 @@
|
|||
/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@@ -2050,8 +2050,7 @@ SCM_DEFINE (scm_utf8_to_string, "utf8->string",
|
|||
|
||||
c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
|
||||
c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
|
||||
str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
|
||||
SCM_FAILED_CONVERSION_ERROR);
|
||||
str = scm_from_utf8_stringn (c_utf, c_utf_len);
|
||||
|
||||
return (str);
|
||||
}
|
||||
|
|
|
@@ -1,4 +1,4 @@
|
|||
/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@@ -1526,7 +1526,8 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
|||
|
||||
if (encoding == NULL || len == 0)
|
||||
return scm_from_latin1_stringn (str, len);
|
||||
else if (strcmp (encoding, "UTF-8") == 0)
|
||||
else if (strcmp (encoding, "UTF-8") == 0
|
||||
&& handler == SCM_FAILED_CONVERSION_ERROR)
|
||||
return scm_from_utf8_stringn (str, len);
|
||||
|
||||
u32len = 0;
|
||||
|
@@ -1639,7 +1640,7 @@ scm_from_utf8_stringn (const char *str, size_t len)
|
|||
|
||||
nbytes = u8_mbtouc (&c, ustr + i, len - i);
|
||||
|
||||
if (nbytes < 0)
|
||||
if (c == 0xfffd)
|
||||
/* Bad UTF-8. */
|
||||
decoding_error (__func__, errno, str, len);
|
||||
|
||||
|
|
|
@@ -1,5 +1,5 @@
|
|||
/* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
|
||||
* 2006, 2009, 2011 Free Software Foundation, Inc.
|
||||
* 2006, 2009, 2011, 2013 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@@ -167,7 +167,7 @@ utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
|
|||
nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
|
||||
if (nbytes == 0)
|
||||
break;
|
||||
else if (nbytes < 0)
|
||||
else if (c == 0xfffd)
|
||||
/* Bad UTF-8. */
|
||||
return 0;
|
||||
else if (c != wide[char_idx])
|
||||
|
|
|
@@ -94,6 +94,11 @@
|
|||
(pass-if-exception "misparse latin1 as utf8" exception:decoding-error
|
||||
(bytevector->string (string->bytevector s "latin1") "utf-8"))
|
||||
|
||||
(pass-if "misparse latin1 as utf8 with substitutions"
|
||||
(equal? (bytevector->string (string->bytevector s "latin1")
|
||||
"utf-8" 'substitute)
|
||||
"?t?"))
|
||||
|
||||
(pass-if-exception "misparse latin1 as ascii" exception:decoding-error
|
||||
(bytevector->string (string->bytevector s "latin1") "ascii"))))
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue