mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 12:20:26 +02:00
Allow decoding of UTF-8 containing U+FFFD, the replacement character.
* libguile/strings.c (scm_from_utf8_stringn): Use 'u8_mbtoucr' and
  check for a decoding error by its 'nbytes' return value.  Previously
  we used 'u8_mbtouc' and improperly assumed that a U+FFFD character
  indicated a decoding error.
* libguile/symbols.c (utf8_string_equals_wide_string): Likewise.
* test-suite/tests/bytevectors.test (exception:decoding-error): New
  variable.
  ("2.9 Operations on Strings"): Add tests.
This commit is contained in:
parent
48412395c6
commit
00884bb79f
3 changed files with 21 additions and 8 deletions
|
@ -1,4 +1,5 @@
|
||||||
/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
|
/* Copyright (C) 1995, 1996, 1998, 2000, 2001, 2004, 2006,
|
||||||
|
* 2008-2015 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public License
|
* modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -1673,9 +1674,9 @@ scm_from_utf8_stringn (const char *str, size_t len)
|
||||||
|
|
||||||
ascii = 0;
|
ascii = 0;
|
||||||
|
|
||||||
nbytes = u8_mbtouc (&c, ustr + i, len - i);
|
nbytes = u8_mbtoucr (&c, ustr + i, len - i);
|
||||||
|
|
||||||
if (c == 0xfffd)
|
if (nbytes < 0)
|
||||||
/* Bad UTF-8. */
|
/* Bad UTF-8. */
|
||||||
decoding_error (__func__, errno, str, len);
|
decoding_error (__func__, errno, str, len);
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/* Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
|
/* Copyright (C) 1995-1998, 2000, 2001, 2003, 2004, 2006, 2009, 2011,
|
||||||
* 2006, 2009, 2011, 2013 Free Software Foundation, Inc.
|
* 2013, 2015 Free Software Foundation, Inc.
|
||||||
*
|
*
|
||||||
* This library is free software; you can redistribute it and/or
|
* This library is free software; you can redistribute it and/or
|
||||||
* modify it under the terms of the GNU Lesser General Public License
|
* modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -164,10 +164,10 @@ utf8_string_equals_wide_string (const scm_t_uint8 *narrow, size_t nlen,
|
||||||
ucs4_t c;
|
ucs4_t c;
|
||||||
int nbytes;
|
int nbytes;
|
||||||
|
|
||||||
nbytes = u8_mbtouc (&c, narrow + byte_idx, nlen - byte_idx);
|
nbytes = u8_mbtoucr (&c, narrow + byte_idx, nlen - byte_idx);
|
||||||
if (nbytes == 0)
|
if (nbytes == 0)
|
||||||
break;
|
break;
|
||||||
else if (c == 0xfffd)
|
else if (nbytes < 0)
|
||||||
/* Bad UTF-8. */
|
/* Bad UTF-8. */
|
||||||
return 0;
|
return 0;
|
||||||
else if (c != wide[char_idx])
|
else if (c != wide[char_idx])
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
;;;; bytevectors.test --- R6RS bytevectors. -*- mode: scheme; coding: utf-8; -*-
|
;;;; bytevectors.test --- R6RS bytevectors. -*- mode: scheme; coding: utf-8; -*-
|
||||||
;;;;
|
;;;;
|
||||||
;;;; Copyright (C) 2009-2014 Free Software Foundation, Inc.
|
;;;; Copyright (C) 2009-2015 Free Software Foundation, Inc.
|
||||||
;;;;
|
;;;;
|
||||||
;;;; Ludovic Courtès
|
;;;; Ludovic Courtès
|
||||||
;;;;
|
;;;;
|
||||||
|
@ -24,6 +24,9 @@
|
||||||
:use-module (rnrs bytevectors)
|
:use-module (rnrs bytevectors)
|
||||||
:use-module (srfi srfi-4))
|
:use-module (srfi srfi-4))
|
||||||
|
|
||||||
|
(define exception:decoding-error
|
||||||
|
(cons 'decoding-error "input (locale conversion|decoding) error"))
|
||||||
|
|
||||||
;;; Some of the tests in here are examples taken from the R6RS Standard
|
;;; Some of the tests in here are examples taken from the R6RS Standard
|
||||||
;;; Libraries document.
|
;;; Libraries document.
|
||||||
|
|
||||||
|
@ -501,6 +504,15 @@
|
||||||
(= (string-length str)
|
(= (string-length str)
|
||||||
(- (bytevector-length utf8) 2)))))
|
(- (bytevector-length utf8) 2)))))
|
||||||
|
|
||||||
|
(pass-if-equal "utf8->string [replacement character]"
|
||||||
|
'(104 105 65533)
|
||||||
|
(map char->integer
|
||||||
|
(string->list (utf8->string #vu8(104 105 239 191 189)))))
|
||||||
|
|
||||||
|
(pass-if-exception "utf8->string [invalid encoding]"
|
||||||
|
exception:decoding-error
|
||||||
|
(utf8->string #vu8(104 105 239 191 50)))
|
||||||
|
|
||||||
(pass-if "utf16->string"
|
(pass-if "utf16->string"
|
||||||
(let* ((utf16 (uint-list->bytevector (map char->integer
|
(let* ((utf16 (uint-list->bytevector (map char->integer
|
||||||
(string->list "hello, world"))
|
(string->list "hello, world"))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue