1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 11:50:28 +02:00

Implement 'string-utf8-length' and 'scm_c_string_utf8_length'.

* libguile/strings.c (utf8_length, scm_c_string_utf8_length)
  (scm_string_utf8_length): New functions.
* libguile/strings.h (scm_c_string_utf8_length, scm_string_utf8_length):
  New prototypes.
* doc/ref/api-data.texi (Bytevectors as Strings): Add docs.
* doc/ref/guile.texi: Update manual copyright date to 2015.
* test-suite/tests/strings.test (string-utf8-length): Add tests.
This commit is contained in:
Mark H Weaver 2015-01-22 01:22:19 -05:00 committed by Andy Wingo
parent 751a55e355
commit e390e5760b
5 changed files with 63 additions and 6 deletions

View file

@ -1,6 +1,6 @@
@c -*-texinfo-*- @c -*-texinfo-*-
@c This is part of the GNU Guile Reference Manual. @c This is part of the GNU Guile Reference Manual.
@c Copyright (C) 1996, 1997, 2000-2004, 2006-2014 @c Copyright (C) 1996, 1997, 2000-2004, 2006-2015
@c Free Software Foundation, Inc. @c Free Software Foundation, Inc.
@c See the file guile.texi for copying conditions. @c See the file guile.texi for copying conditions.
@ -4983,6 +4983,12 @@ in one of the most commonly available encoding formats.
@result{} #vu8(99 97 102 195 169) @result{} #vu8(99 97 102 195 169)
@end lisp @end lisp
@deftypefn {Scheme Procedure} {} string-utf8-length str
@deftypefnx {C Function} SCM scm_string_utf8_length (SCM str)
@deftypefnx {C Function} size_t scm_c_string_utf8_length (SCM str)
Return the number of bytes in the UTF-8 representation of @var{str}.
@end deftypefn
@deffn {Scheme Procedure} string->utf8 str @deffn {Scheme Procedure} string->utf8 str
@deffnx {Scheme Procedure} string->utf16 str [endianness] @deffnx {Scheme Procedure} string->utf16 str [endianness]
@deffnx {Scheme Procedure} string->utf32 str [endianness] @deffnx {Scheme Procedure} string->utf32 str [endianness]

View file

@ -14,7 +14,7 @@
This manual documents Guile version @value{VERSION}. This manual documents Guile version @value{VERSION}.
Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2009, Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2009,
2010, 2011, 2012, 2013, 2014 Free Software Foundation. 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation.
Permission is granted to copy, distribute and/or modify this document Permission is granted to copy, distribute and/or modify this document
under the terms of the GNU Free Documentation License, Version 1.3 or under the terms of the GNU Free Documentation License, Version 1.3 or

View file

@ -1,5 +1,5 @@
/* Copyright (C) 1995, 1996, 1998, 2000, 2001, 2004, 2006, /* Copyright (C) 1995, 1996, 1998, 2000, 2001, 2004, 2006,
* 2008-2015 Free Software Foundation, Inc. * 2008-2016 Free Software Foundation, Inc.
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License * modify it under the terms of the GNU Lesser General Public License
@ -2065,6 +2065,38 @@ u32_u8_length_in_bytes (const scm_t_uint32 *str, size_t len)
return ret; return ret;
} }
/* Return the number of bytes needed to represent STR in UTF-8.

   STR is not type-checked here.  The string's storage width selects
   which counting helper is used: wide strings are measured as arrays
   of 32-bit values, narrow strings as arrays of 8-bit values.  */
static size_t
utf8_length (SCM str)
{
  if (!scm_i_is_narrow_string (str))
    {
      /* Wide string: hand the 32-bit character array to the UTF-32
         byte counter.  */
      scm_t_uint32 *wide = (scm_t_uint32 *) scm_i_string_wide_chars (str);
      size_t n_chars = scm_i_string_length (str);

      return u32_u8_length_in_bytes (wide, n_chars);
    }
  else
    {
      /* Narrow string: hand the 8-bit character array to the Latin-1
         byte counter.  */
      scm_t_uint8 *narrow = (scm_t_uint8 *) scm_i_string_chars (str);
      size_t n_chars = scm_i_string_length (str);

      return latin1_u8_strlen (narrow, n_chars);
    }
}
/* C entry point: return, as a plain size_t, the number of bytes in the
   UTF-8 representation of STRING.  STRING is checked with
   SCM_VALIDATE_STRING (as argument 1) before it is measured.  */
size_t
scm_c_string_utf8_length (SCM string)
#define FUNC_NAME "scm_c_string_utf8_length"
{
  size_t n_bytes;

  SCM_VALIDATE_STRING (1, string);
  n_bytes = utf8_length (string);

  return n_bytes;
}
#undef FUNC_NAME
/* Scheme-visible `string-utf8-length'.  STRING is checked with
   SCM_VALIDATE_STRING (as argument 1); its UTF-8 byte count is then
   computed and passed through scm_from_size_t to produce the SCM
   result.  */
SCM_DEFINE (scm_string_utf8_length, "string-utf8-length", 1, 0, 0,
            (SCM string),
            "Returns the number of bytes in the UTF-8 representation of "
            "@var{string}.")
#define FUNC_NAME s_scm_string_utf8_length
{
  size_t n_bytes;

  SCM_VALIDATE_STRING (1, string);
  n_bytes = utf8_length (string);

  return scm_from_size_t (n_bytes);
}
#undef FUNC_NAME
char * char *
scm_to_utf8_stringn (SCM str, size_t *lenp) scm_to_utf8_stringn (SCM str, size_t *lenp)
#define FUNC_NAME "scm_to_utf8_stringn" #define FUNC_NAME "scm_to_utf8_stringn"

View file

@ -3,7 +3,8 @@
#ifndef SCM_STRINGS_H #ifndef SCM_STRINGS_H
#define SCM_STRINGS_H #define SCM_STRINGS_H
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. /* Copyright (C) 1995-1998, 2000, 2001, 2004-2006, 2008-2011, 2013,
* 2015-2016 Free Software Foundation, Inc.
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License * modify it under the terms of the GNU Lesser General Public License
@ -107,6 +108,7 @@ SCM_API SCM scm_string_p (SCM x);
SCM_API SCM scm_string (SCM chrs); SCM_API SCM scm_string (SCM chrs);
SCM_API SCM scm_make_string (SCM k, SCM chr); SCM_API SCM scm_make_string (SCM k, SCM chr);
SCM_API SCM scm_string_length (SCM str); SCM_API SCM scm_string_length (SCM str);
SCM_API SCM scm_string_utf8_length (SCM str);
SCM_API SCM scm_string_bytes_per_char (SCM str); SCM_API SCM scm_string_bytes_per_char (SCM str);
SCM_API SCM scm_string_ref (SCM str, SCM k); SCM_API SCM scm_string_ref (SCM str, SCM k);
SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);
@ -120,6 +122,7 @@ SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler); scm_t_string_failed_conversion_handler handler);
SCM_API SCM scm_c_make_string (size_t len, SCM chr); SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str); SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_string_utf8_length (SCM str);
SCM_API size_t scm_c_symbol_length (SCM sym); SCM_API size_t scm_c_symbol_length (SCM sym);
SCM_API SCM scm_c_string_ref (SCM str, size_t pos); SCM_API SCM scm_c_string_ref (SCM str, size_t pos);
SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr); SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr);

View file

@ -1,8 +1,8 @@
;;;; strings.test --- test suite for Guile's string functions -*- scheme -*- ;;;; strings.test --- test suite for Guile's string functions -*- scheme -*-
;;;; Jim Blandy <jimb@red-bean.com> --- August 1999 ;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
;;;; ;;;;
;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009, 2010, ;;;; Copyright (C) 1999, 2001, 2004-2006, 2008-2011, 2013,
;;;; 2011, 2013 Free Software Foundation, Inc. ;;;; 2015 Free Software Foundation, Inc.
;;;; ;;;;
;;;; This library is free software; you can redistribute it and/or ;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public ;;;; modify it under the terms of the GNU Lesser General Public
@ -457,6 +457,22 @@
(pass-if "compatibility composition is equal?" (pass-if "compatibility composition is equal?"
(equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69"))) (equal? (string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
;;
;; string-utf8-length
;;
;; Exercise `string-utf8-length' on a non-string argument and on strings
;; whose characters take one, two, and three bytes each in UTF-8.
(with-test-prefix "string-utf8-length"
;; A non-string argument must raise a wrong-type-arg exception.
(pass-if-exception "wrong type argument"
exception:wrong-type-arg
(string-utf8-length 50))
;; The empty string encodes to zero bytes.
(pass-if-equal 0 (string-utf8-length ""))
;; A NUL character is counted like any other: one byte.
(pass-if-equal 1 (string-utf8-length "\0"))
;; Pure ASCII: one byte per character.
(pass-if-equal 5 (string-utf8-length "hello"))
;; Five ASCII bytes plus the two-byte encoding of λ (U+03BB).
(pass-if-equal 7 (string-utf8-length "helloλ"))
;; Three Ethiopic characters, each three bytes in UTF-8.
(pass-if-equal 9 (string-utf8-length "ሠላም")))
;; ;;
;; string-ref ;; string-ref
;; ;;