diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 3f787b1c9..3a3a8e4ac 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -1,6 +1,6 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000-2004, 2006-2014 +@c Copyright (C) 1996, 1997, 2000-2004, 2006-2015 @c Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @@ -4983,6 +4983,12 @@ in one of the most commonly available encoding formats. @result{} #vu8(99 97 102 195 169) @end lisp +@deftypefn {Scheme Procedure} {} string-utf8-length str +@deftypefnx {C function} SCM scm_string_utf8_length (str) +@deftypefnx {C function} size_t scm_c_string_utf8_length (str) +Return the number of bytes in the UTF-8 representation of @var{str}. +@end deftypefn + @deffn {Scheme Procedure} string->utf8 str @deffnx {Scheme Procedure} string->utf16 str [endianness] @deffnx {Scheme Procedure} string->utf32 str [endianness] diff --git a/doc/ref/guile.texi b/doc/ref/guile.texi index 5f21188fa..cb4c431f2 100644 --- a/doc/ref/guile.texi +++ b/doc/ref/guile.texi @@ -14,7 +14,7 @@ This manual documents Guile version @value{VERSION}. Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2009, -2010, 2011, 2012, 2013, 2014 Free Software Foundation. +2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation. Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.3 or diff --git a/libguile/strings.c b/libguile/strings.c index 2e5647e6d..dc2e4f5fe 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1998, 2000, 2001, 2004, 2006, - * 2008-2015 Free Software Foundation, Inc. + * 2008-2016 Free Software Foundation, Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License
@@ -2065,6 +2065,38 @@ u32_u8_length_in_bytes (const scm_t_uint32 *str, size_t len)
   return ret;
 }
 
+static size_t                        /* Shared helper: number of bytes STR takes when encoded as UTF-8.  */
+utf8_length (SCM str)                /* Does no type check itself; the two callers below validate first.  */
+{
+  if (scm_i_is_narrow_string (str))  /* Narrow string: characters are read as bytes (scm_t_uint8).  */
+    return latin1_u8_strlen ((scm_t_uint8 *) scm_i_string_chars (str),
+                             scm_i_string_length (str));
+  else                               /* Otherwise: characters are read as 32-bit units (scm_t_uint32).  */
+    return u32_u8_length_in_bytes
+      ((scm_t_uint32 *) scm_i_string_wide_chars (str),
+       scm_i_string_length (str));
+}
+
+size_t                               /* C entry point: same count, returned as a plain size_t.  */
+scm_c_string_utf8_length (SCM string)
+#define FUNC_NAME "scm_c_string_utf8_length"
+{
+  SCM_VALIDATE_STRING (1, string);   /* Type-check STRING before measuring it.  */
+  return utf8_length (string);
+}
+#undef FUNC_NAME
+
+SCM_DEFINE (scm_string_utf8_length, "string-utf8-length", 1, 0, 0,  /* Variant registered under the Scheme name string-utf8-length.  */
+            (SCM string),
+            "Returns the number of bytes in the UTF-8 representation of "
+            "@var{string}.")
+#define FUNC_NAME s_scm_string_utf8_length
+{
+  SCM_VALIDATE_STRING (1, string);   /* Type-check STRING before measuring it.  */
+  return scm_from_size_t (utf8_length (string));  /* Convert the size_t count to an SCM value.  */
+}
+#undef FUNC_NAME
+
 char *
 scm_to_utf8_stringn (SCM str, size_t *lenp)
 #define FUNC_NAME "scm_to_utf8_stringn"
diff --git a/libguile/strings.h b/libguile/strings.h index 24471cd69..882e7ce64 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -3,7 +3,8 @@ #ifndef SCM_STRINGS_H #define SCM_STRINGS_H -/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004, 2005, 2006, 2008, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. +/* Copyright (C) 1995-1998, 2000, 2001, 2004-2006, 2008-2011, 2013, + * 2015-2016 Free Software Foundation, Inc. 
* * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -107,6 +108,7 @@ SCM_API SCM scm_string_p (SCM x); SCM_API SCM scm_string (SCM chrs); SCM_API SCM scm_make_string (SCM k, SCM chr); SCM_API SCM scm_string_length (SCM str); +SCM_API SCM scm_string_utf8_length (SCM str); SCM_API SCM scm_string_bytes_per_char (SCM str); SCM_API SCM scm_string_ref (SCM str, SCM k); SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); @@ -120,6 +122,7 @@ SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding, scm_t_string_failed_conversion_handler handler); SCM_API SCM scm_c_make_string (size_t len, SCM chr); SCM_API size_t scm_c_string_length (SCM str); +SCM_API size_t scm_c_string_utf8_length (SCM str); SCM_API size_t scm_c_symbol_length (SCM sym); SCM_API SCM scm_c_string_ref (SCM str, size_t pos); SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr); diff --git a/test-suite/tests/strings.test b/test-suite/tests/strings.test index 56c898c8b..66c8a6b95 100644 --- a/test-suite/tests/strings.test +++ b/test-suite/tests/strings.test @@ -1,8 +1,8 @@ ;;;; strings.test --- test suite for Guile's string functions -*- scheme -*- ;;;; Jim Blandy --- August 1999 ;;;; -;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009, 2010, -;;;; 2011, 2013 Free Software Foundation, Inc. +;;;; Copyright (C) 1999, 2001, 2004-2006, 2008-2011, 2013, +;;;; 2015 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -457,6 +457,22 @@ (pass-if "compatibility composition is equal?" (equal? 
(string-normalize-nfkc "\u1e9b\u0323") "\u1e69")))
 
+;;
+;; string-utf8-length
+;;
+
+(with-test-prefix "string-utf8-length"             ; checks UTF-8 byte counts, which differ from character counts
+
+  (pass-if-exception "wrong type argument"
+    exception:wrong-type-arg
+    (string-utf8-length 50))                       ; a non-string argument must raise wrong-type-arg
+
+  (pass-if-equal 0 (string-utf8-length ""))        ; empty string: zero bytes
+  (pass-if-equal 1 (string-utf8-length "\0"))      ; the \0 escape is expected to count as one byte
+  (pass-if-equal 5 (string-utf8-length "hello"))   ; ASCII only: one byte per character
+  (pass-if-equal 7 (string-utf8-length "helloλ"))  ; 5 ASCII bytes + 2 bytes for λ
+  (pass-if-equal 9 (string-utf8-length "ሠላም")))    ; three characters at 3 bytes each
+
 ;;
 ;; string-ref
 ;;