1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-17 09:10:22 +02:00

Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

String character escapes now have three forms: \xXX, \uXXXX, and
\UXXXXXX, for unprintable characters whose code points need 2, 4, or
6 hex digits, respectively.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new functions
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests
This commit is contained in:
Michael Gran 2009-08-08 02:35:00 -07:00
parent a876e7dcea
commit 9c44cd4559
15 changed files with 1046 additions and 306 deletions

View file

@ -549,6 +549,7 @@ SCM_DEFINE (scm_string_copy_x, "string-copy!", 3, 2, 0,
len = cend - cstart;
SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
target = scm_i_string_start_writing (target);
ctarget = scm_i_string_writable_chars (target);
memmove (ctarget + ctstart, cstr + cstart, len);
scm_i_string_stop_writing ();
@ -985,6 +986,7 @@ SCM_DEFINE (scm_substring_fill_x, "string-fill!", 2, 2, 0,
4, end, cend);
SCM_VALIDATE_CHAR_COPY (2, chr, c);
str = scm_i_string_start_writing (str);
cstr = scm_i_string_writable_chars (str);
for (k = cstart; k < cend; k++)
cstr[k] = c;
@ -2376,6 +2378,7 @@ string_upcase_x (SCM v, size_t start, size_t end)
size_t k;
char *dst;
v = scm_i_string_start_writing (v);
dst = scm_i_string_writable_chars (v);
for (k = start; k < end; ++k)
dst[k] = scm_c_upcase (dst[k]);
@ -2442,6 +2445,7 @@ string_downcase_x (SCM v, size_t start, size_t end)
size_t k;
char *dst;
v = scm_i_string_start_writing (v);
dst = scm_i_string_writable_chars (v);
for (k = start; k < end; ++k)
dst[k] = scm_c_downcase (dst[k]);
@ -2511,6 +2515,7 @@ string_titlecase_x (SCM str, size_t start, size_t end)
size_t i;
int in_word = 0;
str = scm_i_string_start_writing (str);
sz = (unsigned char *) scm_i_string_writable_chars (str);
for(i = start; i < end; i++)
{
@ -2635,6 +2640,7 @@ SCM_DEFINE (scm_string_reverse, "string-reverse", 1, 2, 0,
2, start, cstart,
3, end, cend);
result = scm_string_copy (str);
result = scm_i_string_start_writing (result);
ctarget = scm_i_string_writable_chars (result);
string_reverse_x (ctarget, cstart, cend);
scm_i_string_stop_writing ();
@ -2658,6 +2664,7 @@ SCM_DEFINE (scm_string_reverse_x, "string-reverse!", 1, 2, 0,
2, start, cstart,
3, end, cend);
str = scm_i_string_start_writing (str);
cstr = scm_i_string_writable_chars (str);
string_reverse_x (cstr, cstart, cend);
scm_i_string_stop_writing ();
@ -3018,19 +3025,16 @@ SCM_DEFINE (scm_string_for_each, "string-for-each", 2, 2, 0,
"return value is not specified.")
#define FUNC_NAME s_scm_string_for_each
{
const char *cstr;
size_t cstart, cend;
scm_t_trampoline_1 proc_tramp = scm_trampoline_1 (proc);
SCM_ASSERT (proc_tramp, proc, SCM_ARG1, FUNC_NAME);
MY_VALIDATE_SUBSTRING_SPEC_COPY (2, s, cstr,
3, start, cstart,
4, end, cend);
MY_VALIDATE_SUBSTRING_SPEC (2, s,
3, start, cstart,
4, end, cend);
while (cstart < cend)
{
unsigned int c = (unsigned char) cstr[cstart];
proc_tramp (proc, SCM_MAKE_CHAR (c));
cstr = scm_i_string_chars (s);
proc_tramp (proc, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart)));
cstart++;
}
@ -3162,6 +3166,7 @@ SCM_DEFINE (scm_string_xcopy_x, "string-xcopy!", 4, 3, 0,
SCM_ASSERT_RANGE (1, tstart,
ctstart + (csto - csfrom) <= scm_i_string_length (target));
target = scm_i_string_start_writing (target);
p = scm_i_string_writable_chars (target) + ctstart;
cs = scm_i_string_chars (s);
while (csfrom < csto)
@ -3200,8 +3205,8 @@ SCM_DEFINE (scm_string_replace, "string-replace", 2, 4, 0,
MY_VALIDATE_SUBSTRING_SPEC (2, s2,
5, start2, cstart2,
6, end2, cend2);
result = scm_i_make_string (cstart1 + (cend2 - cstart2) +
scm_i_string_length (s1) - cend1, &p);
result = scm_i_make_string ((cstart1 + cend2 - cstart2
+ scm_i_string_length (s1) - cend1), &p);
cstr1 = scm_i_string_chars (s1);
cstr2 = scm_i_string_chars (s2);
memmove (p, cstr1, cstart1 * sizeof (char));