mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-16 00:30:21 +02:00
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for wide strings 
(scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to string-like asm tests
This commit is contained in:
parent
a876e7dcea
commit
9c44cd4559
15 changed files with 1046 additions and 306 deletions
|
@ -72,31 +72,82 @@ VM_DEFINE_LOADER (82, load_number, "load-number")
|
|||
/* Loader 83, "load-string".
 *
 * Fetches a length and a character width (FETCH_LENGTH / FETCH_WIDTH),
 * builds a string of `len` characters from the data at `ip`, pushes it,
 * and advances `ip` past the character data.  Width 1 produces a narrow
 * string, width 4 a wide (scm_t_wchar) string; any other width is
 * reported through SCM_MISC_ERROR.
 *
 * NOTE(review): this listing is a rendered diff hunk whose +/- markers
 * were lost, so pre-change and post-change lines appear together.  The
 * `|` and `||||` lines are rendering artifacts, not C.  Confirm against
 * the raw diff before treating any line as live code.
 */
VM_DEFINE_LOADER (83, load_string, "load-string")
|
||||
{
|
||||
size_t len;
|
||||
/* Bytes per character in the encoded data; only 1 and 4 are accepted below. */
int width;
|
||||
SCM str;
|
||||
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
/* NOTE(review): this PUSH, the "Was:" comment and the `ip += len;` that
   follow look like the pre-change (removed) side of the hunk -- they
   ignore `width` and would duplicate the PUSH / ip advance further
   down.  TODO confirm against the raw diff. */
PUSH (scm_from_locale_stringn ((char *)ip, len));
|
||||
/* Was: scm_makfromstr (ip, len, 0) */
|
||||
ip += len;
|
||||
/* Narrow case: one byte per character, so exactly `len` bytes are copied. */
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
/* scm_i_make_string hands back the new string's buffer through `buf`. */
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
/* Wide case: `len` characters of `width` (4) bytes each. */
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
/* Raw byte copy of len * width bytes; presumably the encoded data is
   already in the host's scm_t_wchar layout -- TODO confirm. */
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL);
|
||||
PUSH (str);
|
||||
/* Step over the character data: the same byte count the copies above read. */
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
/* Loader 84, "load-symbol".
 *
 * Fetches a length and a character width (FETCH_LENGTH / FETCH_WIDTH),
 * builds a temporary string of `len` characters from the data at `ip`,
 * converts it with scm_string_to_symbol, pushes the symbol, and advances
 * `ip` past the character data.  Width 1 produces a narrow string,
 * width 4 a wide (scm_t_wchar) string; any other width is reported
 * through SCM_MISC_ERROR.
 *
 * NOTE(review): this listing is a rendered diff hunk whose +/- markers
 * were lost, so pre-change and post-change lines appear together.  The
 * `|` and `||||` lines are rendering artifacts, not C.  Confirm against
 * the raw diff before treating any line as live code.
 */
VM_DEFINE_LOADER (84, load_symbol, "load-symbol")
|
||||
{
|
||||
size_t len;
|
||||
/* Bytes per character in the encoded data; only 1 and 4 are accepted below. */
int width;
|
||||
SCM str;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
/* NOTE(review): this PUSH and the `ip += len;` after it look like the
   pre-change (removed) side of the hunk -- they ignore `width` and
   would duplicate the PUSH / ip advance further down.  TODO confirm
   against the raw diff. */
PUSH (scm_from_locale_symboln ((char *)ip, len));
|
||||
ip += len;
|
||||
/* Narrow case: one byte per character, so exactly `len` bytes are copied. */
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
/* scm_i_make_string hands back the new string's buffer through `buf`. */
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
/* Wide case: `len` characters of `width` (4) bytes each. */
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
/* Raw byte copy of len * width bytes; presumably the encoded data is
   already in the host's scm_t_wchar layout -- TODO confirm. */
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL);
|
||||
/* The string is only an intermediate; the value pushed is the symbol. */
PUSH (scm_string_to_symbol (str));
|
||||
/* Step over the character data: the same byte count the copies above read. */
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
/* Loader 85, "load-keyword".
 *
 * Fetches a length and a character width (FETCH_LENGTH / FETCH_WIDTH),
 * builds a temporary string of `len` characters from the data at `ip`,
 * converts it string -> symbol -> keyword, pushes the keyword, and
 * advances `ip` past the character data.  Width 1 produces a narrow
 * string, width 4 a wide (scm_t_wchar) string; any other width is
 * reported through SCM_MISC_ERROR.
 *
 * NOTE(review): this listing is a rendered diff hunk whose +/- markers
 * were lost, so pre-change and post-change lines appear together.  The
 * `|` and `||||` lines are rendering artifacts, not C.  Confirm against
 * the raw diff before treating any line as live code.
 */
VM_DEFINE_LOADER (85, load_keyword, "load-keyword")
|
||||
{
|
||||
size_t len;
|
||||
/* Bytes per character in the encoded data; only 1 and 4 are accepted below. */
int width;
|
||||
SCM str;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
/* NOTE(review): this PUSH and the `ip += len;` after it look like the
   pre-change (removed) side of the hunk -- they ignore `width` and
   would duplicate the PUSH / ip advance further down.  TODO confirm
   against the raw diff. */
PUSH (scm_from_locale_keywordn ((char *)ip, len));
|
||||
ip += len;
|
||||
/* Narrow case: one byte per character, so exactly `len` bytes are copied. */
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
/* scm_i_make_string hands back the new string's buffer through `buf`. */
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
/* Wide case: `len` characters of `width` (4) bytes each. */
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
/* Raw byte copy of len * width bytes; presumably the encoded data is
   already in the host's scm_t_wchar layout -- TODO confirm. */
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL);
|
||||
/* The string is only an intermediate: string -> symbol -> keyword. */
PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str)));
|
||||
/* Step over the character data: the same byte count the copies above read. */
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
|
@ -132,13 +183,29 @@ VM_DEFINE_INSTRUCTION (87, link_now, "link-now", 0, 1, 1)
|
|||
|
||||
VM_DEFINE_LOADER (88, define, "define")
|
||||
{
|
||||
SCM sym;
|
||||
SCM str, sym;
|
||||
size_t len;
|
||||
|
||||
int width;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
sym = scm_from_locale_symboln ((char *)ip, len);
|
||||
ip += len;
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL);
|
||||
sym = scm_string_to_symbol (str);
|
||||
ip += len * width;
|
||||
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue