mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-06 04:00:26 +02:00
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to sting-like asm tests
This commit is contained in:
parent
a876e7dcea
commit
9c44cd4559
15 changed files with 1046 additions and 306 deletions
|
@ -969,6 +969,34 @@ scm_fill_input (SCM port)
|
|||
* This function differs from scm_c_write; it updates port line and
|
||||
* column. */
|
||||
|
||||
static void
|
||||
update_port_lf (scm_t_wchar c, SCM port)
|
||||
{
|
||||
if (c == '\a')
|
||||
{
|
||||
}
|
||||
else if (c == '\b')
|
||||
{
|
||||
SCM_DECCOL (port);
|
||||
}
|
||||
else if (c == '\n')
|
||||
{
|
||||
SCM_INCLINE (port);
|
||||
}
|
||||
else if (c == '\r')
|
||||
{
|
||||
SCM_ZEROCOL (port);
|
||||
}
|
||||
else if (c == '\t')
|
||||
{
|
||||
SCM_TABCOL (port);
|
||||
}
|
||||
else
|
||||
{
|
||||
SCM_INCCOL (port);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
scm_lfwrite (const char *ptr, size_t size, SCM port)
|
||||
{
|
||||
|
@ -980,30 +1008,54 @@ scm_lfwrite (const char *ptr, size_t size, SCM port)
|
|||
|
||||
ptob->write (port, ptr, size);
|
||||
|
||||
for (; size; ptr++, size--) {
|
||||
if (*ptr == '\a') {
|
||||
}
|
||||
else if (*ptr == '\b') {
|
||||
SCM_DECCOL(port);
|
||||
}
|
||||
else if (*ptr == '\n') {
|
||||
SCM_INCLINE(port);
|
||||
}
|
||||
else if (*ptr == '\r') {
|
||||
SCM_ZEROCOL(port);
|
||||
}
|
||||
else if (*ptr == '\t') {
|
||||
SCM_TABCOL(port);
|
||||
}
|
||||
else {
|
||||
SCM_INCCOL(port);
|
||||
}
|
||||
for (; size; ptr++, size--)
|
||||
update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
|
||||
|
||||
if (pt->rw_random)
|
||||
pt->rw_active = SCM_PORT_WRITE;
|
||||
}
|
||||
|
||||
/* Write a scheme string STR to PORT from START inclusive to END
|
||||
exclusive. */
|
||||
void
|
||||
scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
|
||||
{
|
||||
size_t i, size = scm_i_string_length (str);
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
|
||||
scm_t_wchar p;
|
||||
char *buf;
|
||||
size_t len;
|
||||
|
||||
if (pt->rw_active == SCM_PORT_READ)
|
||||
scm_end_input (port);
|
||||
|
||||
if (end == -1)
|
||||
end = size;
|
||||
size = end - start;
|
||||
|
||||
buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
|
||||
NULL, iconveh_escape_sequence);
|
||||
ptob->write (port, buf, len);
|
||||
free (buf);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
p = scm_i_string_ref (str, i + start);
|
||||
update_port_lf (p, port);
|
||||
}
|
||||
|
||||
if (pt->rw_random)
|
||||
pt->rw_active = SCM_PORT_WRITE;
|
||||
}
|
||||
|
||||
/* Write a scheme string STR to PORT. */
|
||||
void
|
||||
scm_lfwrite_str (SCM str, SCM port)
|
||||
{
|
||||
scm_lfwrite_substr (str, 0, -1, port);
|
||||
}
|
||||
|
||||
/* scm_c_read
|
||||
*
|
||||
* Used by an application to read arbitrary number of bytes from an
|
||||
|
|
|
@ -269,6 +269,9 @@ SCM_API SCM scm_read_char (SCM port);
|
|||
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
|
||||
SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
|
||||
SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
|
||||
SCM_INTERNAL void scm_lfwrite_str (SCM str, SCM port);
|
||||
SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
|
||||
SCM port);
|
||||
SCM_API void scm_flush (SCM port);
|
||||
SCM_API void scm_end_input (SCM port);
|
||||
SCM_API int scm_fill_input (SCM port);
|
||||
|
|
113
libguile/print.c
113
libguile/print.c
|
@ -563,37 +563,96 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
if (SCM_WRITINGP (pstate))
|
||||
{
|
||||
size_t i, j, len;
|
||||
const char *data;
|
||||
static char const hex[] = "0123456789abcdef";
|
||||
char buf[8];
|
||||
|
||||
|
||||
scm_putc ('"', port);
|
||||
len = scm_i_string_length (exp);
|
||||
data = scm_i_string_chars (exp);
|
||||
for (i = 0, j = 0; i < len; ++i)
|
||||
for (i = 0; i < len; ++i)
|
||||
{
|
||||
unsigned char ch = data[i];
|
||||
if ((ch < 32 && ch != '\n') || (127 <= ch && ch < 148))
|
||||
{
|
||||
static char const hex[]="0123456789abcdef";
|
||||
char buf[4];
|
||||
scm_t_wchar ch = scm_i_string_ref (exp, i);
|
||||
int printed = 0;
|
||||
|
||||
scm_lfwrite (data+j, i-j, port);
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'x';
|
||||
buf[2] = hex [ch / 16];
|
||||
buf[3] = hex [ch % 16];
|
||||
scm_lfwrite (buf, 4, port);
|
||||
data = scm_i_string_chars (exp);
|
||||
j = i+1;
|
||||
if (ch == ' ' || ch == '\n')
|
||||
{
|
||||
scm_putc (ch, port);
|
||||
printed = 1;
|
||||
}
|
||||
else if (ch == '"' || ch == '\\')
|
||||
{
|
||||
scm_lfwrite (data+j, i-j, port);
|
||||
scm_putc ('\\', port);
|
||||
data = scm_i_string_chars (exp);
|
||||
j = i;
|
||||
scm_charprint (ch, port);
|
||||
printed = 1;
|
||||
}
|
||||
else
|
||||
if (uc_is_general_category_withtable
|
||||
(ch,
|
||||
UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M |
|
||||
UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P |
|
||||
UC_CATEGORY_MASK_S))
|
||||
{
|
||||
/* Print the character since it is a graphic
|
||||
character. */
|
||||
scm_t_wchar *wbuf;
|
||||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
|
||||
wbuf[0] = ch;
|
||||
|
||||
buf = u32_conv_to_encoding ("ISO-8859-1",
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1, NULL, NULL, &len);
|
||||
if (buf != NULL)
|
||||
{
|
||||
/* Character is graphic and representable in
|
||||
this encoding. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
printed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!printed)
|
||||
{
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding or is not graphic. */
|
||||
if (ch <= 0xFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'x';
|
||||
buf[2] = hex[ch / 16];
|
||||
buf[3] = hex[ch % 16];
|
||||
scm_lfwrite (buf, 4, port);
|
||||
}
|
||||
else if (ch <= 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'u';
|
||||
buf[2] = hex[(ch & 0xF000) >> 12];
|
||||
buf[3] = hex[(ch & 0xF00) >> 8];
|
||||
buf[4] = hex[(ch & 0xF0) >> 4];
|
||||
buf[5] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 6, port);
|
||||
j = i + 1;
|
||||
}
|
||||
else if (ch > 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'U';
|
||||
buf[2] = hex[(ch & 0xF00000) >> 20];
|
||||
buf[3] = hex[(ch & 0xF0000) >> 16];
|
||||
buf[4] = hex[(ch & 0xF000) >> 12];
|
||||
buf[5] = hex[(ch & 0xF00) >> 8];
|
||||
buf[6] = hex[(ch & 0xF0) >> 4];
|
||||
buf[7] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 8, port);
|
||||
j = i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
scm_lfwrite (data+j, i-j, port);
|
||||
scm_putc ('"', port);
|
||||
scm_remember_upto_here_1 (exp);
|
||||
}
|
||||
|
@ -606,8 +665,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
if (scm_i_symbol_is_interned (exp))
|
||||
{
|
||||
scm_print_symbol_name (scm_i_symbol_chars (exp),
|
||||
scm_i_symbol_length (exp),
|
||||
port);
|
||||
scm_i_symbol_length (exp), port);
|
||||
scm_remember_upto_here_1 (exp);
|
||||
}
|
||||
else
|
||||
|
@ -763,6 +821,17 @@ scm_prin1 (SCM exp, SCM port, int writingp)
|
|||
}
|
||||
}
|
||||
|
||||
/* Print a character.
|
||||
*/
|
||||
void
|
||||
scm_charprint (scm_t_uint32 ch, SCM port)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
|
||||
wbuf[0] = ch;
|
||||
scm_lfwrite_str (wstr, port);
|
||||
}
|
||||
|
||||
/* Print an integer.
|
||||
*/
|
||||
|
|
|
@ -77,6 +77,7 @@ SCM_API SCM scm_print_options (SCM setting);
|
|||
SCM_API SCM scm_make_print_state (void);
|
||||
SCM_API void scm_free_print_state (SCM print_state);
|
||||
SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state);
|
||||
SCM_API void scm_charprint (scm_t_uint32 c, SCM port);
|
||||
SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
|
||||
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
|
||||
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);
|
||||
|
|
131
libguile/read.c
131
libguile/read.c
|
@ -387,32 +387,28 @@ scm_read_string (int chr, SCM port)
|
|||
object (the string returned). */
|
||||
|
||||
SCM str = SCM_BOOL_F;
|
||||
char c_str[READER_STRING_BUFFER_SIZE];
|
||||
unsigned c_str_len = 0;
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
|
||||
str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
|
||||
while ('"' != (c = scm_getc (port)))
|
||||
{
|
||||
if (c == EOF)
|
||||
str_eof: scm_i_input_error (FUNC_NAME, port,
|
||||
"end of file in string constant",
|
||||
SCM_EOL);
|
||||
|
||||
if (c_str_len + 1 >= sizeof (c_str))
|
||||
{
|
||||
/* Flush the C buffer onto a Scheme string. */
|
||||
SCM addy;
|
||||
str_eof:
|
||||
scm_i_input_error (FUNC_NAME, port,
|
||||
"end of file in string constant", SCM_EOL);
|
||||
}
|
||||
|
||||
if (str == SCM_BOOL_F)
|
||||
str = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
|
||||
if (c_str_len + 1 >= scm_i_string_length (str))
|
||||
{
|
||||
SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
|
||||
|
||||
addy = scm_from_locale_stringn (c_str, c_str_len);
|
||||
str = scm_string_append_shared (scm_list_2 (str, addy));
|
||||
|
||||
c_str_len = 0;
|
||||
str = scm_string_append (scm_list_2 (str, addy));
|
||||
}
|
||||
|
||||
if (c == '\\')
|
||||
{
|
||||
switch (c = scm_getc (port))
|
||||
{
|
||||
case EOF:
|
||||
|
@ -452,45 +448,106 @@ scm_read_string (int chr, SCM port)
|
|||
break;
|
||||
case 'x':
|
||||
{
|
||||
int a, b;
|
||||
scm_t_wchar a, b;
|
||||
a = scm_getc (port);
|
||||
if (a == EOF) goto str_eof;
|
||||
if (a == EOF)
|
||||
goto str_eof;
|
||||
b = scm_getc (port);
|
||||
if (b == EOF) goto str_eof;
|
||||
if ('0' <= a && a <= '9') a -= '0';
|
||||
else if ('A' <= a && a <= 'F') a = a - 'A' + 10;
|
||||
else if ('a' <= a && a <= 'f') a = a - 'a' + 10;
|
||||
else goto bad_escaped;
|
||||
if ('0' <= b && b <= '9') b -= '0';
|
||||
else if ('A' <= b && b <= 'F') b = b - 'A' + 10;
|
||||
else if ('a' <= b && b <= 'f') b = b - 'a' + 10;
|
||||
else goto bad_escaped;
|
||||
if (b == EOF)
|
||||
goto str_eof;
|
||||
if ('0' <= a && a <= '9')
|
||||
a -= '0';
|
||||
else if ('A' <= a && a <= 'F')
|
||||
a = a - 'A' + 10;
|
||||
else if ('a' <= a && a <= 'f')
|
||||
a = a - 'a' + 10;
|
||||
else
|
||||
{
|
||||
c = a;
|
||||
goto bad_escaped;
|
||||
}
|
||||
if ('0' <= b && b <= '9')
|
||||
b -= '0';
|
||||
else if ('A' <= b && b <= 'F')
|
||||
b = b - 'A' + 10;
|
||||
else if ('a' <= b && b <= 'f')
|
||||
b = b - 'a' + 10;
|
||||
else
|
||||
{
|
||||
c = b;
|
||||
goto bad_escaped;
|
||||
}
|
||||
c = a * 16 + b;
|
||||
break;
|
||||
}
|
||||
case 'u':
|
||||
{
|
||||
scm_t_wchar a;
|
||||
int i;
|
||||
c = 0;
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
a = scm_getc (port);
|
||||
if (a == EOF)
|
||||
goto str_eof;
|
||||
if ('0' <= a && a <= '9')
|
||||
a -= '0';
|
||||
else if ('A' <= a && a <= 'F')
|
||||
a = a - 'A' + 10;
|
||||
else if ('a' <= a && a <= 'f')
|
||||
a = a - 'a' + 10;
|
||||
else
|
||||
{
|
||||
c = a;
|
||||
goto bad_escaped;
|
||||
}
|
||||
c = c * 16 + a;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'U':
|
||||
{
|
||||
scm_t_wchar a;
|
||||
int i;
|
||||
c = 0;
|
||||
for (i = 0; i < 6; i++)
|
||||
{
|
||||
a = scm_getc (port);
|
||||
if (a == EOF)
|
||||
goto str_eof;
|
||||
if ('0' <= a && a <= '9')
|
||||
a -= '0';
|
||||
else if ('A' <= a && a <= 'F')
|
||||
a = a - 'A' + 10;
|
||||
else if ('a' <= a && a <= 'f')
|
||||
a = a - 'a' + 10;
|
||||
else
|
||||
{
|
||||
c = a;
|
||||
goto bad_escaped;
|
||||
}
|
||||
c = c * 16 + a;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
bad_escaped:
|
||||
scm_i_input_error (FUNC_NAME, port,
|
||||
"illegal character in escape sequence: ~S",
|
||||
scm_list_1 (SCM_MAKE_CHAR (c)));
|
||||
}
|
||||
c_str[c_str_len++] = c;
|
||||
}
|
||||
str = scm_i_string_start_writing (str);
|
||||
scm_i_string_set_x (str, c_str_len++, c);
|
||||
scm_i_string_stop_writing ();
|
||||
}
|
||||
|
||||
if (c_str_len > 0)
|
||||
{
|
||||
SCM addy;
|
||||
|
||||
addy = scm_from_locale_stringn (c_str, c_str_len);
|
||||
if (str == SCM_BOOL_F)
|
||||
str = addy;
|
||||
else
|
||||
str = scm_string_append_shared (scm_list_2 (str, addy));
|
||||
return scm_i_substring_copy (str, 0, c_str_len);
|
||||
}
|
||||
else
|
||||
str = (str == SCM_BOOL_F) ? scm_nullstr : str;
|
||||
|
||||
return str;
|
||||
return scm_nullstr;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
|
|
@ -131,6 +131,7 @@ SCM_DEFINE (scm_read_string_x_partial, "read-string!/partial", 1, 3, 0,
|
|||
don't touch the file descriptor. otherwise the
|
||||
"return immediately if something is available" rule may
|
||||
be violated. */
|
||||
str = scm_i_string_start_writing (str);
|
||||
dest = scm_i_string_writable_chars (str) + offset;
|
||||
chars_read = scm_take_from_input_buffers (port, dest, read_len);
|
||||
scm_i_string_stop_writing ();
|
||||
|
@ -140,6 +141,7 @@ SCM_DEFINE (scm_read_string_x_partial, "read-string!/partial", 1, 3, 0,
|
|||
if (chars_read == 0 && read_len > 0) /* don't confuse read_len == 0 with
|
||||
EOF. */
|
||||
{
|
||||
str = scm_i_string_start_writing (str);
|
||||
dest = scm_i_string_writable_chars (str) + offset;
|
||||
SCM_SYSCALL (chars_read = read (fdes, dest, read_len));
|
||||
scm_i_string_stop_writing ();
|
||||
|
|
|
@ -1438,6 +1438,7 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
fd = SCM_FPORT_FDES (sock);
|
||||
|
||||
len = scm_i_string_length (buf);
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
dest = scm_i_string_writable_chars (buf);
|
||||
SCM_SYSCALL (rv = recv (fd, dest, len, flg));
|
||||
scm_i_string_stop_writing ();
|
||||
|
@ -1482,6 +1483,7 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0,
|
|||
fd = SCM_FPORT_FDES (sock);
|
||||
|
||||
len = scm_i_string_length (message);
|
||||
message = scm_i_string_start_writing (message);
|
||||
src = scm_i_string_writable_chars (message);
|
||||
SCM_SYSCALL (rv = send (fd, src, len, flg));
|
||||
scm_i_string_stop_writing ();
|
||||
|
@ -1550,6 +1552,7 @@ SCM_DEFINE (scm_recvfrom, "recvfrom!", 2, 3, 0,
|
|||
|
||||
/* recvfrom will not necessarily return an address. usually nothing
|
||||
is returned for stream sockets. */
|
||||
str = scm_i_string_start_writing (str);
|
||||
buf = scm_i_string_writable_chars (str);
|
||||
((struct sockaddr *) &addr)->sa_family = AF_UNSPEC;
|
||||
SCM_SYSCALL (rv = recvfrom (fd, buf + offset,
|
||||
|
|
|
@ -549,6 +549,7 @@ SCM_DEFINE (scm_string_copy_x, "string-copy!", 3, 2, 0,
|
|||
len = cend - cstart;
|
||||
SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
|
||||
|
||||
target = scm_i_string_start_writing (target);
|
||||
ctarget = scm_i_string_writable_chars (target);
|
||||
memmove (ctarget + ctstart, cstr + cstart, len);
|
||||
scm_i_string_stop_writing ();
|
||||
|
@ -985,6 +986,7 @@ SCM_DEFINE (scm_substring_fill_x, "string-fill!", 2, 2, 0,
|
|||
4, end, cend);
|
||||
SCM_VALIDATE_CHAR_COPY (2, chr, c);
|
||||
|
||||
str = scm_i_string_start_writing (str);
|
||||
cstr = scm_i_string_writable_chars (str);
|
||||
for (k = cstart; k < cend; k++)
|
||||
cstr[k] = c;
|
||||
|
@ -2376,6 +2378,7 @@ string_upcase_x (SCM v, size_t start, size_t end)
|
|||
size_t k;
|
||||
char *dst;
|
||||
|
||||
v = scm_i_string_start_writing (v);
|
||||
dst = scm_i_string_writable_chars (v);
|
||||
for (k = start; k < end; ++k)
|
||||
dst[k] = scm_c_upcase (dst[k]);
|
||||
|
@ -2442,6 +2445,7 @@ string_downcase_x (SCM v, size_t start, size_t end)
|
|||
size_t k;
|
||||
char *dst;
|
||||
|
||||
v = scm_i_string_start_writing (v);
|
||||
dst = scm_i_string_writable_chars (v);
|
||||
for (k = start; k < end; ++k)
|
||||
dst[k] = scm_c_downcase (dst[k]);
|
||||
|
@ -2511,6 +2515,7 @@ string_titlecase_x (SCM str, size_t start, size_t end)
|
|||
size_t i;
|
||||
int in_word = 0;
|
||||
|
||||
str = scm_i_string_start_writing (str);
|
||||
sz = (unsigned char *) scm_i_string_writable_chars (str);
|
||||
for(i = start; i < end; i++)
|
||||
{
|
||||
|
@ -2635,6 +2640,7 @@ SCM_DEFINE (scm_string_reverse, "string-reverse", 1, 2, 0,
|
|||
2, start, cstart,
|
||||
3, end, cend);
|
||||
result = scm_string_copy (str);
|
||||
result = scm_i_string_start_writing (result);
|
||||
ctarget = scm_i_string_writable_chars (result);
|
||||
string_reverse_x (ctarget, cstart, cend);
|
||||
scm_i_string_stop_writing ();
|
||||
|
@ -2658,6 +2664,7 @@ SCM_DEFINE (scm_string_reverse_x, "string-reverse!", 1, 2, 0,
|
|||
2, start, cstart,
|
||||
3, end, cend);
|
||||
|
||||
str = scm_i_string_start_writing (str);
|
||||
cstr = scm_i_string_writable_chars (str);
|
||||
string_reverse_x (cstr, cstart, cend);
|
||||
scm_i_string_stop_writing ();
|
||||
|
@ -3018,19 +3025,16 @@ SCM_DEFINE (scm_string_for_each, "string-for-each", 2, 2, 0,
|
|||
"return value is not specified.")
|
||||
#define FUNC_NAME s_scm_string_for_each
|
||||
{
|
||||
const char *cstr;
|
||||
size_t cstart, cend;
|
||||
scm_t_trampoline_1 proc_tramp = scm_trampoline_1 (proc);
|
||||
|
||||
SCM_ASSERT (proc_tramp, proc, SCM_ARG1, FUNC_NAME);
|
||||
MY_VALIDATE_SUBSTRING_SPEC_COPY (2, s, cstr,
|
||||
MY_VALIDATE_SUBSTRING_SPEC (2, s,
|
||||
3, start, cstart,
|
||||
4, end, cend);
|
||||
while (cstart < cend)
|
||||
{
|
||||
unsigned int c = (unsigned char) cstr[cstart];
|
||||
proc_tramp (proc, SCM_MAKE_CHAR (c));
|
||||
cstr = scm_i_string_chars (s);
|
||||
proc_tramp (proc, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart)));
|
||||
cstart++;
|
||||
}
|
||||
|
||||
|
@ -3162,6 +3166,7 @@ SCM_DEFINE (scm_string_xcopy_x, "string-xcopy!", 4, 3, 0,
|
|||
SCM_ASSERT_RANGE (1, tstart,
|
||||
ctstart + (csto - csfrom) <= scm_i_string_length (target));
|
||||
|
||||
target = scm_i_string_start_writing (target);
|
||||
p = scm_i_string_writable_chars (target) + ctstart;
|
||||
cs = scm_i_string_chars (s);
|
||||
while (csfrom < csto)
|
||||
|
@ -3200,8 +3205,8 @@ SCM_DEFINE (scm_string_replace, "string-replace", 2, 4, 0,
|
|||
MY_VALIDATE_SUBSTRING_SPEC (2, s2,
|
||||
5, start2, cstart2,
|
||||
6, end2, cend2);
|
||||
result = scm_i_make_string (cstart1 + (cend2 - cstart2) +
|
||||
scm_i_string_length (s1) - cend1, &p);
|
||||
result = scm_i_make_string ((cstart1 + cend2 - cstart2
|
||||
+ scm_i_string_length (s1) - cend1), &p);
|
||||
cstr1 = scm_i_string_chars (s1);
|
||||
cstr2 = scm_i_string_chars (s2);
|
||||
memmove (p, cstr1, cstart1 * sizeof (char));
|
||||
|
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/chars.h"
|
||||
|
@ -69,10 +71,12 @@
|
|||
|
||||
#define STRINGBUF_F_SHARED 0x100
|
||||
#define STRINGBUF_F_INLINE 0x200
|
||||
#define STRINGBUF_F_WIDE 0x400
|
||||
|
||||
#define STRINGBUF_TAG scm_tc7_stringbuf
|
||||
#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
|
||||
#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
|
||||
#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
|
||||
|
||||
#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
|
||||
#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
|
||||
|
@ -82,6 +86,7 @@
|
|||
#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
|
||||
? STRINGBUF_INLINE_CHARS (buf) \
|
||||
: STRINGBUF_OUTLINE_CHARS (buf))
|
||||
#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *)SCM_CELL_WORD_1(buf))
|
||||
#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
|
||||
? STRINGBUF_INLINE_LENGTH (buf) \
|
||||
: STRINGBUF_OUTLINE_LENGTH (buf))
|
||||
|
@ -126,6 +131,23 @@ make_stringbuf (size_t len)
|
|||
}
|
||||
}
|
||||
|
||||
static SCM
|
||||
make_wide_stringbuf (size_t len)
|
||||
{
|
||||
scm_t_wchar *mem;
|
||||
#if SCM_DEBUG
|
||||
if (len < 1000)
|
||||
lenhist[len]++;
|
||||
else
|
||||
lenhist[1000]++;
|
||||
#endif
|
||||
|
||||
mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
|
||||
mem[len] = 0;
|
||||
return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_WIDE, (scm_t_bits) mem,
|
||||
(scm_t_bits) len, (scm_t_bits) 0);
|
||||
}
|
||||
|
||||
/* Return a new stringbuf whose underlying storage consists of the LEN+1
|
||||
octets pointed to by STR (the last octet is zero). */
|
||||
SCM
|
||||
|
@ -147,8 +169,58 @@ void
|
|||
scm_i_stringbuf_free (SCM buf)
|
||||
{
|
||||
if (!STRINGBUF_INLINE (buf))
|
||||
{
|
||||
if (!STRINGBUF_WIDE (buf))
|
||||
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
|
||||
STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
|
||||
else
|
||||
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
|
||||
sizeof (scm_t_wchar) * (STRINGBUF_OUTLINE_LENGTH (buf)
|
||||
+ 1), "string");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
widen_stringbuf (SCM buf)
|
||||
{
|
||||
size_t i, len;
|
||||
scm_t_wchar *mem;
|
||||
|
||||
if (STRINGBUF_WIDE (buf))
|
||||
return;
|
||||
|
||||
if (STRINGBUF_INLINE (buf))
|
||||
{
|
||||
len = STRINGBUF_INLINE_LENGTH (buf);
|
||||
|
||||
mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
|
||||
for (i = 0; i < len; i++)
|
||||
mem[i] =
|
||||
(scm_t_wchar) (unsigned char) STRINGBUF_INLINE_CHARS (buf)[i];
|
||||
mem[len] = 0;
|
||||
|
||||
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_INLINE);
|
||||
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
|
||||
SCM_SET_CELL_WORD_1 (buf, mem);
|
||||
SCM_SET_CELL_WORD_2 (buf, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
len = STRINGBUF_OUTLINE_LENGTH (buf);
|
||||
|
||||
mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
|
||||
for (i = 0; i < len; i++)
|
||||
mem[i] =
|
||||
(scm_t_wchar) (unsigned char) STRINGBUF_OUTLINE_CHARS (buf)[i];
|
||||
mem[len] = 0;
|
||||
|
||||
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), len + 1, "string");
|
||||
|
||||
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
|
||||
SCM_SET_CELL_WORD_1 (buf, mem);
|
||||
SCM_SET_CELL_WORD_2 (buf, len);
|
||||
}
|
||||
}
|
||||
|
||||
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
|
||||
|
@ -195,6 +267,18 @@ scm_i_make_string (size_t len, char **charsp)
|
|||
return res;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_i_make_wide_string (size_t len, scm_t_wchar ** charsp)
|
||||
{
|
||||
SCM buf = make_wide_stringbuf (len);
|
||||
SCM res;
|
||||
if (charsp)
|
||||
*charsp = STRINGBUF_WIDE_CHARS (buf);
|
||||
res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
|
||||
(scm_t_bits) 0, (scm_t_bits) len);
|
||||
return res;
|
||||
}
|
||||
|
||||
static void
|
||||
validate_substring_args (SCM str, size_t start, size_t end)
|
||||
{
|
||||
|
@ -253,12 +337,24 @@ scm_i_substring_copy (SCM str, size_t start, size_t end)
|
|||
SCM buf, my_buf;
|
||||
size_t str_start;
|
||||
get_str_buf_start (&str, &buf, &str_start);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
my_buf = make_stringbuf (len);
|
||||
memcpy (STRINGBUF_CHARS (my_buf),
|
||||
STRINGBUF_CHARS (buf) + str_start + start, len);
|
||||
}
|
||||
else
|
||||
{
|
||||
my_buf = make_wide_stringbuf (len);
|
||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
|
||||
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
|
||||
+ start), len);
|
||||
/* Even though this string is wide, the substring may be narrow.
|
||||
Consider adding code to narrow string. */
|
||||
}
|
||||
scm_remember_upto_here_1 (buf);
|
||||
return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
|
||||
(scm_t_bits)0, (scm_t_bits) len);
|
||||
return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
|
||||
(scm_t_bits) 0, (scm_t_bits) len);
|
||||
}
|
||||
|
||||
SCM
|
||||
|
@ -330,17 +426,45 @@ scm_i_string_length (SCM str)
|
|||
return STRING_LENGTH (str);
|
||||
}
|
||||
|
||||
int
|
||||
scm_i_is_narrow_string (SCM str)
|
||||
{
|
||||
return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
|
||||
}
|
||||
|
||||
const char *
|
||||
scm_i_string_chars (SCM str)
|
||||
{
|
||||
SCM buf;
|
||||
size_t start;
|
||||
get_str_buf_start (&str, &buf, &start);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
return STRINGBUF_CHARS (buf) + start;
|
||||
else
|
||||
scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
|
||||
scm_list_1 (str));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *
|
||||
scm_i_string_writable_chars (SCM orig_str)
|
||||
const scm_t_wchar *
|
||||
scm_i_string_wide_chars (SCM str)
|
||||
{
|
||||
SCM buf;
|
||||
size_t start;
|
||||
|
||||
get_str_buf_start (&str, &buf, &start);
|
||||
if (!scm_i_is_narrow_string (str))
|
||||
return STRINGBUF_WIDE_CHARS (buf) + start;
|
||||
else
|
||||
scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
|
||||
scm_list_1 (str));
|
||||
}
|
||||
|
||||
/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
|
||||
a new string buffer, so that it can be modified without modifying
|
||||
other strings. */
|
||||
SCM
|
||||
scm_i_string_start_writing (SCM orig_str)
|
||||
{
|
||||
SCM buf, str = orig_str;
|
||||
size_t start;
|
||||
|
@ -352,18 +476,26 @@ scm_i_string_writable_chars (SCM orig_str)
|
|||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
||||
if (STRINGBUF_SHARED (buf))
|
||||
{
|
||||
/* Clone stringbuf. For this, we put all threads to sleep.
|
||||
*/
|
||||
|
||||
/* Clone the stringbuf. */
|
||||
size_t len = STRING_LENGTH (str);
|
||||
SCM new_buf;
|
||||
|
||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
new_buf = make_stringbuf (len);
|
||||
memcpy (STRINGBUF_CHARS (new_buf),
|
||||
STRINGBUF_CHARS (buf) + STRING_START (str), len);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
new_buf = make_wide_stringbuf (len);
|
||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
|
||||
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
|
||||
+ STRING_START (str)), len);
|
||||
}
|
||||
scm_i_thread_put_to_sleep ();
|
||||
SET_STRING_STRINGBUF (str, new_buf);
|
||||
start -= STRING_START (str);
|
||||
|
@ -374,8 +506,39 @@ scm_i_string_writable_chars (SCM orig_str)
|
|||
|
||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
||||
}
|
||||
return orig_str;
|
||||
}
|
||||
|
||||
/* Return a pointer to the chars of a string that fits in a Latin-1
|
||||
encoding. */
|
||||
char *
|
||||
scm_i_string_writable_chars (SCM str)
|
||||
{
|
||||
SCM buf;
|
||||
size_t start;
|
||||
|
||||
get_str_buf_start (&str, &buf, &start);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
return STRINGBUF_CHARS (buf) + start;
|
||||
else
|
||||
scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
|
||||
scm_list_1 (str));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Return a pointer to the Unicode codepoints of a string. */
|
||||
static scm_t_wchar *
|
||||
scm_i_string_writable_wide_chars (SCM str)
|
||||
{
|
||||
SCM buf;
|
||||
size_t start;
|
||||
|
||||
get_str_buf_start (&str, &buf, &start);
|
||||
if (!scm_i_is_narrow_string (str))
|
||||
return STRINGBUF_WIDE_CHARS (buf) + start;
|
||||
else
|
||||
scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
|
||||
scm_list_1 (str));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -384,6 +547,34 @@ scm_i_string_stop_writing (void)
|
|||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
||||
}
|
||||
|
||||
/* Return the Xth character is C. */
|
||||
scm_t_wchar
|
||||
scm_i_string_ref (SCM str, size_t x)
|
||||
{
|
||||
if (scm_i_is_narrow_string (str))
|
||||
return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
|
||||
else
|
||||
return scm_i_string_wide_chars (str)[x];
|
||||
}
|
||||
|
||||
void
|
||||
scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
|
||||
{
|
||||
if (chr > 0xFF && scm_i_is_narrow_string (str))
|
||||
widen_stringbuf (STRING_STRINGBUF (str));
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
char *dst = scm_i_string_writable_chars (str);
|
||||
dst[p] = (char) (unsigned char) chr;
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
|
||||
dst[p] = chr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Symbols.
|
||||
|
||||
Basic symbol creation and accessing is done here, the rest is in
|
||||
|
@ -418,11 +609,22 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
|
|||
else
|
||||
{
|
||||
/* make new buf. */
|
||||
if (scm_i_is_narrow_string (name))
|
||||
{
|
||||
SCM new_buf = make_stringbuf (length);
|
||||
memcpy (STRINGBUF_CHARS (new_buf),
|
||||
STRINGBUF_CHARS (buf) + start, length);
|
||||
buf = new_buf;
|
||||
}
|
||||
else
|
||||
{
|
||||
SCM new_buf = make_wide_stringbuf (length);
|
||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
|
||||
(scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
|
||||
length);
|
||||
buf = new_buf;
|
||||
}
|
||||
}
|
||||
return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
|
||||
(scm_t_bits) hash, SCM_UNPACK (props));
|
||||
}
|
||||
|
@ -466,11 +668,40 @@ scm_c_symbol_length (SCM sym)
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
int
|
||||
scm_i_is_narrow_symbol (SCM sym)
|
||||
{
|
||||
SCM buf;
|
||||
|
||||
buf = SYMBOL_STRINGBUF (sym);
|
||||
return !STRINGBUF_WIDE (buf);
|
||||
}
|
||||
|
||||
const char *
|
||||
scm_i_symbol_chars (SCM sym)
|
||||
{
|
||||
SCM buf = SYMBOL_STRINGBUF (sym);
|
||||
SCM buf;
|
||||
|
||||
buf = SYMBOL_STRINGBUF (sym);
|
||||
if (!STRINGBUF_WIDE (buf))
|
||||
return STRINGBUF_CHARS (buf);
|
||||
else
|
||||
scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
|
||||
scm_list_1 (sym));
|
||||
}
|
||||
|
||||
/* Return a pointer to the Unicode codepoints of a symbol's name. */
|
||||
const scm_t_wchar *
|
||||
scm_i_symbol_wide_chars (SCM sym)
|
||||
{
|
||||
SCM buf;
|
||||
|
||||
buf = SYMBOL_STRINGBUF (sym);
|
||||
if (STRINGBUF_WIDE (buf))
|
||||
return STRINGBUF_WIDE_CHARS (buf);
|
||||
else
|
||||
scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
|
||||
scm_list_1 (sym));
|
||||
}
|
||||
|
||||
SCM
|
||||
|
@ -496,6 +727,15 @@ scm_i_symbol_substring (SCM sym, size_t start, size_t end)
|
|||
(scm_t_bits)start, (scm_t_bits) end - start);
|
||||
}
|
||||
|
||||
scm_t_wchar
|
||||
scm_i_symbol_ref (SCM sym, size_t x)
|
||||
{
|
||||
if (scm_i_is_narrow_symbol (sym))
|
||||
return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
|
||||
else
|
||||
return scm_i_symbol_wide_chars (sym)[x];
|
||||
}
|
||||
|
||||
/* Debugging
|
||||
*/
|
||||
|
||||
|
@ -505,15 +745,17 @@ SCM scm_sys_string_dump (SCM);
|
|||
SCM scm_sys_symbol_dump (SCM);
|
||||
SCM scm_sys_stringbuf_hist (void);
|
||||
|
||||
SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
|
||||
(SCM str),
|
||||
"")
|
||||
SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str), "")
|
||||
#define FUNC_NAME s_scm_sys_string_dump
|
||||
{
|
||||
SCM_VALIDATE_STRING (1, str);
|
||||
fprintf (stderr, "%p:\n", str);
|
||||
fprintf (stderr, " start: %u\n", STRING_START (str));
|
||||
fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
|
||||
if (scm_i_is_narrow_string (str))
|
||||
fprintf (stderr, " format: narrow\n");
|
||||
else
|
||||
fprintf (stderr, " format: wide\n");
|
||||
if (IS_SH_STRING (str))
|
||||
{
|
||||
fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
|
||||
|
@ -524,36 +766,54 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
|
|||
{
|
||||
SCM buf = STRING_STRINGBUF (str);
|
||||
fprintf (stderr, " buf: %p\n", buf);
|
||||
if (scm_i_is_narrow_string (str))
|
||||
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
|
||||
else
|
||||
fprintf (stderr, " chars: %p\n", STRINGBUF_WIDE_CHARS (buf));
|
||||
fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
|
||||
fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
|
||||
if (STRINGBUF_SHARED (buf))
|
||||
fprintf (stderr, " shared: true\n");
|
||||
else
|
||||
fprintf (stderr, " shared: false\n");
|
||||
if (STRINGBUF_INLINE (buf))
|
||||
fprintf (stderr, " inline: true\n");
|
||||
else
|
||||
fprintf (stderr, " inline: false\n");
|
||||
|
||||
}
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
|
||||
(SCM sym),
|
||||
"")
|
||||
SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym), "")
|
||||
#define FUNC_NAME s_scm_sys_symbol_dump
|
||||
{
|
||||
SCM_VALIDATE_SYMBOL (1, sym);
|
||||
fprintf (stderr, "%p:\n", sym);
|
||||
fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
|
||||
if (scm_i_is_narrow_symbol (sym))
|
||||
fprintf (stderr, " format: narrow\n");
|
||||
else
|
||||
fprintf (stderr, " format: wide\n");
|
||||
{
|
||||
SCM buf = SYMBOL_STRINGBUF (sym);
|
||||
fprintf (stderr, " buf: %p\n", buf);
|
||||
if (scm_i_is_narrow_symbol (sym))
|
||||
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
|
||||
else
|
||||
fprintf (stderr, " chars: %p\n", STRINGBUF_WIDE_CHARS (buf));
|
||||
fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
|
||||
fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
|
||||
if (STRINGBUF_SHARED (buf))
|
||||
fprintf (stderr, " shared: true\n");
|
||||
else
|
||||
fprintf (stderr, " shared: false\n");
|
||||
|
||||
}
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
|
||||
(void),
|
||||
"")
|
||||
SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
|
||||
#define FUNC_NAME s_scm_sys_stringbuf_hist
|
||||
{
|
||||
int i;
|
||||
|
@ -589,29 +849,46 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1,
|
|||
#define FUNC_NAME s_scm_string
|
||||
{
|
||||
SCM result;
|
||||
SCM rest;
|
||||
size_t len;
|
||||
char *data;
|
||||
size_t p = 0;
|
||||
long i;
|
||||
|
||||
/* Verify that this is a list of chars. */
|
||||
i = scm_ilength (chrs);
|
||||
len = (size_t) i;
|
||||
rest = chrs;
|
||||
|
||||
SCM_ASSERT (len >= 0, chrs, SCM_ARG1, FUNC_NAME);
|
||||
while (len > 0 && scm_is_pair (rest))
|
||||
{
|
||||
long i = scm_ilength (chrs);
|
||||
|
||||
SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
|
||||
len = i;
|
||||
}
|
||||
|
||||
result = scm_i_make_string (len, &data);
|
||||
while (len > 0 && scm_is_pair (chrs))
|
||||
{
|
||||
SCM elt = SCM_CAR (chrs);
|
||||
|
||||
SCM elt = SCM_CAR (rest);
|
||||
SCM_VALIDATE_CHAR (SCM_ARGn, elt);
|
||||
*data++ = SCM_CHAR (elt);
|
||||
chrs = SCM_CDR (chrs);
|
||||
rest = SCM_CDR (rest);
|
||||
len--;
|
||||
scm_remember_upto_here_1 (elt);
|
||||
}
|
||||
|
||||
/* Construct a string containing this list of chars. */
|
||||
len = (size_t) i;
|
||||
rest = chrs;
|
||||
|
||||
result = scm_i_make_string (len, NULL);
|
||||
result = scm_i_string_start_writing (result);
|
||||
while (len > 0 && scm_is_pair (rest))
|
||||
{
|
||||
SCM elt = SCM_CAR (rest);
|
||||
scm_i_string_set_x (result, p, SCM_CHAR (elt));
|
||||
p++;
|
||||
rest = SCM_CDR (rest);
|
||||
len--;
|
||||
scm_remember_upto_here_1 (elt);
|
||||
}
|
||||
scm_i_string_stop_writing ();
|
||||
|
||||
if (len > 0)
|
||||
scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
|
||||
if (!scm_is_null (chrs))
|
||||
if (!scm_is_null (rest))
|
||||
scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
|
||||
|
||||
return result;
|
||||
|
@ -634,13 +911,16 @@ SCM
|
|||
scm_c_make_string (size_t len, SCM chr)
|
||||
#define FUNC_NAME NULL
|
||||
{
|
||||
char *dst;
|
||||
SCM res = scm_i_make_string (len, &dst);
|
||||
size_t p;
|
||||
SCM res = scm_i_make_string (len, NULL);
|
||||
|
||||
if (!SCM_UNBNDP (chr))
|
||||
{
|
||||
SCM_VALIDATE_CHAR (0, chr);
|
||||
memset (dst, SCM_CHAR (chr), len);
|
||||
res = scm_i_string_start_writing (res);
|
||||
for (p = 0; p < len; p++)
|
||||
scm_i_string_set_x (res, p, SCM_CHAR (chr));
|
||||
scm_i_string_stop_writing ();
|
||||
}
|
||||
|
||||
return res;
|
||||
|
@ -657,6 +937,20 @@ SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_string_width, "string-width", 1, 0, 0,
|
||||
(SCM string),
|
||||
"Return the bytes used to represent a character in @var{string}."
|
||||
"This will return 1 or 4.")
|
||||
#define FUNC_NAME s_scm_string_width
|
||||
{
|
||||
SCM_VALIDATE_STRING (1, string);
|
||||
if (!scm_i_is_narrow_string (string))
|
||||
return scm_from_int (4);
|
||||
|
||||
return scm_from_int (1);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
size_t
|
||||
scm_c_string_length (SCM string)
|
||||
{
|
||||
|
@ -682,7 +976,10 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
|
|||
else
|
||||
scm_out_of_range (NULL, k);
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
|
||||
else
|
||||
return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -691,7 +988,11 @@ scm_c_string_ref (SCM str, size_t p)
|
|||
{
|
||||
if (p >= scm_i_string_length (str))
|
||||
scm_out_of_range (NULL, scm_from_size_t (p));
|
||||
if (scm_i_is_narrow_string (str))
|
||||
return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
|
||||
else
|
||||
return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
|
||||
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
|
||||
|
@ -713,11 +1014,10 @@ SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
|
|||
scm_out_of_range (NULL, k);
|
||||
|
||||
SCM_VALIDATE_CHAR (3, chr);
|
||||
{
|
||||
char *dst = scm_i_string_writable_chars (str);
|
||||
dst[idx] = SCM_CHAR (chr);
|
||||
str = scm_i_string_start_writing (str);
|
||||
scm_i_string_set_x (str, idx, SCM_CHAR (chr));
|
||||
scm_i_string_stop_writing ();
|
||||
}
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
@ -727,11 +1027,9 @@ scm_c_string_set_x (SCM str, size_t p, SCM chr)
|
|||
{
|
||||
if (p >= scm_i_string_length (str))
|
||||
scm_out_of_range (NULL, scm_from_size_t (p));
|
||||
{
|
||||
char *dst = scm_i_string_writable_chars (str);
|
||||
dst[p] = SCM_CHAR (chr);
|
||||
str = scm_i_string_start_writing (str);
|
||||
scm_i_string_set_x (str, p, SCM_CHAR (chr));
|
||||
scm_i_string_stop_writing ();
|
||||
}
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
|
||||
|
@ -837,26 +1135,50 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
|
|||
#define FUNC_NAME s_scm_string_append
|
||||
{
|
||||
SCM res;
|
||||
size_t i = 0;
|
||||
size_t len = 0;
|
||||
int wide = 0;
|
||||
SCM l, s;
|
||||
char *data;
|
||||
scm_t_wchar *wdata;
|
||||
int i;
|
||||
|
||||
SCM_VALIDATE_REST_ARGUMENT (args);
|
||||
for (l = args; !scm_is_null (l); l = SCM_CDR (l))
|
||||
{
|
||||
s = SCM_CAR (l);
|
||||
SCM_VALIDATE_STRING (SCM_ARGn, s);
|
||||
i += scm_i_string_length (s);
|
||||
len += scm_i_string_length (s);
|
||||
if (!scm_i_is_narrow_string (s))
|
||||
wide = 1;
|
||||
}
|
||||
res = scm_i_make_string (i, &data);
|
||||
if (!wide)
|
||||
res = scm_i_make_string (len, &data);
|
||||
else
|
||||
res = scm_i_make_wide_string (len, &wdata);
|
||||
|
||||
for (l = args; !scm_is_null (l); l = SCM_CDR (l))
|
||||
{
|
||||
size_t len;
|
||||
s = SCM_CAR (l);
|
||||
SCM_VALIDATE_STRING (SCM_ARGn, s);
|
||||
len = scm_i_string_length (s);
|
||||
if (!wide)
|
||||
{
|
||||
memcpy (data, scm_i_string_chars (s), len);
|
||||
data += len;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (scm_i_is_narrow_string (s))
|
||||
{
|
||||
for (i = 0; i < scm_i_string_length (s); i++)
|
||||
wdata[i] = (unsigned char) scm_i_string_chars (s)[i];
|
||||
}
|
||||
else
|
||||
u32_cpy ((scm_t_uint32 *) wdata,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (s), len);
|
||||
wdata += len;
|
||||
}
|
||||
scm_remember_upto_here_1 (s);
|
||||
}
|
||||
return res;
|
||||
|
@ -875,8 +1197,11 @@ scm_from_locale_stringn (const char *str, size_t len)
|
|||
SCM res;
|
||||
char *dst;
|
||||
|
||||
if (len == (size_t)-1)
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
if (len == 0)
|
||||
return scm_nullstr;
|
||||
|
||||
res = scm_i_make_string (len, &dst);
|
||||
memcpy (dst, str, len);
|
||||
return res;
|
||||
|
@ -885,6 +1210,9 @@ scm_from_locale_stringn (const char *str, size_t len)
|
|||
SCM
|
||||
scm_from_locale_string (const char *str)
|
||||
{
|
||||
if (str == NULL)
|
||||
return scm_nullstr;
|
||||
|
||||
return scm_from_locale_stringn (str, -1);
|
||||
}
|
||||
|
||||
|
@ -893,21 +1221,20 @@ scm_take_locale_stringn (char *str, size_t len)
|
|||
{
|
||||
SCM buf, res;
|
||||
|
||||
if (len == (size_t)-1)
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
else
|
||||
{
|
||||
/* Ensure STR is null terminated. A realloc for 1 extra byte should
|
||||
often be satisfied from the alignment padding after the block, with
|
||||
no actual data movement. */
|
||||
str = scm_realloc (str, len+1);
|
||||
str = scm_realloc (str, len + 1);
|
||||
str[len] = '\0';
|
||||
}
|
||||
|
||||
buf = scm_i_take_stringbufn (str, len);
|
||||
res = scm_double_cell (STRING_TAG,
|
||||
SCM_UNPACK (buf),
|
||||
(scm_t_bits) 0, (scm_t_bits) len);
|
||||
SCM_UNPACK (buf), (scm_t_bits) 0, (scm_t_bits) len);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -917,33 +1244,143 @@ scm_take_locale_string (char *str)
|
|||
return scm_take_locale_stringn (str, -1);
|
||||
}
|
||||
|
||||
char *
|
||||
scm_to_locale_stringn (SCM str, size_t *lenp)
|
||||
/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
|
||||
and \UXXXXXX. */
|
||||
static void
|
||||
unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
|
||||
{
|
||||
char *res;
|
||||
size_t len;
|
||||
char *before, *after;
|
||||
size_t i, j;
|
||||
|
||||
before = *bufp;
|
||||
after = *bufp;
|
||||
i = 0;
|
||||
j = 0;
|
||||
while (i < *lenp)
|
||||
{
|
||||
if ((i <= *lenp - 6)
|
||||
&& before[i] == '\\'
|
||||
&& before[i + 1] == 'u'
|
||||
&& before[i + 2] == '0' && before[i + 3] == '0')
|
||||
{
|
||||
/* Convert \u00NN to \xNN */
|
||||
after[j] = '\\';
|
||||
after[j + 1] = 'x';
|
||||
after[j + 2] = tolower (before[i + 4]);
|
||||
after[j + 3] = tolower (before[i + 5]);
|
||||
i += 6;
|
||||
j += 4;
|
||||
}
|
||||
else if ((i <= *lenp - 10)
|
||||
&& before[i] == '\\'
|
||||
&& before[i + 1] == 'U'
|
||||
&& before[i + 2] == '0' && before[i + 3] == '0')
|
||||
{
|
||||
/* Convert \U00NNNNNN to \UNNNNNN */
|
||||
after[j] = '\\';
|
||||
after[j + 1] = 'U';
|
||||
after[j + 2] = tolower (before[i + 4]);
|
||||
after[j + 3] = tolower (before[i + 5]);
|
||||
after[j + 4] = tolower (before[i + 6]);
|
||||
after[j + 5] = tolower (before[i + 7]);
|
||||
after[j + 6] = tolower (before[i + 8]);
|
||||
after[j + 7] = tolower (before[i + 9]);
|
||||
i += 10;
|
||||
j += 8;
|
||||
}
|
||||
else
|
||||
{
|
||||
after[j] = before[i];
|
||||
i++;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
*lenp = j;
|
||||
after = scm_realloc (after, j);
|
||||
}
|
||||
|
||||
char *
|
||||
scm_to_locale_stringn (SCM str, size_t * lenp)
|
||||
{
|
||||
const char *enc;
|
||||
|
||||
/* In the future, enc will hold the port's encoding. */
|
||||
enc = NULL;
|
||||
|
||||
return scm_to_stringn (str, lenp, enc, iconveh_escape_sequence);
|
||||
}
|
||||
|
||||
/* Low-level scheme to C string conversion function. */
|
||||
char *
|
||||
scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
||||
enum iconv_ilseq_handler handler)
|
||||
{
|
||||
static const char iso[11] = "ISO-8859-1";
|
||||
char *buf;
|
||||
size_t ilen, len, i;
|
||||
|
||||
if (!scm_is_string (str))
|
||||
scm_wrong_type_arg_msg (NULL, 0, str, "string");
|
||||
len = scm_i_string_length (str);
|
||||
res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
|
||||
memcpy (res, scm_i_string_chars (str), len);
|
||||
ilen = scm_i_string_length (str);
|
||||
|
||||
if (ilen == 0)
|
||||
{
|
||||
buf = scm_malloc (1);
|
||||
buf[0] = '\0';
|
||||
if (lenp)
|
||||
*lenp = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
if (lenp == NULL)
|
||||
{
|
||||
res[len] = '\0';
|
||||
if (strlen (res) != len)
|
||||
{
|
||||
free (res);
|
||||
for (i = 0; i < ilen; i++)
|
||||
if (scm_i_string_ref (str, i) == '\0')
|
||||
scm_misc_error (NULL,
|
||||
"string contains #\\nul character: ~S",
|
||||
scm_list_1 (str));
|
||||
}
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
if (lenp)
|
||||
{
|
||||
buf = scm_malloc (ilen);
|
||||
memcpy (buf, scm_i_string_chars (str), ilen);
|
||||
*lenp = ilen;
|
||||
return buf;
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = scm_malloc (ilen + 1);
|
||||
memcpy (buf, scm_i_string_chars (str), ilen);
|
||||
buf[ilen] = '\0';
|
||||
return buf;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
buf = NULL;
|
||||
len = 0;
|
||||
buf = u32_conv_to_encoding (iso,
|
||||
handler,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (str),
|
||||
ilen, NULL, NULL, &len);
|
||||
if (buf == NULL)
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (iso), str));
|
||||
|
||||
if (handler == iconveh_escape_sequence)
|
||||
unistring_escapes_to_guile_escapes (&buf, &len);
|
||||
|
||||
if (lenp)
|
||||
*lenp = len;
|
||||
else
|
||||
{
|
||||
buf = scm_realloc (buf, len + 1);
|
||||
buf[len] = '\0';
|
||||
}
|
||||
|
||||
scm_remember_upto_here_1 (str);
|
||||
return res;
|
||||
return buf;
|
||||
}
|
||||
|
||||
char *
|
||||
|
@ -956,11 +1393,14 @@ size_t
|
|||
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
|
||||
{
|
||||
size_t len;
|
||||
|
||||
char *result = NULL;
|
||||
if (!scm_is_string (str))
|
||||
scm_wrong_type_arg_msg (NULL, 0, str, "string");
|
||||
len = scm_i_string_length (str);
|
||||
memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
|
||||
result = scm_to_locale_stringn (str, &len);
|
||||
|
||||
memcpy (buf, result, (len > max_len) ? max_len : len);
|
||||
free (result);
|
||||
|
||||
scm_remember_upto_here_1 (str);
|
||||
return len;
|
||||
}
|
||||
|
@ -1081,6 +1521,7 @@ scm_i_deprecated_string_chars (SCM str)
|
|||
|
||||
/* The following is still wrong, of course...
|
||||
*/
|
||||
str = scm_i_string_start_writing (str);
|
||||
chars = scm_i_string_writable_chars (str);
|
||||
scm_i_string_stop_writing ();
|
||||
return chars;
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
|
||||
|
||||
#include <uniconv.h>
|
||||
#include "libguile/__scm.h"
|
||||
|
||||
|
||||
|
@ -46,26 +47,37 @@
|
|||
|
||||
Internal, low level interface to the character arrays
|
||||
|
||||
- Use scm_i_string_chars to get a pointer to the byte array of a
|
||||
string for reading. Use scm_i_string_length to get the number of
|
||||
bytes in that array. The array is not null-terminated.
|
||||
- Use scm_is_narrow_string to determine is the string is narrow or
|
||||
wide.
|
||||
|
||||
- Use scm_i_string_chars or scm_i_string_wide_chars to get a
|
||||
pointer to the byte or scm_t_wchar array of a string for reading.
|
||||
Use scm_i_string_length to get the number of characters in that
|
||||
array. The array is not null-terminated.
|
||||
|
||||
- The array is valid as long as the corresponding SCM object is
|
||||
protected but only until the next SCM_TICK. During such a 'safe
|
||||
point', strings might change their representation.
|
||||
|
||||
- Use scm_i_string_writable_chars to get the same pointer as with
|
||||
scm_i_string_chars, but for reading and writing. This is a
|
||||
potentially costly operation since it implements the
|
||||
copy-on-write behavior. When done with the writing, call
|
||||
scm_i_string_stop_writing. You must do this before the next
|
||||
SCM_TICK. (This means, before calling almost any other scm_
|
||||
function and you can't allow throws, of course.)
|
||||
- Use scm_i_string_start_writing to get a version of the string
|
||||
ready for reading and writing. This is a potentially costly
|
||||
operation since it implements the copy-on-write behavior. When
|
||||
done with the writing, call scm_i_string_stop_writing. You must
|
||||
do this before the next SCM_TICK. (This means, before calling
|
||||
almost any other scm_ function and you can't allow throws, of
|
||||
course.)
|
||||
|
||||
- New strings can be created with scm_i_make_string. This gives
|
||||
access to a writable pointer that remains valid as long as nobody
|
||||
else makes a copy-on-write substring of the string. Do not call
|
||||
scm_i_string_stop_writing for this pointer.
|
||||
- New strings can be created with scm_i_make_string or
|
||||
scm_i_make_wide_string. This gives access to a writable pointer
|
||||
that remains valid as long as nobody else makes a copy-on-write
|
||||
substring of the string. Do not call scm_i_string_stop_writing
|
||||
for this pointer.
|
||||
|
||||
- Alternately, scm_i_string_ref and scm_i_string_set_x can be used
|
||||
to read and write strings without worrying about whether the
|
||||
string is narrow or wide. scm_i_string_set_x still needs to be
|
||||
bracketed by scm_i_string_start_writing and
|
||||
scm_i_string_stop_writing.
|
||||
|
||||
Legacy interface
|
||||
|
||||
|
@ -74,13 +86,15 @@
|
|||
- SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately
|
||||
calls scm_i_stop_writing, hoping for the best. SCM_STRING_LENGTH
|
||||
is the same as scm_i_string_length. SCM_STRING_CHARS will throw
|
||||
an error for for strings that are not null-terminated.
|
||||
an error for for strings that are not null-terminated. There is
|
||||
no wide version of this interface.
|
||||
*/
|
||||
|
||||
SCM_API SCM scm_string_p (SCM x);
|
||||
SCM_API SCM scm_string (SCM chrs);
|
||||
SCM_API SCM scm_make_string (SCM k, SCM chr);
|
||||
SCM_API SCM scm_string_length (SCM str);
|
||||
SCM_API SCM scm_string_width (SCM str);
|
||||
SCM_API SCM scm_string_ref (SCM str, SCM k);
|
||||
SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);
|
||||
SCM_API SCM scm_substring (SCM str, SCM start, SCM end);
|
||||
|
@ -106,6 +120,9 @@ SCM_API SCM scm_take_locale_string (char *str);
|
|||
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
|
||||
SCM_API char *scm_to_locale_string (SCM str);
|
||||
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
|
||||
SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
|
||||
const char *encoding,
|
||||
enum iconv_ilseq_handler handler);
|
||||
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
|
||||
|
||||
SCM_API SCM scm_makfromstrs (int argc, char **argv);
|
||||
|
@ -113,15 +130,20 @@ SCM_API SCM scm_makfromstrs (int argc, char **argv);
|
|||
/* internal accessor functions. Arguments must be valid. */
|
||||
|
||||
SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap);
|
||||
SCM_INTERNAL SCM scm_i_make_wide_string (size_t len, scm_t_wchar **datap);
|
||||
SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end);
|
||||
SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end);
|
||||
SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end);
|
||||
SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end);
|
||||
SCM_INTERNAL size_t scm_i_string_length (SCM str);
|
||||
SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str);
|
||||
SCM_API const scm_t_wchar *scm_i_string_wide_chars (SCM str);
|
||||
SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str);
|
||||
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
|
||||
SCM_INTERNAL void scm_i_string_stop_writing (void);
|
||||
|
||||
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
|
||||
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
|
||||
/* internal functions related to symbols. */
|
||||
|
||||
SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
|
||||
|
@ -133,8 +155,11 @@ SCM_INTERNAL SCM
|
|||
scm_i_c_take_symbol (char *name, size_t len,
|
||||
scm_t_bits flags, unsigned long hash, SCM props);
|
||||
SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
|
||||
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
|
||||
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
|
||||
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
|
||||
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
|
||||
|
||||
/* internal GC functions. */
|
||||
|
||||
|
|
|
@ -336,6 +336,7 @@ do { \
|
|||
|
||||
#define FETCH() (*ip++)
|
||||
#define FETCH_LENGTH(len) do { len=*ip++; len<<=8; len+=*ip++; len<<=8; len+=*ip++; } while (0)
|
||||
#define FETCH_WIDTH(width) do { width=*ip++; } while (0)
|
||||
|
||||
#undef CLOCK
|
||||
#if VM_USE_CLOCK
|
||||
|
|
|
@ -72,31 +72,82 @@ VM_DEFINE_LOADER (82, load_number, "load-number")
|
|||
VM_DEFINE_LOADER (83, load_string, "load-string")
|
||||
{
|
||||
size_t len;
|
||||
int width;
|
||||
SCM str;
|
||||
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_from_locale_stringn ((char *)ip, len));
|
||||
/* Was: scm_makfromstr (ip, len, 0) */
|
||||
ip += len;
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL);
|
||||
PUSH (str);
|
||||
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (84, load_symbol, "load-symbol")
|
||||
{
|
||||
size_t len;
|
||||
int width;
|
||||
SCM str;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_from_locale_symboln ((char *)ip, len));
|
||||
ip += len;
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL);
|
||||
PUSH (scm_string_to_symbol (str));
|
||||
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_LOADER (85, load_keyword, "load-keyword")
|
||||
{
|
||||
size_t len;
|
||||
int width;
|
||||
SCM str;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_from_locale_keywordn ((char *)ip, len));
|
||||
ip += len;
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL);
|
||||
PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str)));
|
||||
ip += len * width;
|
||||
NEXT;
|
||||
}
|
||||
|
||||
|
@ -132,13 +183,29 @@ VM_DEFINE_INSTRUCTION (87, link_now, "link-now", 0, 1, 1)
|
|||
|
||||
VM_DEFINE_LOADER (88, define, "define")
|
||||
{
|
||||
SCM sym;
|
||||
SCM str, sym;
|
||||
size_t len;
|
||||
|
||||
int width;
|
||||
FETCH_LENGTH (len);
|
||||
FETCH_WIDTH (width);
|
||||
SYNC_REGISTER ();
|
||||
sym = scm_from_locale_symboln ((char *)ip, len);
|
||||
ip += len;
|
||||
if (width == 1)
|
||||
{
|
||||
char *buf;
|
||||
str = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, (char *) ip, len);
|
||||
}
|
||||
else if (width == 4)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
str = scm_i_make_wide_string (len, &wbuf);
|
||||
memcpy ((char *) wbuf, (char *) ip, len * width);
|
||||
}
|
||||
else
|
||||
SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL);
|
||||
sym = scm_string_to_symbol (str);
|
||||
ip += len * width;
|
||||
|
||||
SYNC_REGISTER ();
|
||||
PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T));
|
||||
|
|
|
@ -34,6 +34,10 @@
|
|||
;; lengths are encoded in 3 bytes
|
||||
(define *len-len* 3)
|
||||
|
||||
;; the number of bytes per string character is encoded in 1 byte
|
||||
(define *width-len* 1)
|
||||
|
||||
|
||||
(define (byte-length assembly)
|
||||
(pmatch assembly
|
||||
(,label (guard (not (pair? label)))
|
||||
|
@ -45,15 +49,15 @@
|
|||
((load-number ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
((load-string ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
((load-symbol ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
((load-keyword ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
((load-array ,bv)
|
||||
(+ 1 *len-len* (bytevector-length bv)))
|
||||
((define ,str)
|
||||
(+ 1 *len-len* (string-length str)))
|
||||
(+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
|
||||
((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code)
|
||||
(+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0)))
|
||||
((,inst . _) (guard (>= (instruction-length inst) 0))
|
||||
|
|
|
@ -65,6 +65,12 @@
|
|||
(write-byte (logand (ash x -8) 255))
|
||||
(write-byte (logand (ash x -16) 255))
|
||||
(write-byte (logand (ash x -24) 255)))
|
||||
(define (write-uint32 x) (case byte-order
|
||||
((1234) (write-uint32-le x))
|
||||
((4321) (write-uint32-be x))
|
||||
(else (error "unknown endianness" byte-order))))
|
||||
(define (write-wide-string s)
|
||||
(string-for-each (lambda (c) (write-uint32 (char->integer c))) s))
|
||||
(define (write-loader-len len)
|
||||
(write-byte (ash len -16))
|
||||
(write-byte (logand (ash len -8) 255))
|
||||
|
@ -72,6 +78,14 @@
|
|||
(define (write-loader str)
|
||||
(write-loader-len (string-length str))
|
||||
(write-string str))
|
||||
(define (write-sized-loader str)
|
||||
(let ((len (string-length str))
|
||||
(wid (string-width str)))
|
||||
(write-loader-len len)
|
||||
(write-byte wid)
|
||||
(if (= wid 4)
|
||||
(write-wide-string str)
|
||||
(write-string str))))
|
||||
(define (write-bytevector bv)
|
||||
(write-loader-len (bytevector-length bv))
|
||||
;; Ew!
|
||||
|
@ -89,10 +103,6 @@
|
|||
(write-uint16 (case byte-order
|
||||
((1234) write-uint16-le)
|
||||
((4321) write-uint16-be)
|
||||
(else (error "unknown endianness" byte-order))))
|
||||
(write-uint32 (case byte-order
|
||||
((1234) write-uint32-le)
|
||||
((4321) write-uint32-be)
|
||||
(else (error "unknown endianness" byte-order)))))
|
||||
(let ((opcode (instruction->opcode inst))
|
||||
(len (instruction-length inst)))
|
||||
|
@ -126,11 +136,11 @@
|
|||
((load-unsigned-integer ,str) (write-loader str))
|
||||
((load-integer ,str) (write-loader str))
|
||||
((load-number ,str) (write-loader str))
|
||||
((load-string ,str) (write-loader str))
|
||||
((load-symbol ,str) (write-loader str))
|
||||
((load-keyword ,str) (write-loader str))
|
||||
((load-string ,str) (write-sized-loader str))
|
||||
((load-symbol ,str) (write-sized-loader str))
|
||||
((load-keyword ,str) (write-sized-loader str))
|
||||
((load-array ,bv) (write-bytevector bv))
|
||||
((define ,str) (write-loader str))
|
||||
((define ,str) (write-sized-loader str))
|
||||
((br ,l) (write-break l))
|
||||
((br-if ,l) (write-break l))
|
||||
((br-if-not ,l) (write-break l))
|
||||
|
|
|
@ -79,15 +79,15 @@
|
|||
(char->integer #\1) (char->integer #\4)))
|
||||
|
||||
(comp-test '(load-string "foo")
|
||||
(vector 'load-string 0 0 3 (char->integer #\f) (char->integer #\o)
|
||||
(vector 'load-string 0 0 3 1 (char->integer #\f) (char->integer #\o)
|
||||
(char->integer #\o)))
|
||||
|
||||
(comp-test '(load-symbol "foo")
|
||||
(vector 'load-symbol 0 0 3 (char->integer #\f) (char->integer #\o)
|
||||
(vector 'load-symbol 0 0 3 1 (char->integer #\f) (char->integer #\o)
|
||||
(char->integer #\o)))
|
||||
|
||||
(comp-test '(load-keyword "qux")
|
||||
(vector 'load-keyword 0 0 3 (char->integer #\q) (char->integer #\u)
|
||||
(vector 'load-keyword 0 0 3 1 (char->integer #\q) (char->integer #\u)
|
||||
(char->integer #\x)))
|
||||
|
||||
(comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue