1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-06 04:00:26 +02:00

Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests
This commit is contained in:
Michael Gran 2009-08-08 02:35:00 -07:00
parent a876e7dcea
commit 9c44cd4559
15 changed files with 1046 additions and 306 deletions

View file

@ -969,7 +969,35 @@ scm_fill_input (SCM port)
* This function differs from scm_c_write; it updates port line and * This function differs from scm_c_write; it updates port line and
* column. */ * column. */
void static void
update_port_lf (scm_t_wchar c, SCM port)
{
if (c == '\a')
{
}
else if (c == '\b')
{
SCM_DECCOL (port);
}
else if (c == '\n')
{
SCM_INCLINE (port);
}
else if (c == '\r')
{
SCM_ZEROCOL (port);
}
else if (c == '\t')
{
SCM_TABCOL (port);
}
else
{
SCM_INCCOL (port);
}
}
void
scm_lfwrite (const char *ptr, size_t size, SCM port) scm_lfwrite (const char *ptr, size_t size, SCM port)
{ {
scm_t_port *pt = SCM_PTAB_ENTRY (port); scm_t_port *pt = SCM_PTAB_ENTRY (port);
@ -980,30 +1008,54 @@ scm_lfwrite (const char *ptr, size_t size, SCM port)
ptob->write (port, ptr, size); ptob->write (port, ptr, size);
for (; size; ptr++, size--) { for (; size; ptr++, size--)
if (*ptr == '\a') { update_port_lf ((scm_t_wchar) (unsigned char) *ptr, port);
}
else if (*ptr == '\b') {
SCM_DECCOL(port);
}
else if (*ptr == '\n') {
SCM_INCLINE(port);
}
else if (*ptr == '\r') {
SCM_ZEROCOL(port);
}
else if (*ptr == '\t') {
SCM_TABCOL(port);
}
else {
SCM_INCCOL(port);
}
}
if (pt->rw_random) if (pt->rw_random)
pt->rw_active = SCM_PORT_WRITE; pt->rw_active = SCM_PORT_WRITE;
} }
/* Write a scheme string STR to PORT from START inclusive to END
exclusive. */
void
scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
{
size_t i, size = scm_i_string_length (str);
scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_ptob_descriptor *ptob = &scm_ptobs[SCM_PTOBNUM (port)];
scm_t_wchar p;
char *buf;
size_t len;
if (pt->rw_active == SCM_PORT_READ)
scm_end_input (port);
if (end == -1)
end = size;
size = end - start;
buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
NULL, iconveh_escape_sequence);
ptob->write (port, buf, len);
free (buf);
for (i = 0; i < size; i++)
{
p = scm_i_string_ref (str, i + start);
update_port_lf (p, port);
}
if (pt->rw_random)
pt->rw_active = SCM_PORT_WRITE;
}
/* Write a scheme string STR to PORT. */
void
scm_lfwrite_str (SCM str, SCM port)
{
scm_lfwrite_substr (str, 0, -1, port);
}
/* scm_c_read /* scm_c_read
* *
* Used by an application to read arbitrary number of bytes from an * Used by an application to read arbitrary number of bytes from an

View file

@ -269,6 +269,9 @@ SCM_API SCM scm_read_char (SCM port);
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size); SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
SCM_API void scm_c_write (SCM port, const void *buffer, size_t size); SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port); SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
SCM_INTERNAL void scm_lfwrite_str (SCM str, SCM port);
SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
SCM port);
SCM_API void scm_flush (SCM port); SCM_API void scm_flush (SCM port);
SCM_API void scm_end_input (SCM port); SCM_API void scm_end_input (SCM port);
SCM_API int scm_fill_input (SCM port); SCM_API int scm_fill_input (SCM port);

View file

@ -559,55 +559,113 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
break; break;
} }
break; break;
case scm_tc7_string: case scm_tc7_string:
if (SCM_WRITINGP (pstate)) if (SCM_WRITINGP (pstate))
{ {
size_t i, j, len; size_t i, j, len;
const char *data; static char const hex[] = "0123456789abcdef";
char buf[8];
scm_putc ('"', port);
len = scm_i_string_length (exp);
data = scm_i_string_chars (exp);
for (i = 0, j = 0; i < len; ++i)
{
unsigned char ch = data[i];
if ((ch < 32 && ch != '\n') || (127 <= ch && ch < 148))
{
static char const hex[]="0123456789abcdef";
char buf[4];
scm_lfwrite (data+j, i-j, port); scm_putc ('"', port);
buf[0] = '\\'; len = scm_i_string_length (exp);
buf[1] = 'x'; for (i = 0; i < len; ++i)
buf[2] = hex [ch / 16]; {
buf[3] = hex [ch % 16]; scm_t_wchar ch = scm_i_string_ref (exp, i);
scm_lfwrite (buf, 4, port); int printed = 0;
data = scm_i_string_chars (exp);
j = i+1; if (ch == ' ' || ch == '\n')
} {
else if (ch == '"' || ch == '\\') scm_putc (ch, port);
{ printed = 1;
scm_lfwrite (data+j, i-j, port); }
scm_putc ('\\', port); else if (ch == '"' || ch == '\\')
data = scm_i_string_chars (exp); {
j = i; scm_putc ('\\', port);
} scm_charprint (ch, port);
} printed = 1;
scm_lfwrite (data+j, i-j, port); }
scm_putc ('"', port); else
scm_remember_upto_here_1 (exp); if (uc_is_general_category_withtable
} (ch,
else UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M |
scm_lfwrite (scm_i_string_chars (exp), scm_i_string_length (exp), UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P |
port); UC_CATEGORY_MASK_S))
scm_remember_upto_here_1 (exp); {
break; /* Print the character since it is a graphic
character. */
scm_t_wchar *wbuf;
SCM wstr = scm_i_make_wide_string (1, &wbuf);
char *buf;
size_t len;
wbuf[0] = ch;
buf = u32_conv_to_encoding ("ISO-8859-1",
iconveh_error,
(scm_t_uint32 *) wbuf,
1, NULL, NULL, &len);
if (buf != NULL)
{
/* Character is graphic and representable in
this encoding. Print it. */
scm_lfwrite_str (wstr, port);
free (buf);
printed = 1;
}
}
if (!printed)
{
/* Character is graphic but unrepresentable in
this port's encoding or is not graphic. */
if (ch <= 0xFF)
{
buf[0] = '\\';
buf[1] = 'x';
buf[2] = hex[ch / 16];
buf[3] = hex[ch % 16];
scm_lfwrite (buf, 4, port);
}
else if (ch <= 0xFFFF)
{
buf[0] = '\\';
buf[1] = 'u';
buf[2] = hex[(ch & 0xF000) >> 12];
buf[3] = hex[(ch & 0xF00) >> 8];
buf[4] = hex[(ch & 0xF0) >> 4];
buf[5] = hex[(ch & 0xF)];
scm_lfwrite (buf, 6, port);
j = i + 1;
}
else if (ch > 0xFFFF)
{
buf[0] = '\\';
buf[1] = 'U';
buf[2] = hex[(ch & 0xF00000) >> 20];
buf[3] = hex[(ch & 0xF0000) >> 16];
buf[4] = hex[(ch & 0xF000) >> 12];
buf[5] = hex[(ch & 0xF00) >> 8];
buf[6] = hex[(ch & 0xF0) >> 4];
buf[7] = hex[(ch & 0xF)];
scm_lfwrite (buf, 8, port);
j = i + 1;
}
}
}
scm_putc ('"', port);
scm_remember_upto_here_1 (exp);
}
else
scm_lfwrite (scm_i_string_chars (exp), scm_i_string_length (exp),
port);
scm_remember_upto_here_1 (exp);
break;
case scm_tc7_symbol: case scm_tc7_symbol:
if (scm_i_symbol_is_interned (exp)) if (scm_i_symbol_is_interned (exp))
{ {
scm_print_symbol_name (scm_i_symbol_chars (exp), scm_print_symbol_name (scm_i_symbol_chars (exp),
scm_i_symbol_length (exp), scm_i_symbol_length (exp), port);
port);
scm_remember_upto_here_1 (exp); scm_remember_upto_here_1 (exp);
} }
else else
@ -763,6 +821,17 @@ scm_prin1 (SCM exp, SCM port, int writingp)
} }
} }
/* Print a character.
*/
void
scm_charprint (scm_t_uint32 ch, SCM port)
{
scm_t_wchar *wbuf;
SCM wstr = scm_i_make_wide_string (1, &wbuf);
wbuf[0] = ch;
scm_lfwrite_str (wstr, port);
}
/* Print an integer. /* Print an integer.
*/ */

View file

@ -77,6 +77,7 @@ SCM_API SCM scm_print_options (SCM setting);
SCM_API SCM scm_make_print_state (void); SCM_API SCM scm_make_print_state (void);
SCM_API void scm_free_print_state (SCM print_state); SCM_API void scm_free_print_state (SCM print_state);
SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state); SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state);
SCM_API void scm_charprint (scm_t_uint32 c, SCM port);
SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port); SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port); SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port); SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);

View file

@ -387,110 +387,167 @@ scm_read_string (int chr, SCM port)
object (the string returned). */ object (the string returned). */
SCM str = SCM_BOOL_F; SCM str = SCM_BOOL_F;
char c_str[READER_STRING_BUFFER_SIZE];
unsigned c_str_len = 0; unsigned c_str_len = 0;
int c; scm_t_wchar c;
str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
while ('"' != (c = scm_getc (port))) while ('"' != (c = scm_getc (port)))
{ {
if (c == EOF) if (c == EOF)
str_eof: scm_i_input_error (FUNC_NAME, port, {
"end of file in string constant", str_eof:
SCM_EOL); scm_i_input_error (FUNC_NAME, port,
"end of file in string constant", SCM_EOL);
}
if (c_str_len + 1 >= sizeof (c_str)) if (c_str_len + 1 >= scm_i_string_length (str))
{ {
/* Flush the C buffer onto a Scheme string. */ SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
SCM addy;
if (str == SCM_BOOL_F) str = scm_string_append (scm_list_2 (str, addy));
str = scm_c_make_string (0, SCM_MAKE_CHAR ('X')); }
addy = scm_from_locale_stringn (c_str, c_str_len);
str = scm_string_append_shared (scm_list_2 (str, addy));
c_str_len = 0;
}
if (c == '\\') if (c == '\\')
switch (c = scm_getc (port)) {
{ switch (c = scm_getc (port))
case EOF: {
goto str_eof; case EOF:
case '"': goto str_eof;
case '\\': case '"':
break; case '\\':
break;
#if SCM_ENABLE_ELISP #if SCM_ENABLE_ELISP
case '(': case '(':
case ')': case ')':
if (SCM_ESCAPED_PARENS_P) if (SCM_ESCAPED_PARENS_P)
break; break;
goto bad_escaped; goto bad_escaped;
#endif #endif
case '\n': case '\n':
continue; continue;
case '0': case '0':
c = '\0'; c = '\0';
break; break;
case 'f': case 'f':
c = '\f'; c = '\f';
break; break;
case 'n': case 'n':
c = '\n'; c = '\n';
break; break;
case 'r': case 'r':
c = '\r'; c = '\r';
break; break;
case 't': case 't':
c = '\t'; c = '\t';
break; break;
case 'a': case 'a':
c = '\007'; c = '\007';
break; break;
case 'v': case 'v':
c = '\v'; c = '\v';
break; break;
case 'x': case 'x':
{ {
int a, b; scm_t_wchar a, b;
a = scm_getc (port); a = scm_getc (port);
if (a == EOF) goto str_eof; if (a == EOF)
b = scm_getc (port); goto str_eof;
if (b == EOF) goto str_eof; b = scm_getc (port);
if ('0' <= a && a <= '9') a -= '0'; if (b == EOF)
else if ('A' <= a && a <= 'F') a = a - 'A' + 10; goto str_eof;
else if ('a' <= a && a <= 'f') a = a - 'a' + 10; if ('0' <= a && a <= '9')
else goto bad_escaped; a -= '0';
if ('0' <= b && b <= '9') b -= '0'; else if ('A' <= a && a <= 'F')
else if ('A' <= b && b <= 'F') b = b - 'A' + 10; a = a - 'A' + 10;
else if ('a' <= b && b <= 'f') b = b - 'a' + 10; else if ('a' <= a && a <= 'f')
else goto bad_escaped; a = a - 'a' + 10;
c = a * 16 + b; else
break; {
} c = a;
default: goto bad_escaped;
bad_escaped: }
scm_i_input_error (FUNC_NAME, port, if ('0' <= b && b <= '9')
"illegal character in escape sequence: ~S", b -= '0';
scm_list_1 (SCM_MAKE_CHAR (c))); else if ('A' <= b && b <= 'F')
} b = b - 'A' + 10;
c_str[c_str_len++] = c; else if ('a' <= b && b <= 'f')
b = b - 'a' + 10;
else
{
c = b;
goto bad_escaped;
}
c = a * 16 + b;
break;
}
case 'u':
{
scm_t_wchar a;
int i;
c = 0;
for (i = 0; i < 4; i++)
{
a = scm_getc (port);
if (a == EOF)
goto str_eof;
if ('0' <= a && a <= '9')
a -= '0';
else if ('A' <= a && a <= 'F')
a = a - 'A' + 10;
else if ('a' <= a && a <= 'f')
a = a - 'a' + 10;
else
{
c = a;
goto bad_escaped;
}
c = c * 16 + a;
}
break;
}
case 'U':
{
scm_t_wchar a;
int i;
c = 0;
for (i = 0; i < 6; i++)
{
a = scm_getc (port);
if (a == EOF)
goto str_eof;
if ('0' <= a && a <= '9')
a -= '0';
else if ('A' <= a && a <= 'F')
a = a - 'A' + 10;
else if ('a' <= a && a <= 'f')
a = a - 'a' + 10;
else
{
c = a;
goto bad_escaped;
}
c = c * 16 + a;
}
break;
}
default:
bad_escaped:
scm_i_input_error (FUNC_NAME, port,
"illegal character in escape sequence: ~S",
scm_list_1 (SCM_MAKE_CHAR (c)));
}
}
str = scm_i_string_start_writing (str);
scm_i_string_set_x (str, c_str_len++, c);
scm_i_string_stop_writing ();
} }
if (c_str_len > 0) if (c_str_len > 0)
{ {
SCM addy; return scm_i_substring_copy (str, 0, c_str_len);
addy = scm_from_locale_stringn (c_str, c_str_len);
if (str == SCM_BOOL_F)
str = addy;
else
str = scm_string_append_shared (scm_list_2 (str, addy));
} }
else
str = (str == SCM_BOOL_F) ? scm_nullstr : str; return scm_nullstr;
return str;
} }
#undef FUNC_NAME #undef FUNC_NAME

View file

@ -131,6 +131,7 @@ SCM_DEFINE (scm_read_string_x_partial, "read-string!/partial", 1, 3, 0,
don't touch the file descriptor. otherwise the don't touch the file descriptor. otherwise the
"return immediately if something is available" rule may "return immediately if something is available" rule may
be violated. */ be violated. */
str = scm_i_string_start_writing (str);
dest = scm_i_string_writable_chars (str) + offset; dest = scm_i_string_writable_chars (str) + offset;
chars_read = scm_take_from_input_buffers (port, dest, read_len); chars_read = scm_take_from_input_buffers (port, dest, read_len);
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
@ -140,6 +141,7 @@ SCM_DEFINE (scm_read_string_x_partial, "read-string!/partial", 1, 3, 0,
if (chars_read == 0 && read_len > 0) /* don't confuse read_len == 0 with if (chars_read == 0 && read_len > 0) /* don't confuse read_len == 0 with
EOF. */ EOF. */
{ {
str = scm_i_string_start_writing (str);
dest = scm_i_string_writable_chars (str) + offset; dest = scm_i_string_writable_chars (str) + offset;
SCM_SYSCALL (chars_read = read (fdes, dest, read_len)); SCM_SYSCALL (chars_read = read (fdes, dest, read_len));
scm_i_string_stop_writing (); scm_i_string_stop_writing ();

View file

@ -1438,6 +1438,7 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
fd = SCM_FPORT_FDES (sock); fd = SCM_FPORT_FDES (sock);
len = scm_i_string_length (buf); len = scm_i_string_length (buf);
buf = scm_i_string_start_writing (buf);
dest = scm_i_string_writable_chars (buf); dest = scm_i_string_writable_chars (buf);
SCM_SYSCALL (rv = recv (fd, dest, len, flg)); SCM_SYSCALL (rv = recv (fd, dest, len, flg));
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
@ -1482,6 +1483,7 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0,
fd = SCM_FPORT_FDES (sock); fd = SCM_FPORT_FDES (sock);
len = scm_i_string_length (message); len = scm_i_string_length (message);
message = scm_i_string_start_writing (message);
src = scm_i_string_writable_chars (message); src = scm_i_string_writable_chars (message);
SCM_SYSCALL (rv = send (fd, src, len, flg)); SCM_SYSCALL (rv = send (fd, src, len, flg));
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
@ -1550,6 +1552,7 @@ SCM_DEFINE (scm_recvfrom, "recvfrom!", 2, 3, 0,
/* recvfrom will not necessarily return an address. usually nothing /* recvfrom will not necessarily return an address. usually nothing
is returned for stream sockets. */ is returned for stream sockets. */
str = scm_i_string_start_writing (str);
buf = scm_i_string_writable_chars (str); buf = scm_i_string_writable_chars (str);
((struct sockaddr *) &addr)->sa_family = AF_UNSPEC; ((struct sockaddr *) &addr)->sa_family = AF_UNSPEC;
SCM_SYSCALL (rv = recvfrom (fd, buf + offset, SCM_SYSCALL (rv = recvfrom (fd, buf + offset,

View file

@ -549,6 +549,7 @@ SCM_DEFINE (scm_string_copy_x, "string-copy!", 3, 2, 0,
len = cend - cstart; len = cend - cstart;
SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart); SCM_ASSERT_RANGE (3, s, len <= scm_i_string_length (target) - ctstart);
target = scm_i_string_start_writing (target);
ctarget = scm_i_string_writable_chars (target); ctarget = scm_i_string_writable_chars (target);
memmove (ctarget + ctstart, cstr + cstart, len); memmove (ctarget + ctstart, cstr + cstart, len);
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
@ -985,6 +986,7 @@ SCM_DEFINE (scm_substring_fill_x, "string-fill!", 2, 2, 0,
4, end, cend); 4, end, cend);
SCM_VALIDATE_CHAR_COPY (2, chr, c); SCM_VALIDATE_CHAR_COPY (2, chr, c);
str = scm_i_string_start_writing (str);
cstr = scm_i_string_writable_chars (str); cstr = scm_i_string_writable_chars (str);
for (k = cstart; k < cend; k++) for (k = cstart; k < cend; k++)
cstr[k] = c; cstr[k] = c;
@ -2376,6 +2378,7 @@ string_upcase_x (SCM v, size_t start, size_t end)
size_t k; size_t k;
char *dst; char *dst;
v = scm_i_string_start_writing (v);
dst = scm_i_string_writable_chars (v); dst = scm_i_string_writable_chars (v);
for (k = start; k < end; ++k) for (k = start; k < end; ++k)
dst[k] = scm_c_upcase (dst[k]); dst[k] = scm_c_upcase (dst[k]);
@ -2442,6 +2445,7 @@ string_downcase_x (SCM v, size_t start, size_t end)
size_t k; size_t k;
char *dst; char *dst;
v = scm_i_string_start_writing (v);
dst = scm_i_string_writable_chars (v); dst = scm_i_string_writable_chars (v);
for (k = start; k < end; ++k) for (k = start; k < end; ++k)
dst[k] = scm_c_downcase (dst[k]); dst[k] = scm_c_downcase (dst[k]);
@ -2511,6 +2515,7 @@ string_titlecase_x (SCM str, size_t start, size_t end)
size_t i; size_t i;
int in_word = 0; int in_word = 0;
str = scm_i_string_start_writing (str);
sz = (unsigned char *) scm_i_string_writable_chars (str); sz = (unsigned char *) scm_i_string_writable_chars (str);
for(i = start; i < end; i++) for(i = start; i < end; i++)
{ {
@ -2635,6 +2640,7 @@ SCM_DEFINE (scm_string_reverse, "string-reverse", 1, 2, 0,
2, start, cstart, 2, start, cstart,
3, end, cend); 3, end, cend);
result = scm_string_copy (str); result = scm_string_copy (str);
result = scm_i_string_start_writing (result);
ctarget = scm_i_string_writable_chars (result); ctarget = scm_i_string_writable_chars (result);
string_reverse_x (ctarget, cstart, cend); string_reverse_x (ctarget, cstart, cend);
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
@ -2658,6 +2664,7 @@ SCM_DEFINE (scm_string_reverse_x, "string-reverse!", 1, 2, 0,
2, start, cstart, 2, start, cstart,
3, end, cend); 3, end, cend);
str = scm_i_string_start_writing (str);
cstr = scm_i_string_writable_chars (str); cstr = scm_i_string_writable_chars (str);
string_reverse_x (cstr, cstart, cend); string_reverse_x (cstr, cstart, cend);
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
@ -3018,19 +3025,16 @@ SCM_DEFINE (scm_string_for_each, "string-for-each", 2, 2, 0,
"return value is not specified.") "return value is not specified.")
#define FUNC_NAME s_scm_string_for_each #define FUNC_NAME s_scm_string_for_each
{ {
const char *cstr;
size_t cstart, cend; size_t cstart, cend;
scm_t_trampoline_1 proc_tramp = scm_trampoline_1 (proc); scm_t_trampoline_1 proc_tramp = scm_trampoline_1 (proc);
SCM_ASSERT (proc_tramp, proc, SCM_ARG1, FUNC_NAME); SCM_ASSERT (proc_tramp, proc, SCM_ARG1, FUNC_NAME);
MY_VALIDATE_SUBSTRING_SPEC_COPY (2, s, cstr, MY_VALIDATE_SUBSTRING_SPEC (2, s,
3, start, cstart, 3, start, cstart,
4, end, cend); 4, end, cend);
while (cstart < cend) while (cstart < cend)
{ {
unsigned int c = (unsigned char) cstr[cstart]; proc_tramp (proc, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart)));
proc_tramp (proc, SCM_MAKE_CHAR (c));
cstr = scm_i_string_chars (s);
cstart++; cstart++;
} }
@ -3162,6 +3166,7 @@ SCM_DEFINE (scm_string_xcopy_x, "string-xcopy!", 4, 3, 0,
SCM_ASSERT_RANGE (1, tstart, SCM_ASSERT_RANGE (1, tstart,
ctstart + (csto - csfrom) <= scm_i_string_length (target)); ctstart + (csto - csfrom) <= scm_i_string_length (target));
target = scm_i_string_start_writing (target);
p = scm_i_string_writable_chars (target) + ctstart; p = scm_i_string_writable_chars (target) + ctstart;
cs = scm_i_string_chars (s); cs = scm_i_string_chars (s);
while (csfrom < csto) while (csfrom < csto)
@ -3200,8 +3205,8 @@ SCM_DEFINE (scm_string_replace, "string-replace", 2, 4, 0,
MY_VALIDATE_SUBSTRING_SPEC (2, s2, MY_VALIDATE_SUBSTRING_SPEC (2, s2,
5, start2, cstart2, 5, start2, cstart2,
6, end2, cend2); 6, end2, cend2);
result = scm_i_make_string (cstart1 + (cend2 - cstart2) + result = scm_i_make_string ((cstart1 + cend2 - cstart2
scm_i_string_length (s1) - cend1, &p); + scm_i_string_length (s1) - cend1), &p);
cstr1 = scm_i_string_chars (s1); cstr1 = scm_i_string_chars (s1);
cstr2 = scm_i_string_chars (s2); cstr2 = scm_i_string_chars (s2);
memmove (p, cstr1, cstart1 * sizeof (char)); memmove (p, cstr1, cstart1 * sizeof (char));

View file

@ -24,6 +24,8 @@
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <ctype.h>
#include <unistr.h>
#include "libguile/_scm.h" #include "libguile/_scm.h"
#include "libguile/chars.h" #include "libguile/chars.h"
@ -69,10 +71,12 @@
#define STRINGBUF_F_SHARED 0x100 #define STRINGBUF_F_SHARED 0x100
#define STRINGBUF_F_INLINE 0x200 #define STRINGBUF_F_INLINE 0x200
#define STRINGBUF_F_WIDE 0x400
#define STRINGBUF_TAG scm_tc7_stringbuf #define STRINGBUF_TAG scm_tc7_stringbuf
#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED) #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE) #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf)) #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf)) #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
@ -82,6 +86,7 @@
#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \ #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
? STRINGBUF_INLINE_CHARS (buf) \ ? STRINGBUF_INLINE_CHARS (buf) \
: STRINGBUF_OUTLINE_CHARS (buf)) : STRINGBUF_OUTLINE_CHARS (buf))
#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *)SCM_CELL_WORD_1(buf))
#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \ #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
? STRINGBUF_INLINE_LENGTH (buf) \ ? STRINGBUF_INLINE_LENGTH (buf) \
: STRINGBUF_OUTLINE_LENGTH (buf)) : STRINGBUF_OUTLINE_LENGTH (buf))
@ -126,6 +131,23 @@ make_stringbuf (size_t len)
} }
} }
static SCM
make_wide_stringbuf (size_t len)
{
scm_t_wchar *mem;
#if SCM_DEBUG
if (len < 1000)
lenhist[len]++;
else
lenhist[1000]++;
#endif
mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
mem[len] = 0;
return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_WIDE, (scm_t_bits) mem,
(scm_t_bits) len, (scm_t_bits) 0);
}
/* Return a new stringbuf whose underlying storage consists of the LEN+1 /* Return a new stringbuf whose underlying storage consists of the LEN+1
octets pointed to by STR (the last octet is zero). */ octets pointed to by STR (the last octet is zero). */
SCM SCM
@ -147,8 +169,58 @@ void
scm_i_stringbuf_free (SCM buf) scm_i_stringbuf_free (SCM buf)
{ {
if (!STRINGBUF_INLINE (buf)) if (!STRINGBUF_INLINE (buf))
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), {
STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string"); if (!STRINGBUF_WIDE (buf))
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
else
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
sizeof (scm_t_wchar) * (STRINGBUF_OUTLINE_LENGTH (buf)
+ 1), "string");
}
}
static void
widen_stringbuf (SCM buf)
{
size_t i, len;
scm_t_wchar *mem;
if (STRINGBUF_WIDE (buf))
return;
if (STRINGBUF_INLINE (buf))
{
len = STRINGBUF_INLINE_LENGTH (buf);
mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
for (i = 0; i < len; i++)
mem[i] =
(scm_t_wchar) (unsigned char) STRINGBUF_INLINE_CHARS (buf)[i];
mem[len] = 0;
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_INLINE);
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
SCM_SET_CELL_WORD_1 (buf, mem);
SCM_SET_CELL_WORD_2 (buf, len);
}
else
{
len = STRINGBUF_OUTLINE_LENGTH (buf);
mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
for (i = 0; i < len; i++)
mem[i] =
(scm_t_wchar) (unsigned char) STRINGBUF_OUTLINE_CHARS (buf)[i];
mem[len] = 0;
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), len + 1, "string");
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
SCM_SET_CELL_WORD_1 (buf, mem);
SCM_SET_CELL_WORD_2 (buf, len);
}
} }
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER; scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
@ -195,6 +267,18 @@ scm_i_make_string (size_t len, char **charsp)
return res; return res;
} }
SCM
scm_i_make_wide_string (size_t len, scm_t_wchar ** charsp)
{
SCM buf = make_wide_stringbuf (len);
SCM res;
if (charsp)
*charsp = STRINGBUF_WIDE_CHARS (buf);
res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
(scm_t_bits) 0, (scm_t_bits) len);
return res;
}
static void static void
validate_substring_args (SCM str, size_t start, size_t end) validate_substring_args (SCM str, size_t start, size_t end)
{ {
@ -253,12 +337,24 @@ scm_i_substring_copy (SCM str, size_t start, size_t end)
SCM buf, my_buf; SCM buf, my_buf;
size_t str_start; size_t str_start;
get_str_buf_start (&str, &buf, &str_start); get_str_buf_start (&str, &buf, &str_start);
my_buf = make_stringbuf (len); if (scm_i_is_narrow_string (str))
memcpy (STRINGBUF_CHARS (my_buf), {
STRINGBUF_CHARS (buf) + str_start + start, len); my_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (my_buf),
STRINGBUF_CHARS (buf) + str_start + start, len);
}
else
{
my_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
+ start), len);
/* Even though this string is wide, the substring may be narrow.
Consider adding code to narrow string. */
}
scm_remember_upto_here_1 (buf); scm_remember_upto_here_1 (buf);
return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf), return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
(scm_t_bits)0, (scm_t_bits) len); (scm_t_bits) 0, (scm_t_bits) len);
} }
SCM SCM
@ -330,17 +426,45 @@ scm_i_string_length (SCM str)
return STRING_LENGTH (str); return STRING_LENGTH (str);
} }
int
scm_i_is_narrow_string (SCM str)
{
return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
}
const char * const char *
scm_i_string_chars (SCM str) scm_i_string_chars (SCM str)
{ {
SCM buf; SCM buf;
size_t start; size_t start;
get_str_buf_start (&str, &buf, &start); get_str_buf_start (&str, &buf, &start);
return STRINGBUF_CHARS (buf) + start; if (scm_i_is_narrow_string (str))
return STRINGBUF_CHARS (buf) + start;
else
scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
scm_list_1 (str));
return NULL;
} }
char * const scm_t_wchar *
scm_i_string_writable_chars (SCM orig_str) scm_i_string_wide_chars (SCM str)
{
SCM buf;
size_t start;
get_str_buf_start (&str, &buf, &start);
if (!scm_i_is_narrow_string (str))
return STRINGBUF_WIDE_CHARS (buf) + start;
else
scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
scm_list_1 (str));
}
/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
a new string buffer, so that it can be modified without modifying
other strings. */
SCM
scm_i_string_start_writing (SCM orig_str)
{ {
SCM buf, str = orig_str; SCM buf, str = orig_str;
size_t start; size_t start;
@ -352,18 +476,26 @@ scm_i_string_writable_chars (SCM orig_str)
scm_i_pthread_mutex_lock (&stringbuf_write_mutex); scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
if (STRINGBUF_SHARED (buf)) if (STRINGBUF_SHARED (buf))
{ {
/* Clone stringbuf. For this, we put all threads to sleep. /* Clone the stringbuf. */
*/
size_t len = STRING_LENGTH (str); size_t len = STRING_LENGTH (str);
SCM new_buf; SCM new_buf;
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
new_buf = make_stringbuf (len); if (scm_i_is_narrow_string (str))
memcpy (STRINGBUF_CHARS (new_buf), {
STRINGBUF_CHARS (buf) + STRING_START (str), len); new_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + STRING_START (str), len);
}
else
{
new_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
+ STRING_START (str)), len);
}
scm_i_thread_put_to_sleep (); scm_i_thread_put_to_sleep ();
SET_STRING_STRINGBUF (str, new_buf); SET_STRING_STRINGBUF (str, new_buf);
start -= STRING_START (str); start -= STRING_START (str);
@ -374,8 +506,39 @@ scm_i_string_writable_chars (SCM orig_str)
scm_i_pthread_mutex_lock (&stringbuf_write_mutex); scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
} }
return orig_str;
}
return STRINGBUF_CHARS (buf) + start; /* Return a pointer to the chars of a string that fits in a Latin-1
encoding. */
char *
scm_i_string_writable_chars (SCM str)
{
SCM buf;
size_t start;
get_str_buf_start (&str, &buf, &start);
if (scm_i_is_narrow_string (str))
return STRINGBUF_CHARS (buf) + start;
else
scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
scm_list_1 (str));
return NULL;
}
/* Return a pointer to the Unicode codepoints of a string. */
static scm_t_wchar *
scm_i_string_writable_wide_chars (SCM str)
{
SCM buf;
size_t start;
get_str_buf_start (&str, &buf, &start);
if (!scm_i_is_narrow_string (str))
return STRINGBUF_WIDE_CHARS (buf) + start;
else
scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
scm_list_1 (str));
} }
void void
@ -384,6 +547,34 @@ scm_i_string_stop_writing (void)
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex); scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
} }
/* Return the Xth character is C. */
scm_t_wchar
scm_i_string_ref (SCM str, size_t x)
{
if (scm_i_is_narrow_string (str))
return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
else
return scm_i_string_wide_chars (str)[x];
}
void
scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
{
if (chr > 0xFF && scm_i_is_narrow_string (str))
widen_stringbuf (STRING_STRINGBUF (str));
if (scm_i_is_narrow_string (str))
{
char *dst = scm_i_string_writable_chars (str);
dst[p] = (char) (unsigned char) chr;
}
else
{
scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
dst[p] = chr;
}
}
/* Symbols. /* Symbols.
Basic symbol creation and accessing is done here, the rest is in Basic symbol creation and accessing is done here, the rest is in
@ -418,10 +609,21 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
else else
{ {
/* make new buf. */ /* make new buf. */
SCM new_buf = make_stringbuf (length); if (scm_i_is_narrow_string (name))
memcpy (STRINGBUF_CHARS (new_buf), {
STRINGBUF_CHARS (buf) + start, length); SCM new_buf = make_stringbuf (length);
buf = new_buf; memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + start, length);
buf = new_buf;
}
else
{
SCM new_buf = make_wide_stringbuf (length);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
(scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
length);
buf = new_buf;
}
} }
return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf), return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
(scm_t_bits) hash, SCM_UNPACK (props)); (scm_t_bits) hash, SCM_UNPACK (props));
@ -466,11 +668,40 @@ scm_c_symbol_length (SCM sym)
} }
#undef FUNC_NAME #undef FUNC_NAME
int
scm_i_is_narrow_symbol (SCM sym)
{
SCM buf;
buf = SYMBOL_STRINGBUF (sym);
return !STRINGBUF_WIDE (buf);
}
const char * const char *
scm_i_symbol_chars (SCM sym) scm_i_symbol_chars (SCM sym)
{ {
SCM buf = SYMBOL_STRINGBUF (sym); SCM buf;
return STRINGBUF_CHARS (buf);
buf = SYMBOL_STRINGBUF (sym);
if (!STRINGBUF_WIDE (buf))
return STRINGBUF_CHARS (buf);
else
scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
scm_list_1 (sym));
}
/* Return a pointer to the Unicode codepoints of a symbol's name. */
const scm_t_wchar *
scm_i_symbol_wide_chars (SCM sym)
{
SCM buf;
buf = SYMBOL_STRINGBUF (sym);
if (STRINGBUF_WIDE (buf))
return STRINGBUF_WIDE_CHARS (buf);
else
scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
scm_list_1 (sym));
} }
SCM SCM
@ -496,6 +727,15 @@ scm_i_symbol_substring (SCM sym, size_t start, size_t end)
(scm_t_bits)start, (scm_t_bits) end - start); (scm_t_bits)start, (scm_t_bits) end - start);
} }
scm_t_wchar
scm_i_symbol_ref (SCM sym, size_t x)
{
if (scm_i_is_narrow_symbol (sym))
return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
else
return scm_i_symbol_wide_chars (sym)[x];
}
/* Debugging /* Debugging
*/ */
@ -505,15 +745,17 @@ SCM scm_sys_string_dump (SCM);
SCM scm_sys_symbol_dump (SCM); SCM scm_sys_symbol_dump (SCM);
SCM scm_sys_stringbuf_hist (void); SCM scm_sys_stringbuf_hist (void);
SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str), "")
(SCM str),
"")
#define FUNC_NAME s_scm_sys_string_dump #define FUNC_NAME s_scm_sys_string_dump
{ {
SCM_VALIDATE_STRING (1, str); SCM_VALIDATE_STRING (1, str);
fprintf (stderr, "%p:\n", str); fprintf (stderr, "%p:\n", str);
fprintf (stderr, " start: %u\n", STRING_START (str)); fprintf (stderr, " start: %u\n", STRING_START (str));
fprintf (stderr, " len: %u\n", STRING_LENGTH (str)); fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
if (scm_i_is_narrow_string (str))
fprintf (stderr, " format: narrow\n");
else
fprintf (stderr, " format: wide\n");
if (IS_SH_STRING (str)) if (IS_SH_STRING (str))
{ {
fprintf (stderr, " string: %p\n", SH_STRING_STRING (str)); fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
@ -524,36 +766,54 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
{ {
SCM buf = STRING_STRINGBUF (str); SCM buf = STRING_STRINGBUF (str);
fprintf (stderr, " buf: %p\n", buf); fprintf (stderr, " buf: %p\n", buf);
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf)); if (scm_i_is_narrow_string (str))
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
else
fprintf (stderr, " chars: %p\n", STRINGBUF_WIDE_CHARS (buf));
fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf)); fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300)); if (STRINGBUF_SHARED (buf))
fprintf (stderr, " shared: true\n");
else
fprintf (stderr, " shared: false\n");
if (STRINGBUF_INLINE (buf))
fprintf (stderr, " inline: true\n");
else
fprintf (stderr, " inline: false\n");
} }
return SCM_UNSPECIFIED; return SCM_UNSPECIFIED;
} }
#undef FUNC_NAME #undef FUNC_NAME
SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym), "")
(SCM sym),
"")
#define FUNC_NAME s_scm_sys_symbol_dump #define FUNC_NAME s_scm_sys_symbol_dump
{ {
SCM_VALIDATE_SYMBOL (1, sym); SCM_VALIDATE_SYMBOL (1, sym);
fprintf (stderr, "%p:\n", sym); fprintf (stderr, "%p:\n", sym);
fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym)); fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
if (scm_i_is_narrow_symbol (sym))
fprintf (stderr, " format: narrow\n");
else
fprintf (stderr, " format: wide\n");
{ {
SCM buf = SYMBOL_STRINGBUF (sym); SCM buf = SYMBOL_STRINGBUF (sym);
fprintf (stderr, " buf: %p\n", buf); fprintf (stderr, " buf: %p\n", buf);
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf)); if (scm_i_is_narrow_symbol (sym))
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
else
fprintf (stderr, " chars: %p\n", STRINGBUF_WIDE_CHARS (buf));
fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf)); fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf)); if (STRINGBUF_SHARED (buf))
fprintf (stderr, " shared: true\n");
else
fprintf (stderr, " shared: false\n");
} }
return SCM_UNSPECIFIED; return SCM_UNSPECIFIED;
} }
#undef FUNC_NAME #undef FUNC_NAME
SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
(void),
"")
#define FUNC_NAME s_scm_sys_stringbuf_hist #define FUNC_NAME s_scm_sys_stringbuf_hist
{ {
int i; int i;
@ -589,29 +849,46 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1,
#define FUNC_NAME s_scm_string #define FUNC_NAME s_scm_string
{ {
SCM result; SCM result;
SCM rest;
size_t len; size_t len;
char *data; size_t p = 0;
long i;
{ /* Verify that this is a list of chars. */
long i = scm_ilength (chrs); i = scm_ilength (chrs);
len = (size_t) i;
rest = chrs;
SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME); SCM_ASSERT (len >= 0, chrs, SCM_ARG1, FUNC_NAME);
len = i; while (len > 0 && scm_is_pair (rest))
}
result = scm_i_make_string (len, &data);
while (len > 0 && scm_is_pair (chrs))
{ {
SCM elt = SCM_CAR (chrs); SCM elt = SCM_CAR (rest);
SCM_VALIDATE_CHAR (SCM_ARGn, elt); SCM_VALIDATE_CHAR (SCM_ARGn, elt);
*data++ = SCM_CHAR (elt); rest = SCM_CDR (rest);
chrs = SCM_CDR (chrs);
len--; len--;
scm_remember_upto_here_1 (elt);
} }
/* Construct a string containing this list of chars. */
len = (size_t) i;
rest = chrs;
result = scm_i_make_string (len, NULL);
result = scm_i_string_start_writing (result);
while (len > 0 && scm_is_pair (rest))
{
SCM elt = SCM_CAR (rest);
scm_i_string_set_x (result, p, SCM_CHAR (elt));
p++;
rest = SCM_CDR (rest);
len--;
scm_remember_upto_here_1 (elt);
}
scm_i_string_stop_writing ();
if (len > 0) if (len > 0)
scm_misc_error (NULL, "list changed while constructing string", SCM_EOL); scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
if (!scm_is_null (chrs)) if (!scm_is_null (rest))
scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list"); scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
return result; return result;
@ -634,13 +911,16 @@ SCM
scm_c_make_string (size_t len, SCM chr) scm_c_make_string (size_t len, SCM chr)
#define FUNC_NAME NULL #define FUNC_NAME NULL
{ {
char *dst; size_t p;
SCM res = scm_i_make_string (len, &dst); SCM res = scm_i_make_string (len, NULL);
if (!SCM_UNBNDP (chr)) if (!SCM_UNBNDP (chr))
{ {
SCM_VALIDATE_CHAR (0, chr); SCM_VALIDATE_CHAR (0, chr);
memset (dst, SCM_CHAR (chr), len); res = scm_i_string_start_writing (res);
for (p = 0; p < len; p++)
scm_i_string_set_x (res, p, SCM_CHAR (chr));
scm_i_string_stop_writing ();
} }
return res; return res;
@ -657,6 +937,20 @@ SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
} }
#undef FUNC_NAME #undef FUNC_NAME
SCM_DEFINE (scm_string_width, "string-width", 1, 0, 0,
(SCM string),
"Return the bytes used to represent a character in @var{string}."
"This will return 1 or 4.")
#define FUNC_NAME s_scm_string_width
{
SCM_VALIDATE_STRING (1, string);
if (!scm_i_is_narrow_string (string))
return scm_from_int (4);
return scm_from_int (1);
}
#undef FUNC_NAME
size_t size_t
scm_c_string_length (SCM string) scm_c_string_length (SCM string)
{ {
@ -667,8 +961,8 @@ scm_c_string_length (SCM string)
SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0, SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
(SCM str, SCM k), (SCM str, SCM k),
"Return character @var{k} of @var{str} using zero-origin\n" "Return character @var{k} of @var{str} using zero-origin\n"
"indexing. @var{k} must be a valid index of @var{str}.") "indexing. @var{k} must be a valid index of @var{str}.")
#define FUNC_NAME s_scm_string_ref #define FUNC_NAME s_scm_string_ref
{ {
size_t len; size_t len;
@ -682,7 +976,10 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
else else
scm_out_of_range (NULL, k); scm_out_of_range (NULL, k);
return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]); if (scm_i_is_narrow_string (str))
return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
else
return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
} }
#undef FUNC_NAME #undef FUNC_NAME
@ -691,14 +988,18 @@ scm_c_string_ref (SCM str, size_t p)
{ {
if (p >= scm_i_string_length (str)) if (p >= scm_i_string_length (str))
scm_out_of_range (NULL, scm_from_size_t (p)); scm_out_of_range (NULL, scm_from_size_t (p));
return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]); if (scm_i_is_narrow_string (str))
return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
else
return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
} }
SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0, SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
(SCM str, SCM k, SCM chr), (SCM str, SCM k, SCM chr),
"Store @var{chr} in element @var{k} of @var{str} and return\n" "Store @var{chr} in element @var{k} of @var{str} and return\n"
"an unspecified value. @var{k} must be a valid index of\n" "an unspecified value. @var{k} must be a valid index of\n"
"@var{str}.") "@var{str}.")
#define FUNC_NAME s_scm_string_set_x #define FUNC_NAME s_scm_string_set_x
{ {
size_t len; size_t len;
@ -713,11 +1014,10 @@ SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
scm_out_of_range (NULL, k); scm_out_of_range (NULL, k);
SCM_VALIDATE_CHAR (3, chr); SCM_VALIDATE_CHAR (3, chr);
{ str = scm_i_string_start_writing (str);
char *dst = scm_i_string_writable_chars (str); scm_i_string_set_x (str, idx, SCM_CHAR (chr));
dst[idx] = SCM_CHAR (chr); scm_i_string_stop_writing ();
scm_i_string_stop_writing ();
}
return SCM_UNSPECIFIED; return SCM_UNSPECIFIED;
} }
#undef FUNC_NAME #undef FUNC_NAME
@ -727,11 +1027,9 @@ scm_c_string_set_x (SCM str, size_t p, SCM chr)
{ {
if (p >= scm_i_string_length (str)) if (p >= scm_i_string_length (str))
scm_out_of_range (NULL, scm_from_size_t (p)); scm_out_of_range (NULL, scm_from_size_t (p));
{ str = scm_i_string_start_writing (str);
char *dst = scm_i_string_writable_chars (str); scm_i_string_set_x (str, p, SCM_CHAR (chr));
dst[p] = SCM_CHAR (chr); scm_i_string_stop_writing ();
scm_i_string_stop_writing ();
}
} }
SCM_DEFINE (scm_substring, "substring", 2, 1, 0, SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
@ -832,31 +1130,55 @@ SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
(SCM args), (SCM args),
"Return a newly allocated string whose characters form the\n" "Return a newly allocated string whose characters form the\n"
"concatenation of the given strings, @var{args}.") "concatenation of the given strings, @var{args}.")
#define FUNC_NAME s_scm_string_append #define FUNC_NAME s_scm_string_append
{ {
SCM res; SCM res;
size_t i = 0; size_t len = 0;
int wide = 0;
SCM l, s; SCM l, s;
char *data; char *data;
scm_t_wchar *wdata;
int i;
SCM_VALIDATE_REST_ARGUMENT (args); SCM_VALIDATE_REST_ARGUMENT (args);
for (l = args; !scm_is_null (l); l = SCM_CDR (l)) for (l = args; !scm_is_null (l); l = SCM_CDR (l))
{ {
s = SCM_CAR (l); s = SCM_CAR (l);
SCM_VALIDATE_STRING (SCM_ARGn, s); SCM_VALIDATE_STRING (SCM_ARGn, s);
i += scm_i_string_length (s); len += scm_i_string_length (s);
if (!scm_i_is_narrow_string (s))
wide = 1;
} }
res = scm_i_make_string (i, &data); if (!wide)
for (l = args; !scm_is_null (l); l = SCM_CDR (l)) res = scm_i_make_string (len, &data);
else
res = scm_i_make_wide_string (len, &wdata);
for (l = args; !scm_is_null (l); l = SCM_CDR (l))
{ {
size_t len; size_t len;
s = SCM_CAR (l); s = SCM_CAR (l);
SCM_VALIDATE_STRING (SCM_ARGn, s); SCM_VALIDATE_STRING (SCM_ARGn, s);
len = scm_i_string_length (s); len = scm_i_string_length (s);
memcpy (data, scm_i_string_chars (s), len); if (!wide)
data += len; {
memcpy (data, scm_i_string_chars (s), len);
data += len;
}
else
{
if (scm_i_is_narrow_string (s))
{
for (i = 0; i < scm_i_string_length (s); i++)
wdata[i] = (unsigned char) scm_i_string_chars (s)[i];
}
else
u32_cpy ((scm_t_uint32 *) wdata,
(scm_t_uint32 *) scm_i_string_wide_chars (s), len);
wdata += len;
}
scm_remember_upto_here_1 (s); scm_remember_upto_here_1 (s);
} }
return res; return res;
@ -875,8 +1197,11 @@ scm_from_locale_stringn (const char *str, size_t len)
SCM res; SCM res;
char *dst; char *dst;
if (len == (size_t)-1) if (len == (size_t) -1)
len = strlen (str); len = strlen (str);
if (len == 0)
return scm_nullstr;
res = scm_i_make_string (len, &dst); res = scm_i_make_string (len, &dst);
memcpy (dst, str, len); memcpy (dst, str, len);
return res; return res;
@ -885,6 +1210,9 @@ scm_from_locale_stringn (const char *str, size_t len)
SCM SCM
scm_from_locale_string (const char *str) scm_from_locale_string (const char *str)
{ {
if (str == NULL)
return scm_nullstr;
return scm_from_locale_stringn (str, -1); return scm_from_locale_stringn (str, -1);
} }
@ -893,21 +1221,20 @@ scm_take_locale_stringn (char *str, size_t len)
{ {
SCM buf, res; SCM buf, res;
if (len == (size_t)-1) if (len == (size_t) -1)
len = strlen (str); len = strlen (str);
else else
{ {
/* Ensure STR is null terminated. A realloc for 1 extra byte should /* Ensure STR is null terminated. A realloc for 1 extra byte should
often be satisfied from the alignment padding after the block, with often be satisfied from the alignment padding after the block, with
no actual data movement. */ no actual data movement. */
str = scm_realloc (str, len+1); str = scm_realloc (str, len + 1);
str[len] = '\0'; str[len] = '\0';
} }
buf = scm_i_take_stringbufn (str, len); buf = scm_i_take_stringbufn (str, len);
res = scm_double_cell (STRING_TAG, res = scm_double_cell (STRING_TAG,
SCM_UNPACK (buf), SCM_UNPACK (buf), (scm_t_bits) 0, (scm_t_bits) len);
(scm_t_bits) 0, (scm_t_bits) len);
return res; return res;
} }
@ -917,33 +1244,143 @@ scm_take_locale_string (char *str)
return scm_take_locale_stringn (str, -1); return scm_take_locale_stringn (str, -1);
} }
char * /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
scm_to_locale_stringn (SCM str, size_t *lenp) and \UXXXXXX. */
static void
unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
{ {
char *res; char *before, *after;
size_t len; size_t i, j;
before = *bufp;
after = *bufp;
i = 0;
j = 0;
while (i < *lenp)
{
if ((i <= *lenp - 6)
&& before[i] == '\\'
&& before[i + 1] == 'u'
&& before[i + 2] == '0' && before[i + 3] == '0')
{
/* Convert \u00NN to \xNN */
after[j] = '\\';
after[j + 1] = 'x';
after[j + 2] = tolower (before[i + 4]);
after[j + 3] = tolower (before[i + 5]);
i += 6;
j += 4;
}
else if ((i <= *lenp - 10)
&& before[i] == '\\'
&& before[i + 1] == 'U'
&& before[i + 2] == '0' && before[i + 3] == '0')
{
/* Convert \U00NNNNNN to \UNNNNNN */
after[j] = '\\';
after[j + 1] = 'U';
after[j + 2] = tolower (before[i + 4]);
after[j + 3] = tolower (before[i + 5]);
after[j + 4] = tolower (before[i + 6]);
after[j + 5] = tolower (before[i + 7]);
after[j + 6] = tolower (before[i + 8]);
after[j + 7] = tolower (before[i + 9]);
i += 10;
j += 8;
}
else
{
after[j] = before[i];
i++;
j++;
}
}
*lenp = j;
after = scm_realloc (after, j);
}
char *
scm_to_locale_stringn (SCM str, size_t * lenp)
{
const char *enc;
/* In the future, enc will hold the port's encoding. */
enc = NULL;
return scm_to_stringn (str, lenp, enc, iconveh_escape_sequence);
}
/* Low-level scheme to C string conversion function. */
char *
scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
enum iconv_ilseq_handler handler)
{
static const char iso[11] = "ISO-8859-1";
char *buf;
size_t ilen, len, i;
if (!scm_is_string (str)) if (!scm_is_string (str))
scm_wrong_type_arg_msg (NULL, 0, str, "string"); scm_wrong_type_arg_msg (NULL, 0, str, "string");
len = scm_i_string_length (str); ilen = scm_i_string_length (str);
res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
memcpy (res, scm_i_string_chars (str), len); if (ilen == 0)
if (lenp == NULL)
{ {
res[len] = '\0'; buf = scm_malloc (1);
if (strlen (res) != len) buf[0] = '\0';
{ if (lenp)
free (res); *lenp = 0;
scm_misc_error (NULL, return buf;
"string contains #\\nul character: ~S",
scm_list_1 (str));
}
} }
else
if (lenp == NULL)
for (i = 0; i < ilen; i++)
if (scm_i_string_ref (str, i) == '\0')
scm_misc_error (NULL,
"string contains #\\nul character: ~S",
scm_list_1 (str));
if (scm_i_is_narrow_string (str))
{
if (lenp)
{
buf = scm_malloc (ilen);
memcpy (buf, scm_i_string_chars (str), ilen);
*lenp = ilen;
return buf;
}
else
{
buf = scm_malloc (ilen + 1);
memcpy (buf, scm_i_string_chars (str), ilen);
buf[ilen] = '\0';
return buf;
}
}
buf = NULL;
len = 0;
buf = u32_conv_to_encoding (iso,
handler,
(scm_t_uint32 *) scm_i_string_wide_chars (str),
ilen, NULL, NULL, &len);
if (buf == NULL)
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
scm_list_2 (scm_from_locale_string (iso), str));
if (handler == iconveh_escape_sequence)
unistring_escapes_to_guile_escapes (&buf, &len);
if (lenp)
*lenp = len; *lenp = len;
else
{
buf = scm_realloc (buf, len + 1);
buf[len] = '\0';
}
scm_remember_upto_here_1 (str); scm_remember_upto_here_1 (str);
return res; return buf;
} }
char * char *
@ -956,18 +1393,21 @@ size_t
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len) scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
{ {
size_t len; size_t len;
char *result = NULL;
if (!scm_is_string (str)) if (!scm_is_string (str))
scm_wrong_type_arg_msg (NULL, 0, str, "string"); scm_wrong_type_arg_msg (NULL, 0, str, "string");
len = scm_i_string_length (str); result = scm_to_locale_stringn (str, &len);
memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
memcpy (buf, result, (len > max_len) ? max_len : len);
free (result);
scm_remember_upto_here_1 (str); scm_remember_upto_here_1 (str);
return len; return len;
} }
/* converts C scm_array of strings to SCM scm_list of strings. */ /* converts C scm_array of strings to SCM scm_list of strings. */
/* If argc < 0, a null terminated scm_array is assumed. */ /* If argc < 0, a null terminated scm_array is assumed. */
SCM SCM
scm_makfromstrs (int argc, char **argv) scm_makfromstrs (int argc, char **argv)
{ {
int i = argc; int i = argc;
@ -1081,6 +1521,7 @@ scm_i_deprecated_string_chars (SCM str)
/* The following is still wrong, of course... /* The following is still wrong, of course...
*/ */
str = scm_i_string_start_writing (str);
chars = scm_i_string_writable_chars (str); chars = scm_i_string_writable_chars (str);
scm_i_string_stop_writing (); scm_i_string_stop_writing ();
return chars; return chars;

View file

@ -23,6 +23,7 @@
#include <uniconv.h>
#include "libguile/__scm.h" #include "libguile/__scm.h"
@ -46,26 +47,37 @@
Internal, low level interface to the character arrays Internal, low level interface to the character arrays
- Use scm_i_string_chars to get a pointer to the byte array of a - Use scm_is_narrow_string to determine is the string is narrow or
string for reading. Use scm_i_string_length to get the number of wide.
bytes in that array. The array is not null-terminated.
- Use scm_i_string_chars or scm_i_string_wide_chars to get a
pointer to the byte or scm_t_wchar array of a string for reading.
Use scm_i_string_length to get the number of characters in that
array. The array is not null-terminated.
- The array is valid as long as the corresponding SCM object is - The array is valid as long as the corresponding SCM object is
protected but only until the next SCM_TICK. During such a 'safe protected but only until the next SCM_TICK. During such a 'safe
point', strings might change their representation. point', strings might change their representation.
- Use scm_i_string_writable_chars to get the same pointer as with - Use scm_i_string_start_writing to get a version of the string
scm_i_string_chars, but for reading and writing. This is a ready for reading and writing. This is a potentially costly
potentially costly operation since it implements the operation since it implements the copy-on-write behavior. When
copy-on-write behavior. When done with the writing, call done with the writing, call scm_i_string_stop_writing. You must
scm_i_string_stop_writing. You must do this before the next do this before the next SCM_TICK. (This means, before calling
SCM_TICK. (This means, before calling almost any other scm_ almost any other scm_ function and you can't allow throws, of
function and you can't allow throws, of course.) course.)
- New strings can be created with scm_i_make_string. This gives - New strings can be created with scm_i_make_string or
access to a writable pointer that remains valid as long as nobody scm_i_make_wide_string. This gives access to a writable pointer
else makes a copy-on-write substring of the string. Do not call that remains valid as long as nobody else makes a copy-on-write
scm_i_string_stop_writing for this pointer. substring of the string. Do not call scm_i_string_stop_writing
for this pointer.
- Alternately, scm_i_string_ref and scm_i_string_set_x can be used
to read and write strings without worrying about whether the
string is narrow or wide. scm_i_string_set_x still needs to be
bracketed by scm_i_string_start_writing and
scm_i_string_stop_writing.
Legacy interface Legacy interface
@ -74,13 +86,15 @@
- SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately - SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately
calls scm_i_stop_writing, hoping for the best. SCM_STRING_LENGTH calls scm_i_stop_writing, hoping for the best. SCM_STRING_LENGTH
is the same as scm_i_string_length. SCM_STRING_CHARS will throw is the same as scm_i_string_length. SCM_STRING_CHARS will throw
an error for for strings that are not null-terminated. an error for for strings that are not null-terminated. There is
no wide version of this interface.
*/ */
SCM_API SCM scm_string_p (SCM x); SCM_API SCM scm_string_p (SCM x);
SCM_API SCM scm_string (SCM chrs); SCM_API SCM scm_string (SCM chrs);
SCM_API SCM scm_make_string (SCM k, SCM chr); SCM_API SCM scm_make_string (SCM k, SCM chr);
SCM_API SCM scm_string_length (SCM str); SCM_API SCM scm_string_length (SCM str);
SCM_API SCM scm_string_width (SCM str);
SCM_API SCM scm_string_ref (SCM str, SCM k); SCM_API SCM scm_string_ref (SCM str, SCM k);
SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr); SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);
SCM_API SCM scm_substring (SCM str, SCM start, SCM end); SCM_API SCM scm_substring (SCM str, SCM start, SCM end);
@ -106,6 +120,9 @@ SCM_API SCM scm_take_locale_string (char *str);
SCM_API SCM scm_take_locale_stringn (char *str, size_t len); SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
SCM_API char *scm_to_locale_string (SCM str); SCM_API char *scm_to_locale_string (SCM str);
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp); SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
const char *encoding,
enum iconv_ilseq_handler handler);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len); SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
SCM_API SCM scm_makfromstrs (int argc, char **argv); SCM_API SCM scm_makfromstrs (int argc, char **argv);
@ -113,15 +130,20 @@ SCM_API SCM scm_makfromstrs (int argc, char **argv);
/* internal accessor functions. Arguments must be valid. */ /* internal accessor functions. Arguments must be valid. */
SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap); SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap);
SCM_INTERNAL SCM scm_i_make_wide_string (size_t len, scm_t_wchar **datap);
SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end); SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end); SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end); SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end); SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end);
SCM_INTERNAL size_t scm_i_string_length (SCM str); SCM_INTERNAL size_t scm_i_string_length (SCM str);
SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str); SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str);
SCM_API const scm_t_wchar *scm_i_string_wide_chars (SCM str);
SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str); SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str);
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void); SCM_INTERNAL void scm_i_string_stop_writing (void);
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
/* internal functions related to symbols. */ /* internal functions related to symbols. */
SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags, SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
@ -133,8 +155,11 @@ SCM_INTERNAL SCM
scm_i_c_take_symbol (char *name, size_t len, scm_i_c_take_symbol (char *name, size_t len,
scm_t_bits flags, unsigned long hash, SCM props); scm_t_bits flags, unsigned long hash, SCM props);
SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym); SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym); SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end); SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
/* internal GC functions. */ /* internal GC functions. */

View file

@ -336,6 +336,7 @@ do { \
#define FETCH() (*ip++) #define FETCH() (*ip++)
#define FETCH_LENGTH(len) do { len=*ip++; len<<=8; len+=*ip++; len<<=8; len+=*ip++; } while (0) #define FETCH_LENGTH(len) do { len=*ip++; len<<=8; len+=*ip++; len<<=8; len+=*ip++; } while (0)
#define FETCH_WIDTH(width) do { width=*ip++; } while (0)
#undef CLOCK #undef CLOCK
#if VM_USE_CLOCK #if VM_USE_CLOCK

View file

@ -72,31 +72,82 @@ VM_DEFINE_LOADER (82, load_number, "load-number")
VM_DEFINE_LOADER (83, load_string, "load-string") VM_DEFINE_LOADER (83, load_string, "load-string")
{ {
size_t len; size_t len;
int width;
SCM str;
FETCH_LENGTH (len); FETCH_LENGTH (len);
FETCH_WIDTH (width);
SYNC_REGISTER (); SYNC_REGISTER ();
PUSH (scm_from_locale_stringn ((char *)ip, len)); if (width == 1)
/* Was: scm_makfromstr (ip, len, 0) */ {
ip += len; char *buf;
str = scm_i_make_string (len, &buf);
memcpy (buf, (char *) ip, len);
}
else if (width == 4)
{
scm_t_wchar *wbuf;
str = scm_i_make_wide_string (len, &wbuf);
memcpy ((char *) wbuf, (char *) ip, len * width);
}
else
SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL);
PUSH (str);
ip += len * width;
NEXT; NEXT;
} }
VM_DEFINE_LOADER (84, load_symbol, "load-symbol") VM_DEFINE_LOADER (84, load_symbol, "load-symbol")
{ {
size_t len; size_t len;
int width;
SCM str;
FETCH_LENGTH (len); FETCH_LENGTH (len);
FETCH_WIDTH (width);
SYNC_REGISTER (); SYNC_REGISTER ();
PUSH (scm_from_locale_symboln ((char *)ip, len)); if (width == 1)
ip += len; {
char *buf;
str = scm_i_make_string (len, &buf);
memcpy (buf, (char *) ip, len);
}
else if (width == 4)
{
scm_t_wchar *wbuf;
str = scm_i_make_wide_string (len, &wbuf);
memcpy ((char *) wbuf, (char *) ip, len * width);
}
else
SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL);
PUSH (scm_string_to_symbol (str));
ip += len * width;
NEXT; NEXT;
} }
VM_DEFINE_LOADER (85, load_keyword, "load-keyword") VM_DEFINE_LOADER (85, load_keyword, "load-keyword")
{ {
size_t len; size_t len;
int width;
SCM str;
FETCH_LENGTH (len); FETCH_LENGTH (len);
FETCH_WIDTH (width);
SYNC_REGISTER (); SYNC_REGISTER ();
PUSH (scm_from_locale_keywordn ((char *)ip, len)); if (width == 1)
ip += len; {
char *buf;
str = scm_i_make_string (len, &buf);
memcpy (buf, (char *) ip, len);
}
else if (width == 4)
{
scm_t_wchar *wbuf;
str = scm_i_make_wide_string (len, &wbuf);
memcpy ((char *) wbuf, (char *) ip, len * width);
}
else
SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL);
PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str)));
ip += len * width;
NEXT; NEXT;
} }
@ -132,13 +183,29 @@ VM_DEFINE_INSTRUCTION (87, link_now, "link-now", 0, 1, 1)
VM_DEFINE_LOADER (88, define, "define") VM_DEFINE_LOADER (88, define, "define")
{ {
SCM sym; SCM str, sym;
size_t len; size_t len;
int width;
FETCH_LENGTH (len); FETCH_LENGTH (len);
FETCH_WIDTH (width);
SYNC_REGISTER (); SYNC_REGISTER ();
sym = scm_from_locale_symboln ((char *)ip, len); if (width == 1)
ip += len; {
char *buf;
str = scm_i_make_string (len, &buf);
memcpy (buf, (char *) ip, len);
}
else if (width == 4)
{
scm_t_wchar *wbuf;
str = scm_i_make_wide_string (len, &wbuf);
memcpy ((char *) wbuf, (char *) ip, len * width);
}
else
SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL);
sym = scm_string_to_symbol (str);
ip += len * width;
SYNC_REGISTER (); SYNC_REGISTER ();
PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T)); PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T));

View file

@ -34,6 +34,10 @@
;; lengths are encoded in 3 bytes ;; lengths are encoded in 3 bytes
(define *len-len* 3) (define *len-len* 3)
;; the number of bytes per string character is encoded in 1 byte
(define *width-len* 1)
(define (byte-length assembly) (define (byte-length assembly)
(pmatch assembly (pmatch assembly
(,label (guard (not (pair? label))) (,label (guard (not (pair? label)))
@ -45,15 +49,15 @@
((load-number ,str) ((load-number ,str)
(+ 1 *len-len* (string-length str))) (+ 1 *len-len* (string-length str)))
((load-string ,str) ((load-string ,str)
(+ 1 *len-len* (string-length str))) (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-symbol ,str) ((load-symbol ,str)
(+ 1 *len-len* (string-length str))) (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-keyword ,str) ((load-keyword ,str)
(+ 1 *len-len* (string-length str))) (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-array ,bv) ((load-array ,bv)
(+ 1 *len-len* (bytevector-length bv))) (+ 1 *len-len* (bytevector-length bv)))
((define ,str) ((define ,str)
(+ 1 *len-len* (string-length str))) (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code) ((load-program ,nargs ,nrest ,nlocs ,labels ,len ,meta . ,code)
(+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0))) (+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0)))
((,inst . _) (guard (>= (instruction-length inst) 0)) ((,inst . _) (guard (>= (instruction-length inst) 0))

View file

@ -65,6 +65,12 @@
(write-byte (logand (ash x -8) 255)) (write-byte (logand (ash x -8) 255))
(write-byte (logand (ash x -16) 255)) (write-byte (logand (ash x -16) 255))
(write-byte (logand (ash x -24) 255))) (write-byte (logand (ash x -24) 255)))
(define (write-uint32 x) (case byte-order
((1234) (write-uint32-le x))
((4321) (write-uint32-be x))
(else (error "unknown endianness" byte-order))))
(define (write-wide-string s)
(string-for-each (lambda (c) (write-uint32 (char->integer c))) s))
(define (write-loader-len len) (define (write-loader-len len)
(write-byte (ash len -16)) (write-byte (ash len -16))
(write-byte (logand (ash len -8) 255)) (write-byte (logand (ash len -8) 255))
@ -72,6 +78,14 @@
(define (write-loader str) (define (write-loader str)
(write-loader-len (string-length str)) (write-loader-len (string-length str))
(write-string str)) (write-string str))
(define (write-sized-loader str)
(let ((len (string-length str))
(wid (string-width str)))
(write-loader-len len)
(write-byte wid)
(if (= wid 4)
(write-wide-string str)
(write-string str))))
(define (write-bytevector bv) (define (write-bytevector bv)
(write-loader-len (bytevector-length bv)) (write-loader-len (bytevector-length bv))
;; Ew! ;; Ew!
@ -89,10 +103,6 @@
(write-uint16 (case byte-order (write-uint16 (case byte-order
((1234) write-uint16-le) ((1234) write-uint16-le)
((4321) write-uint16-be) ((4321) write-uint16-be)
(else (error "unknown endianness" byte-order))))
(write-uint32 (case byte-order
((1234) write-uint32-le)
((4321) write-uint32-be)
(else (error "unknown endianness" byte-order))))) (else (error "unknown endianness" byte-order)))))
(let ((opcode (instruction->opcode inst)) (let ((opcode (instruction->opcode inst))
(len (instruction-length inst))) (len (instruction-length inst)))
@ -126,11 +136,11 @@
((load-unsigned-integer ,str) (write-loader str)) ((load-unsigned-integer ,str) (write-loader str))
((load-integer ,str) (write-loader str)) ((load-integer ,str) (write-loader str))
((load-number ,str) (write-loader str)) ((load-number ,str) (write-loader str))
((load-string ,str) (write-loader str)) ((load-string ,str) (write-sized-loader str))
((load-symbol ,str) (write-loader str)) ((load-symbol ,str) (write-sized-loader str))
((load-keyword ,str) (write-loader str)) ((load-keyword ,str) (write-sized-loader str))
((load-array ,bv) (write-bytevector bv)) ((load-array ,bv) (write-bytevector bv))
((define ,str) (write-loader str)) ((define ,str) (write-sized-loader str))
((br ,l) (write-break l)) ((br ,l) (write-break l))
((br-if ,l) (write-break l)) ((br-if ,l) (write-break l))
((br-if-not ,l) (write-break l)) ((br-if-not ,l) (write-break l))

View file

@ -79,15 +79,15 @@
(char->integer #\1) (char->integer #\4))) (char->integer #\1) (char->integer #\4)))
(comp-test '(load-string "foo") (comp-test '(load-string "foo")
(vector 'load-string 0 0 3 (char->integer #\f) (char->integer #\o) (vector 'load-string 0 0 3 1 (char->integer #\f) (char->integer #\o)
(char->integer #\o))) (char->integer #\o)))
(comp-test '(load-symbol "foo") (comp-test '(load-symbol "foo")
(vector 'load-symbol 0 0 3 (char->integer #\f) (char->integer #\o) (vector 'load-symbol 0 0 3 1 (char->integer #\f) (char->integer #\o)
(char->integer #\o))) (char->integer #\o)))
(comp-test '(load-keyword "qux") (comp-test '(load-keyword "qux")
(vector 'load-keyword 0 0 3 (char->integer #\q) (char->integer #\u) (vector 'load-keyword 0 0 3 1 (char->integer #\q) (char->integer #\u)
(char->integer #\x))) (char->integer #\x)))
(comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return)) (comp-test '(load-program 3 2 1 () 3 #f (make-int8 3) (return))