mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-02 13:00:26 +02:00
Stringbufs immutable by default
* libguile/snarf.h (SCM_IMMUTABLE_STRINGBUF): Remove shared flag. Stringbufs are immutable by default. * libguile/strings.c: Rewrite blurb. Change to have stringbufs be immutable by default and mutable only when marked as such. Going mutable means making a private copy. (STRINGBUF_MUTABLE, STRINGBUF_F_MUTABLE): New definitions. (SET_STRINGBUF_SHARED): Remove. (scm_i_print_stringbuf): Simplify to just alias the stringbuf as-is. (substring_with_immutable_stringbuf): New helper. (scm_i_substring, scm_i_substring_read_only, scm_i_substring_copy): use new helper. (scm_i_string_ensure_mutable_x): New helper. (scm_i_substring_shared): Use scm_i_string_ensure_mutable_x. (stringbuf_write_mutex): Remove; yaaaaaaaay. (scm_i_string_start_writing): Use scm_i_string_ensure_mutable_x. No more mutex. (scm_i_string_stop_writing): Now a no-op. (scm_i_make_symbol): Use substring/copy. (scm_sys_string_dump, scm_sys_symbol_dump): Update. * libguile/strings.h (SCM_I_STRINGBUF_F_SHARED): Remove. (SCM_I_STRINGBUF_F_MUTABLE): Add. * module/system/vm/assembler.scm (link-data): Don't add shared flag any more. Existing compiled flags are harmless tho. * test-suite/tests/strings.test ("string internals"): Update.
This commit is contained in:
parent
c38b9625c8
commit
d0934df1f2
5 changed files with 158 additions and 214 deletions
|
@ -308,7 +308,7 @@ SCM_SNARF_INIT(scm_set_smob_apply((tag), (c_name), (req), (opt), (rest));)
|
||||||
} \
|
} \
|
||||||
c_name = \
|
c_name = \
|
||||||
{ \
|
{ \
|
||||||
scm_tc7_stringbuf | SCM_I_STRINGBUF_F_SHARED, \
|
scm_tc7_stringbuf, \
|
||||||
sizeof (contents) - 1, \
|
sizeof (contents) - 1, \
|
||||||
contents \
|
contents \
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,40 +54,34 @@ SCM_SYMBOL (sym_UTF_8, "UTF-8");
|
||||||
SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
|
SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
|
||||||
SCM_SYMBOL (sym_error, "error");
|
SCM_SYMBOL (sym_error, "error");
|
||||||
|
|
||||||
/* Stringbufs
|
/* A stringbuf is a linear buffer of characters. Every string has a
|
||||||
*
|
stringbuf. Strings may reference just a slice of a stringbuf; that's
|
||||||
* XXX - keeping an accurate refcount during GC seems to be quite
|
often the case for strings made by the "substring" function.
|
||||||
* tricky, so we just keep score of whether a stringbuf might be
|
|
||||||
* shared, not whether it definitely is.
|
Stringbufs may hold either 8-bit characters or 32-bit characters. In
|
||||||
*
|
either case the characters are Unicode codepoints. "Narrow"
|
||||||
* The scheme I (mvo) tried to keep an accurate reference count would
|
stringbufs thus have the ISO-8859-1 (Latin-1) encoding, and "wide"
|
||||||
* recount all strings that point to a stringbuf during the mark-phase
|
stringbufs have the UTF-32 (UCS-4) encoding.
|
||||||
* of the GC. This was done since one cannot access the stringbuf of
|
|
||||||
* a string when that string is freed (in order to decrease the
|
By default, stringbufs are immutable. This enables an O(1)
|
||||||
* reference count). The memory of the stringbuf might have been
|
"substring" operation with no synchronization. A string-set! will
|
||||||
* reused already for something completely different.
|
first ensure that the string's stringbuf is mutable, copying the
|
||||||
*
|
stringbuf if necessary. This is therefore a copy-on-write
|
||||||
* This recounted worked for a small number of threads beating on
|
representation. However, taking a substring of a mutable stringbuf
|
||||||
* cow-strings, but it failed randomly with more than 10 threads, say.
|
is an O(n) operation as it has to create a new immutable stringbuf.
|
||||||
* I couldn't figure out what went wrong, so I used the conservative
|
There are also mutation-sharing substrings as well. */
|
||||||
* approach implemented below.
|
|
||||||
*
|
|
||||||
* There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
|
|
||||||
* strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
|
|
||||||
* strings.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* The size in words of the stringbuf header (type tag + size). */
|
/* The size in words of the stringbuf header (type tag + size). */
|
||||||
#define STRINGBUF_HEADER_SIZE 2U
|
#define STRINGBUF_HEADER_SIZE 2U
|
||||||
|
|
||||||
#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
|
#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
|
||||||
|
|
||||||
#define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
|
|
||||||
#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
|
#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
|
||||||
|
#define STRINGBUF_F_MUTABLE SCM_I_STRINGBUF_F_MUTABLE
|
||||||
|
|
||||||
#define STRINGBUF_TAG scm_tc7_stringbuf
|
#define STRINGBUF_TAG scm_tc7_stringbuf
|
||||||
#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
|
|
||||||
#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
|
#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
|
||||||
|
#define STRINGBUF_MUTABLE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_MUTABLE)
|
||||||
|
|
||||||
#define STRINGBUF_CONTENTS(buf) ((void *) \
|
#define STRINGBUF_CONTENTS(buf) ((void *) \
|
||||||
SCM_CELL_OBJECT_LOC (buf, \
|
SCM_CELL_OBJECT_LOC (buf, \
|
||||||
|
@ -97,16 +91,6 @@ SCM_SYMBOL (sym_error, "error");
|
||||||
|
|
||||||
#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
|
#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
|
||||||
|
|
||||||
#define SET_STRINGBUF_SHARED(buf) \
|
|
||||||
do \
|
|
||||||
{ \
|
|
||||||
/* Don't modify BUF if it's already marked as shared since it might be \
|
|
||||||
a read-only, statically allocated stringbuf. */ \
|
|
||||||
if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
|
|
||||||
SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
|
|
||||||
} \
|
|
||||||
while (0)
|
|
||||||
|
|
||||||
#ifdef SCM_STRING_LENGTH_HISTOGRAM
|
#ifdef SCM_STRING_LENGTH_HISTOGRAM
|
||||||
static size_t lenhist[1001];
|
static size_t lenhist[1001];
|
||||||
#endif
|
#endif
|
||||||
|
@ -231,8 +215,6 @@ narrow_stringbuf (SCM buf)
|
||||||
return new_buf;
|
return new_buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
|
|
||||||
|
|
||||||
|
|
||||||
/* Copy-on-write strings.
|
/* Copy-on-write strings.
|
||||||
*/
|
*/
|
||||||
|
@ -267,15 +249,8 @@ scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
|
||||||
void
|
void
|
||||||
scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate)
|
scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate)
|
||||||
{
|
{
|
||||||
SCM str;
|
SCM str = scm_double_cell (STRING_TAG, SCM_UNPACK(exp), 0,
|
||||||
|
STRINGBUF_LENGTH (exp));
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
SET_STRINGBUF_SHARED (exp);
|
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
|
|
||||||
str = scm_double_cell (RO_STRING_TAG, SCM_UNPACK(exp),
|
|
||||||
0, STRINGBUF_LENGTH (exp));
|
|
||||||
|
|
||||||
scm_puts ("#<stringbuf ", port);
|
scm_puts ("#<stringbuf ", port);
|
||||||
scm_iprin1 (str, port, pstate);
|
scm_iprin1 (str, port, pstate);
|
||||||
scm_puts (">", port);
|
scm_puts (">", port);
|
||||||
|
@ -289,7 +264,6 @@ static void
|
||||||
init_null_stringbuf (void)
|
init_null_stringbuf (void)
|
||||||
{
|
{
|
||||||
null_stringbuf = make_stringbuf (0);
|
null_stringbuf = make_stringbuf (0);
|
||||||
SET_STRINGBUF_SHARED (null_stringbuf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
|
/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
|
||||||
|
@ -359,77 +333,110 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start)
|
||||||
*buf = STRING_STRINGBUF (*str);
|
*buf = STRING_STRINGBUF (*str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static SCM
|
||||||
|
substring_with_immutable_stringbuf (SCM str, size_t start, size_t end,
|
||||||
|
int force_copy_p, int read_only_p)
|
||||||
|
{
|
||||||
|
SCM buf;
|
||||||
|
size_t str_start, len;
|
||||||
|
scm_t_bits tag = read_only_p ? RO_STRING_TAG : STRING_TAG;
|
||||||
|
|
||||||
|
get_str_buf_start (&str, &buf, &str_start);
|
||||||
|
len = end - start;
|
||||||
|
start += str_start;
|
||||||
|
|
||||||
|
if (len == 0)
|
||||||
|
return scm_i_make_string (0, NULL, read_only_p);
|
||||||
|
else if (!force_copy_p && SCM_LIKELY (!STRINGBUF_MUTABLE (buf)))
|
||||||
|
return scm_double_cell (tag, SCM_UNPACK (buf), start, len);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SCM new_buf, new_str;
|
||||||
|
|
||||||
|
if (STRINGBUF_WIDE (buf))
|
||||||
|
{
|
||||||
|
new_buf = make_wide_stringbuf (len);
|
||||||
|
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
|
||||||
|
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + start), len);
|
||||||
|
new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len);
|
||||||
|
scm_i_try_narrow_string (new_str);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
new_buf = make_stringbuf (len);
|
||||||
|
memcpy (STRINGBUF_CHARS (new_buf),
|
||||||
|
STRINGBUF_CHARS (buf) + start, len);
|
||||||
|
new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
return new_str;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
scm_i_substring (SCM str, size_t start, size_t end)
|
scm_i_substring (SCM str, size_t start, size_t end)
|
||||||
{
|
{
|
||||||
if (start == end)
|
return substring_with_immutable_stringbuf (str, start, end, 0, 0);
|
||||||
return scm_i_make_string (0, NULL, 0);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SCM buf;
|
|
||||||
size_t str_start;
|
|
||||||
get_str_buf_start (&str, &buf, &str_start);
|
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
SET_STRINGBUF_SHARED (buf);
|
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
|
|
||||||
(scm_t_bits)str_start + start,
|
|
||||||
(scm_t_bits) end - start);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
scm_i_substring_read_only (SCM str, size_t start, size_t end)
|
scm_i_substring_read_only (SCM str, size_t start, size_t end)
|
||||||
{
|
{
|
||||||
if (start == end)
|
return substring_with_immutable_stringbuf (str, start, end, 0, 1);
|
||||||
return scm_i_make_string (0, NULL, 1);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SCM buf;
|
|
||||||
size_t str_start;
|
|
||||||
get_str_buf_start (&str, &buf, &str_start);
|
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
SET_STRINGBUF_SHARED (buf);
|
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
|
|
||||||
(scm_t_bits)str_start + start,
|
|
||||||
(scm_t_bits) end - start);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
scm_i_substring_copy (SCM str, size_t start, size_t end)
|
scm_i_substring_copy (SCM str, size_t start, size_t end)
|
||||||
{
|
{
|
||||||
if (start == end)
|
return substring_with_immutable_stringbuf (str, start, end, 1, 0);
|
||||||
return scm_i_make_string (0, NULL, 0);
|
}
|
||||||
else
|
|
||||||
|
static void
|
||||||
|
scm_i_string_ensure_mutable_x (SCM str)
|
||||||
|
{
|
||||||
|
SCM buf;
|
||||||
|
|
||||||
|
if (IS_SH_STRING (str))
|
||||||
{
|
{
|
||||||
size_t len = end - start;
|
/* Shared-mutation strings always have mutable stringbufs. */
|
||||||
SCM buf, my_buf, substr;
|
buf = STRING_STRINGBUF (SH_STRING_STRING (str));
|
||||||
size_t str_start;
|
if (!STRINGBUF_MUTABLE (buf))
|
||||||
int wide = 0;
|
abort ();
|
||||||
get_str_buf_start (&str, &buf, &str_start);
|
return;
|
||||||
if (scm_i_is_narrow_string (str))
|
|
||||||
{
|
|
||||||
my_buf = make_stringbuf (len);
|
|
||||||
memcpy (STRINGBUF_CHARS (my_buf),
|
|
||||||
STRINGBUF_CHARS (buf) + str_start + start, len);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
my_buf = make_wide_stringbuf (len);
|
|
||||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
|
|
||||||
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
|
|
||||||
+ start), len);
|
|
||||||
wide = 1;
|
|
||||||
}
|
|
||||||
scm_remember_upto_here_1 (buf);
|
|
||||||
substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
|
|
||||||
(scm_t_bits) 0, (scm_t_bits) len);
|
|
||||||
if (wide)
|
|
||||||
scm_i_try_narrow_string (substr);
|
|
||||||
return substr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (IS_RO_STRING (str))
|
||||||
|
scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (str));
|
||||||
|
|
||||||
|
buf = STRING_STRINGBUF (str);
|
||||||
|
|
||||||
|
if (STRINGBUF_MUTABLE (buf))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Otherwise copy and mark the fresh stringbuf as mutable. Note that
|
||||||
|
we copy the whole stringbuf so that the start/len offsets from the
|
||||||
|
original string keep working, so that concurrent accessors on this
|
||||||
|
string don't see things in an inconsistent state. */
|
||||||
|
{
|
||||||
|
SCM new_buf;
|
||||||
|
size_t len = STRINGBUF_LENGTH (buf);
|
||||||
|
|
||||||
|
if (STRINGBUF_WIDE (buf))
|
||||||
|
{
|
||||||
|
new_buf = make_wide_stringbuf (len);
|
||||||
|
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
|
||||||
|
(scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
new_buf = make_stringbuf (len);
|
||||||
|
memcpy (STRINGBUF_CHARS (new_buf), STRINGBUF_CHARS (buf), len);
|
||||||
|
}
|
||||||
|
|
||||||
|
SCM_SET_CELL_WORD_0 (new_buf,
|
||||||
|
SCM_CELL_WORD_0 (new_buf) | STRINGBUF_F_MUTABLE);
|
||||||
|
SET_STRING_STRINGBUF (str, new_buf);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SCM
|
SCM
|
||||||
|
@ -439,6 +446,8 @@ scm_i_substring_shared (SCM str, size_t start, size_t end)
|
||||||
return str;
|
return str;
|
||||||
else if (start == end)
|
else if (start == end)
|
||||||
return scm_i_make_string (0, NULL, 0);
|
return scm_i_make_string (0, NULL, 0);
|
||||||
|
else if (IS_RO_STRING (str))
|
||||||
|
return scm_i_substring_read_only (str, start, end);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
size_t len = end - start;
|
size_t len = end - start;
|
||||||
|
@ -447,6 +456,9 @@ scm_i_substring_shared (SCM str, size_t start, size_t end)
|
||||||
start += STRING_START (str);
|
start += STRING_START (str);
|
||||||
str = SH_STRING_STRING (str);
|
str = SH_STRING_STRING (str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scm_i_string_ensure_mutable_x (str);
|
||||||
|
|
||||||
return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
|
return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
|
||||||
(scm_t_bits)start, (scm_t_bits) len);
|
(scm_t_bits)start, (scm_t_bits) len);
|
||||||
}
|
}
|
||||||
|
@ -568,60 +580,13 @@ scm_i_string_wide_chars (SCM str)
|
||||||
scm_list_1 (str));
|
scm_list_1 (str));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
|
/* If the buffer in ORIG_STR is immutable, copy ORIG_STR's characters to
|
||||||
a new string buffer, so that it can be modified without modifying
|
a new string buffer, so that it can be modified without modifying
|
||||||
other strings. Also, lock the string mutex. Later, one must call
|
other strings. */
|
||||||
scm_i_string_stop_writing to unlock the mutex. */
|
|
||||||
SCM
|
SCM
|
||||||
scm_i_string_start_writing (SCM orig_str)
|
scm_i_string_start_writing (SCM orig_str)
|
||||||
{
|
{
|
||||||
SCM buf, str = orig_str;
|
scm_i_string_ensure_mutable_x (orig_str);
|
||||||
size_t start;
|
|
||||||
|
|
||||||
get_str_buf_start (&str, &buf, &start);
|
|
||||||
if (IS_RO_STRING (str))
|
|
||||||
scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
|
|
||||||
|
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
if (STRINGBUF_SHARED (buf))
|
|
||||||
{
|
|
||||||
/* Clone the stringbuf. */
|
|
||||||
size_t len = STRING_LENGTH (str);
|
|
||||||
SCM new_buf;
|
|
||||||
|
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
|
|
||||||
if (scm_i_is_narrow_string (str))
|
|
||||||
{
|
|
||||||
new_buf = make_stringbuf (len);
|
|
||||||
memcpy (STRINGBUF_CHARS (new_buf),
|
|
||||||
STRINGBUF_CHARS (buf) + STRING_START (str), len);
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
new_buf = make_wide_stringbuf (len);
|
|
||||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
|
|
||||||
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
|
|
||||||
+ STRING_START (str)), len);
|
|
||||||
}
|
|
||||||
|
|
||||||
SET_STRING_STRINGBUF (str, new_buf);
|
|
||||||
start -= STRING_START (str);
|
|
||||||
|
|
||||||
/* FIXME: The following operations are not atomic, so other threads
|
|
||||||
looking at STR may see an inconsistent state. Nevertheless it can't
|
|
||||||
hurt much since (i) accessing STR while it is being mutated can't
|
|
||||||
yield a crash, and (ii) concurrent accesses to STR should be
|
|
||||||
protected by a mutex at the application level. The latter may not
|
|
||||||
apply when STR != ORIG_STR, though. */
|
|
||||||
SET_STRING_START (str, 0);
|
|
||||||
SET_STRING_STRINGBUF (str, new_buf);
|
|
||||||
|
|
||||||
buf = new_buf;
|
|
||||||
|
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
}
|
|
||||||
return orig_str;
|
return orig_str;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -661,7 +626,6 @@ scm_i_string_writable_wide_chars (SCM str)
|
||||||
void
|
void
|
||||||
scm_i_string_stop_writing (void)
|
scm_i_string_stop_writing (void)
|
||||||
{
|
{
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the Xth character of STR as a UCS-4 codepoint. */
|
/* Return the Xth character of STR as a UCS-4 codepoint. */
|
||||||
|
@ -768,42 +732,10 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
|
||||||
unsigned long hash, SCM props)
|
unsigned long hash, SCM props)
|
||||||
{
|
{
|
||||||
SCM buf;
|
SCM buf;
|
||||||
size_t start = STRING_START (name);
|
|
||||||
size_t length = STRING_LENGTH (name);
|
size_t length = STRING_LENGTH (name);
|
||||||
|
|
||||||
if (IS_SH_STRING (name))
|
name = scm_i_substring_copy (name, 0, length);
|
||||||
{
|
|
||||||
name = SH_STRING_STRING (name);
|
|
||||||
start += STRING_START (name);
|
|
||||||
}
|
|
||||||
buf = STRING_STRINGBUF (name);
|
buf = STRING_STRINGBUF (name);
|
||||||
|
|
||||||
if (start == 0 && length == STRINGBUF_LENGTH (buf))
|
|
||||||
{
|
|
||||||
/* reuse buf. */
|
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
SET_STRINGBUF_SHARED (buf);
|
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* make new buf. */
|
|
||||||
if (scm_i_is_narrow_string (name))
|
|
||||||
{
|
|
||||||
SCM new_buf = make_stringbuf (length);
|
|
||||||
memcpy (STRINGBUF_CHARS (new_buf),
|
|
||||||
STRINGBUF_CHARS (buf) + start, length);
|
|
||||||
buf = new_buf;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SCM new_buf = make_wide_stringbuf (length);
|
|
||||||
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
|
|
||||||
(scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
|
|
||||||
length);
|
|
||||||
buf = new_buf;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
|
return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
|
||||||
(scm_t_bits) hash, SCM_UNPACK (props));
|
(scm_t_bits) hash, SCM_UNPACK (props));
|
||||||
}
|
}
|
||||||
|
@ -882,9 +814,6 @@ SCM
|
||||||
scm_i_symbol_substring (SCM sym, size_t start, size_t end)
|
scm_i_symbol_substring (SCM sym, size_t start, size_t end)
|
||||||
{
|
{
|
||||||
SCM buf = SYMBOL_STRINGBUF (sym);
|
SCM buf = SYMBOL_STRINGBUF (sym);
|
||||||
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
|
|
||||||
SET_STRINGBUF_SHARED (buf);
|
|
||||||
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
|
|
||||||
return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
|
return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
|
||||||
(scm_t_bits)start, (scm_t_bits) end - start);
|
(scm_t_bits)start, (scm_t_bits) end - start);
|
||||||
}
|
}
|
||||||
|
@ -921,8 +850,8 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
|
||||||
"A new string containing this string's stringbuf's characters\n"
|
"A new string containing this string's stringbuf's characters\n"
|
||||||
"@item stringbuf-length\n"
|
"@item stringbuf-length\n"
|
||||||
"The number of characters in this stringbuf\n"
|
"The number of characters in this stringbuf\n"
|
||||||
"@item stringbuf-shared\n"
|
"@item stringbuf-mutable\n"
|
||||||
"@code{#t} if this stringbuf is shared\n"
|
"@code{#t} if this stringbuf is mutable\n"
|
||||||
"@item stringbuf-wide\n"
|
"@item stringbuf-wide\n"
|
||||||
"@code{#t} if this stringbuf's characters are stored in a\n"
|
"@code{#t} if this stringbuf's characters are stored in a\n"
|
||||||
"32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
|
"32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
|
||||||
|
@ -984,11 +913,11 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
|
||||||
}
|
}
|
||||||
e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
|
e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
|
||||||
scm_from_size_t (STRINGBUF_LENGTH (buf)));
|
scm_from_size_t (STRINGBUF_LENGTH (buf)));
|
||||||
if (STRINGBUF_SHARED (buf))
|
if (STRINGBUF_MUTABLE (buf))
|
||||||
e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
|
e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
|
||||||
SCM_BOOL_T);
|
SCM_BOOL_T);
|
||||||
else
|
else
|
||||||
e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
|
e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
|
||||||
SCM_BOOL_F);
|
SCM_BOOL_F);
|
||||||
if (STRINGBUF_WIDE (buf))
|
if (STRINGBUF_WIDE (buf))
|
||||||
e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
|
e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
|
||||||
|
@ -1015,8 +944,8 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
|
||||||
"A new string containing this symbols's stringbuf's characters\n"
|
"A new string containing this symbols's stringbuf's characters\n"
|
||||||
"@item stringbuf-length\n"
|
"@item stringbuf-length\n"
|
||||||
"The number of characters in this stringbuf\n"
|
"The number of characters in this stringbuf\n"
|
||||||
"@item stringbuf-shared\n"
|
"@item stringbuf-mutable\n"
|
||||||
"@code{#t} if this stringbuf is shared\n"
|
"@code{#t} if this stringbuf is mutable\n"
|
||||||
"@item stringbuf-wide\n"
|
"@item stringbuf-wide\n"
|
||||||
"@code{#t} if this stringbuf's characters are stored in a\n"
|
"@code{#t} if this stringbuf's characters are stored in a\n"
|
||||||
"32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
|
"32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
|
||||||
|
@ -1057,11 +986,11 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
|
||||||
}
|
}
|
||||||
e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
|
e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
|
||||||
scm_from_size_t (STRINGBUF_LENGTH (buf)));
|
scm_from_size_t (STRINGBUF_LENGTH (buf)));
|
||||||
if (STRINGBUF_SHARED (buf))
|
if (STRINGBUF_MUTABLE (buf))
|
||||||
e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
|
e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
|
||||||
SCM_BOOL_T);
|
SCM_BOOL_T);
|
||||||
else
|
else
|
||||||
e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
|
e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
|
||||||
SCM_BOOL_F);
|
SCM_BOOL_F);
|
||||||
if (STRINGBUF_WIDE (buf))
|
if (STRINGBUF_WIDE (buf))
|
||||||
e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
|
e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
|
||||||
|
|
|
@ -182,8 +182,8 @@ SCM_API SCM scm_makfromstrs (int argc, char **argv);
|
||||||
#define scm_tc7_ro_string (scm_tc7_string + 0x200)
|
#define scm_tc7_ro_string (scm_tc7_string + 0x200)
|
||||||
|
|
||||||
/* Flags for shared and wide strings. */
|
/* Flags for shared and wide strings. */
|
||||||
#define SCM_I_STRINGBUF_F_SHARED 0x100
|
|
||||||
#define SCM_I_STRINGBUF_F_WIDE 0x400
|
#define SCM_I_STRINGBUF_F_WIDE 0x400
|
||||||
|
#define SCM_I_STRINGBUF_F_MUTABLE 0x800
|
||||||
|
|
||||||
SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port,
|
SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port,
|
||||||
scm_print_state *pstate);
|
scm_print_state *pstate);
|
||||||
|
|
|
@ -1385,13 +1385,10 @@ should be .data or .rodata), and return the resulting linker object.
|
||||||
(modulo (- alignment (modulo address alignment)) alignment)))
|
(modulo (- alignment (modulo address alignment)) alignment)))
|
||||||
|
|
||||||
(define tc7-vector 13)
|
(define tc7-vector 13)
|
||||||
(define stringbuf-shared-flag #x100)
|
|
||||||
(define stringbuf-wide-flag #x400)
|
(define stringbuf-wide-flag #x400)
|
||||||
(define tc7-stringbuf 39)
|
(define tc7-stringbuf 39)
|
||||||
(define tc7-narrow-stringbuf
|
(define tc7-narrow-stringbuf tc7-stringbuf)
|
||||||
(+ tc7-stringbuf stringbuf-shared-flag))
|
(define tc7-wide-stringbuf (+ tc7-stringbuf stringbuf-wide-flag))
|
||||||
(define tc7-wide-stringbuf
|
|
||||||
(+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
|
|
||||||
(define tc7-ro-string (+ 21 #x200))
|
(define tc7-ro-string (+ 21 #x200))
|
||||||
(define tc7-program 69)
|
(define tc7-program 69)
|
||||||
(define tc7-bytevector 77)
|
(define tc7-bytevector 77)
|
||||||
|
|
|
@ -111,27 +111,45 @@
|
||||||
(not (eq? (assq-ref (%string-dump s2) 'shared)
|
(not (eq? (assq-ref (%string-dump s2) 'shared)
|
||||||
s1))))
|
s1))))
|
||||||
|
|
||||||
(pass-if "ASCII substrings share stringbufs before copy-on-write"
|
(pass-if "ASCII substrings immutable before copy-on-write"
|
||||||
(let* ((s1 "foobar")
|
(let* ((s1 "foobar")
|
||||||
(s2 (substring s1 0 3)))
|
(s2 (substring s1 0 3)))
|
||||||
(assq-ref (%string-dump s1) 'stringbuf-shared)))
|
(and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
|
||||||
|
(not (assq-ref (%string-dump s2) 'stringbuf-mutable)))))
|
||||||
|
|
||||||
(pass-if "BMP substrings share stringbufs before copy-on-write"
|
(pass-if "BMP substrings immutable before copy-on-write"
|
||||||
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
|
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
|
||||||
(s2 (substring s1 0 3)))
|
(s2 (substring s1 0 3)))
|
||||||
(assq-ref (%string-dump s1) 'stringbuf-shared)))
|
(and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
|
||||||
|
(not (assq-ref (%string-dump s2) 'stringbuf-mutable)))))
|
||||||
|
|
||||||
(pass-if "ASCII substrings don't share stringbufs after copy-on-write"
|
(pass-if "ASCII base string still immutable after copy-on-write"
|
||||||
(let* ((s1 "foobar")
|
(let* ((s1 "foobar")
|
||||||
(s2 (substring s1 0 3)))
|
(s2 (substring s1 0 3)))
|
||||||
(string-set! s2 0 #\F)
|
(string-set! s2 0 #\F)
|
||||||
(not (assq-ref (%string-dump s2) 'stringbuf-shared))))
|
(and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
|
||||||
|
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
|
||||||
|
|
||||||
(pass-if "BMP substrings don't share stringbufs after copy-on-write"
|
(pass-if "BMP base string still immutable after copy-on-write"
|
||||||
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
|
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
|
||||||
(s2 (substring s1 0 3)))
|
(s2 (substring s1 0 3)))
|
||||||
(string-set! s2 0 #\F)
|
(string-set! s2 0 #\F)
|
||||||
(not (assq-ref (%string-dump s2) 'stringbuf-shared))))
|
(and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
|
||||||
|
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
|
||||||
|
|
||||||
|
(pass-if "ASCII substrings mutable after shared mutation"
|
||||||
|
(let* ((s1 "foobar")
|
||||||
|
(s2 (substring/shared s1 0 3)))
|
||||||
|
(string-set! s2 0 #\F)
|
||||||
|
(and (assq-ref (%string-dump s1) 'stringbuf-mutable)
|
||||||
|
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
|
||||||
|
|
||||||
|
(pass-if "BMP substrings mutable after shared mutation"
|
||||||
|
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
|
||||||
|
(s2 (substring/shared s1 0 3)))
|
||||||
|
(string-set! s2 0 #\F)
|
||||||
|
(and (assq-ref (%string-dump s1) 'stringbuf-mutable)
|
||||||
|
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
|
||||||
|
|
||||||
(with-test-prefix "encodings"
|
(with-test-prefix "encodings"
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue