1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-02 13:00:26 +02:00

Stringbufs immutable by default

* libguile/snarf.h (SCM_IMMUTABLE_STRINGBUF): Remove shared flag.
  Stringbufs are immutable by default.
* libguile/strings.c: Rewrite blurb.  Change to have stringbufs be
  immutable by default and mutable only when marked as such.  Going
  mutable means making a private copy.
  (STRINGBUF_MUTABLE, STRINGBUF_F_MUTABLE): New definitions.
  (SET_STRINGBUF_SHARED): Remove.
  (scm_i_print_stringbuf): Simplify to just alias the stringbuf as-is.
  (substring_with_immutable_stringbuf): New helper.
  (scm_i_substring, scm_i_substring_read_only, scm_i_substring_copy):
  use new helper.
  (scm_i_string_ensure_mutable_x): New helper.
  (scm_i_substring_shared): Use scm_i_string_ensure_mutable_x.
  (stringbuf_write_mutex): Remove; yaaaaaaaay.
  (scm_i_string_start_writing): Use scm_i_string_ensure_mutable_x.  No
  more mutex.
  (scm_i_string_stop_writing): Now a no-op.
  (scm_i_make_symbol): Use substring/copy.
  (scm_sys_string_dump, scm_sys_symbol_dump): Update.
* libguile/strings.h (SCM_I_STRINGBUF_F_SHARED): Remove.
  (SCM_I_STRINGBUF_F_MUTABLE): Add.
* module/system/vm/assembler.scm (link-data): Don't add shared flag any
  more.  Existing compiled flags are harmless tho.
* test-suite/tests/strings.test ("string internals"): Update.
This commit is contained in:
Andy Wingo 2017-02-16 12:57:46 +01:00
parent c38b9625c8
commit d0934df1f2
5 changed files with 158 additions and 214 deletions

View file

@ -308,7 +308,7 @@ SCM_SNARF_INIT(scm_set_smob_apply((tag), (c_name), (req), (opt), (rest));)
} \ } \
c_name = \ c_name = \
{ \ { \
scm_tc7_stringbuf | SCM_I_STRINGBUF_F_SHARED, \ scm_tc7_stringbuf, \
sizeof (contents) - 1, \ sizeof (contents) - 1, \
contents \ contents \
} }

View file

@ -54,40 +54,34 @@ SCM_SYMBOL (sym_UTF_8, "UTF-8");
SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1"); SCM_SYMBOL (sym_ISO_8859_1, "ISO-8859-1");
SCM_SYMBOL (sym_error, "error"); SCM_SYMBOL (sym_error, "error");
/* Stringbufs /* A stringbuf is a linear buffer of characters. Every string has a
* stringbuf. Strings may reference just a slice of a stringbuf; that's
* XXX - keeping an accurate refcount during GC seems to be quite often the case for strings made by the "substring" function.
* tricky, so we just keep score of whether a stringbuf might be
* shared, not whether it definitely is. Stringbufs may hold either 8-bit characters or 32-bit characters. In
* either case the characters are Unicode codepoints. "Narrow"
* The scheme I (mvo) tried to keep an accurate reference count would stringbufs thus have the ISO-8859-1 (Latin-1) encoding, and "wide"
* recount all strings that point to a stringbuf during the mark-phase stringbufs have the UTF-32 (UCS-4) encoding.
* of the GC. This was done since one cannot access the stringbuf of
* a string when that string is freed (in order to decrease the By default, stringbufs are immutable. This enables an O(1)
* reference count). The memory of the stringbuf might have been "substring" operation with no synchronization. A string-set! will
* reused already for something completely different. first ensure that the string's stringbuf is mutable, copying the
* stringbuf if necessary. This is therefore a copy-on-write
* This recounted worked for a small number of threads beating on representation. However, taking a substring of a mutable stringbuf
* cow-strings, but it failed randomly with more than 10 threads, say. is an O(n) operation as it has to create a new immutable stringbuf.
* I couldn't figure out what went wrong, so I used the conservative There are also mutation-sharing substrings as well. */
* approach implemented below.
*
* There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
* strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
* strings.
*/
/* The size in words of the stringbuf header (type tag + size). */ /* The size in words of the stringbuf header (type tag + size). */
#define STRINGBUF_HEADER_SIZE 2U #define STRINGBUF_HEADER_SIZE 2U
#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM)) #define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
#define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE #define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
#define STRINGBUF_F_MUTABLE SCM_I_STRINGBUF_F_MUTABLE
#define STRINGBUF_TAG scm_tc7_stringbuf #define STRINGBUF_TAG scm_tc7_stringbuf
#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE) #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
#define STRINGBUF_MUTABLE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_MUTABLE)
#define STRINGBUF_CONTENTS(buf) ((void *) \ #define STRINGBUF_CONTENTS(buf) ((void *) \
SCM_CELL_OBJECT_LOC (buf, \ SCM_CELL_OBJECT_LOC (buf, \
@ -97,16 +91,6 @@ SCM_SYMBOL (sym_error, "error");
#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf)) #define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
#define SET_STRINGBUF_SHARED(buf) \
do \
{ \
/* Don't modify BUF if it's already marked as shared since it might be \
a read-only, statically allocated stringbuf. */ \
if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
} \
while (0)
#ifdef SCM_STRING_LENGTH_HISTOGRAM #ifdef SCM_STRING_LENGTH_HISTOGRAM
static size_t lenhist[1001]; static size_t lenhist[1001];
#endif #endif
@ -231,8 +215,6 @@ narrow_stringbuf (SCM buf)
return new_buf; return new_buf;
} }
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
/* Copy-on-write strings. /* Copy-on-write strings.
*/ */
@ -267,15 +249,8 @@ scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
void void
scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate) scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate)
{ {
SCM str; SCM str = scm_double_cell (STRING_TAG, SCM_UNPACK(exp), 0,
STRINGBUF_LENGTH (exp));
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (exp);
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
str = scm_double_cell (RO_STRING_TAG, SCM_UNPACK(exp),
0, STRINGBUF_LENGTH (exp));
scm_puts ("#<stringbuf ", port); scm_puts ("#<stringbuf ", port);
scm_iprin1 (str, port, pstate); scm_iprin1 (str, port, pstate);
scm_puts (">", port); scm_puts (">", port);
@ -289,7 +264,6 @@ static void
init_null_stringbuf (void) init_null_stringbuf (void)
{ {
null_stringbuf = make_stringbuf (0); null_stringbuf = make_stringbuf (0);
SET_STRINGBUF_SHARED (null_stringbuf);
} }
/* Create a scheme string with space for LEN 8-bit Latin-1-encoded /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
@ -359,77 +333,110 @@ get_str_buf_start (SCM *str, SCM *buf, size_t *start)
*buf = STRING_STRINGBUF (*str); *buf = STRING_STRINGBUF (*str);
} }
static SCM
substring_with_immutable_stringbuf (SCM str, size_t start, size_t end,
int force_copy_p, int read_only_p)
{
SCM buf;
size_t str_start, len;
scm_t_bits tag = read_only_p ? RO_STRING_TAG : STRING_TAG;
get_str_buf_start (&str, &buf, &str_start);
len = end - start;
start += str_start;
if (len == 0)
return scm_i_make_string (0, NULL, read_only_p);
else if (!force_copy_p && SCM_LIKELY (!STRINGBUF_MUTABLE (buf)))
return scm_double_cell (tag, SCM_UNPACK (buf), start, len);
else
{
SCM new_buf, new_str;
if (STRINGBUF_WIDE (buf))
{
new_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + start), len);
new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len);
scm_i_try_narrow_string (new_str);
}
else
{
new_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + start, len);
new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len);
}
return new_str;
}
}
SCM SCM
scm_i_substring (SCM str, size_t start, size_t end) scm_i_substring (SCM str, size_t start, size_t end)
{ {
if (start == end) return substring_with_immutable_stringbuf (str, start, end, 0, 0);
return scm_i_make_string (0, NULL, 0);
else
{
SCM buf;
size_t str_start;
get_str_buf_start (&str, &buf, &str_start);
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
(scm_t_bits)str_start + start,
(scm_t_bits) end - start);
}
} }
SCM SCM
scm_i_substring_read_only (SCM str, size_t start, size_t end) scm_i_substring_read_only (SCM str, size_t start, size_t end)
{ {
if (start == end) return substring_with_immutable_stringbuf (str, start, end, 0, 1);
return scm_i_make_string (0, NULL, 1);
else
{
SCM buf;
size_t str_start;
get_str_buf_start (&str, &buf, &str_start);
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
(scm_t_bits)str_start + start,
(scm_t_bits) end - start);
}
} }
SCM SCM
scm_i_substring_copy (SCM str, size_t start, size_t end) scm_i_substring_copy (SCM str, size_t start, size_t end)
{ {
if (start == end) return substring_with_immutable_stringbuf (str, start, end, 1, 0);
return scm_i_make_string (0, NULL, 0); }
else
static void
scm_i_string_ensure_mutable_x (SCM str)
{
SCM buf;
if (IS_SH_STRING (str))
{ {
size_t len = end - start; /* Shared-mutation strings always have mutable stringbufs. */
SCM buf, my_buf, substr; buf = STRING_STRINGBUF (SH_STRING_STRING (str));
size_t str_start; if (!STRINGBUF_MUTABLE (buf))
int wide = 0; abort ();
get_str_buf_start (&str, &buf, &str_start); return;
if (scm_i_is_narrow_string (str))
{
my_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (my_buf),
STRINGBUF_CHARS (buf) + str_start + start, len);
}
else
{
my_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
+ start), len);
wide = 1;
}
scm_remember_upto_here_1 (buf);
substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
(scm_t_bits) 0, (scm_t_bits) len);
if (wide)
scm_i_try_narrow_string (substr);
return substr;
} }
if (IS_RO_STRING (str))
scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (str));
buf = STRING_STRINGBUF (str);
if (STRINGBUF_MUTABLE (buf))
return;
/* Otherwise copy and mark the fresh stringbuf as mutable. Note that
we copy the whole stringbuf so that the start/len offsets from the
original string keep working, so that concurrent accessors on this
string don't see things in an inconsistent state. */
{
SCM new_buf;
size_t len = STRINGBUF_LENGTH (buf);
if (STRINGBUF_WIDE (buf))
{
new_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
(scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
}
else
{
new_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (new_buf), STRINGBUF_CHARS (buf), len);
}
SCM_SET_CELL_WORD_0 (new_buf,
SCM_CELL_WORD_0 (new_buf) | STRINGBUF_F_MUTABLE);
SET_STRING_STRINGBUF (str, new_buf);
}
} }
SCM SCM
@ -439,6 +446,8 @@ scm_i_substring_shared (SCM str, size_t start, size_t end)
return str; return str;
else if (start == end) else if (start == end)
return scm_i_make_string (0, NULL, 0); return scm_i_make_string (0, NULL, 0);
else if (IS_RO_STRING (str))
return scm_i_substring_read_only (str, start, end);
else else
{ {
size_t len = end - start; size_t len = end - start;
@ -447,6 +456,9 @@ scm_i_substring_shared (SCM str, size_t start, size_t end)
start += STRING_START (str); start += STRING_START (str);
str = SH_STRING_STRING (str); str = SH_STRING_STRING (str);
} }
scm_i_string_ensure_mutable_x (str);
return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str), return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
(scm_t_bits)start, (scm_t_bits) len); (scm_t_bits)start, (scm_t_bits) len);
} }
@ -568,60 +580,13 @@ scm_i_string_wide_chars (SCM str)
scm_list_1 (str)); scm_list_1 (str));
} }
/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to /* If the buffer in ORIG_STR is immutable, copy ORIG_STR's characters to
a new string buffer, so that it can be modified without modifying a new string buffer, so that it can be modified without modifying
other strings. Also, lock the string mutex. Later, one must call other strings. */
scm_i_string_stop_writing to unlock the mutex. */
SCM SCM
scm_i_string_start_writing (SCM orig_str) scm_i_string_start_writing (SCM orig_str)
{ {
SCM buf, str = orig_str; scm_i_string_ensure_mutable_x (orig_str);
size_t start;
get_str_buf_start (&str, &buf, &start);
if (IS_RO_STRING (str))
scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
if (STRINGBUF_SHARED (buf))
{
/* Clone the stringbuf. */
size_t len = STRING_LENGTH (str);
SCM new_buf;
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
if (scm_i_is_narrow_string (str))
{
new_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + STRING_START (str), len);
}
else
{
new_buf = make_wide_stringbuf (len);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
(scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
+ STRING_START (str)), len);
}
SET_STRING_STRINGBUF (str, new_buf);
start -= STRING_START (str);
/* FIXME: The following operations are not atomic, so other threads
looking at STR may see an inconsistent state. Nevertheless it can't
hurt much since (i) accessing STR while it is being mutated can't
yield a crash, and (ii) concurrent accesses to STR should be
protected by a mutex at the application level. The latter may not
apply when STR != ORIG_STR, though. */
SET_STRING_START (str, 0);
SET_STRING_STRINGBUF (str, new_buf);
buf = new_buf;
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
}
return orig_str; return orig_str;
} }
@ -661,7 +626,6 @@ scm_i_string_writable_wide_chars (SCM str)
void void
scm_i_string_stop_writing (void) scm_i_string_stop_writing (void)
{ {
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
} }
/* Return the Xth character of STR as a UCS-4 codepoint. */ /* Return the Xth character of STR as a UCS-4 codepoint. */
@ -768,42 +732,10 @@ scm_i_make_symbol (SCM name, scm_t_bits flags,
unsigned long hash, SCM props) unsigned long hash, SCM props)
{ {
SCM buf; SCM buf;
size_t start = STRING_START (name);
size_t length = STRING_LENGTH (name); size_t length = STRING_LENGTH (name);
if (IS_SH_STRING (name)) name = scm_i_substring_copy (name, 0, length);
{
name = SH_STRING_STRING (name);
start += STRING_START (name);
}
buf = STRING_STRINGBUF (name); buf = STRING_STRINGBUF (name);
if (start == 0 && length == STRINGBUF_LENGTH (buf))
{
/* reuse buf. */
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
}
else
{
/* make new buf. */
if (scm_i_is_narrow_string (name))
{
SCM new_buf = make_stringbuf (length);
memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + start, length);
buf = new_buf;
}
else
{
SCM new_buf = make_wide_stringbuf (length);
u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
(scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
length);
buf = new_buf;
}
}
return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf), return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
(scm_t_bits) hash, SCM_UNPACK (props)); (scm_t_bits) hash, SCM_UNPACK (props));
} }
@ -882,9 +814,6 @@ SCM
scm_i_symbol_substring (SCM sym, size_t start, size_t end) scm_i_symbol_substring (SCM sym, size_t start, size_t end)
{ {
SCM buf = SYMBOL_STRINGBUF (sym); SCM buf = SYMBOL_STRINGBUF (sym);
scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf), return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
(scm_t_bits)start, (scm_t_bits) end - start); (scm_t_bits)start, (scm_t_bits) end - start);
} }
@ -921,8 +850,8 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
"A new string containing this string's stringbuf's characters\n" "A new string containing this string's stringbuf's characters\n"
"@item stringbuf-length\n" "@item stringbuf-length\n"
"The number of characters in this stringbuf\n" "The number of characters in this stringbuf\n"
"@item stringbuf-shared\n" "@item stringbuf-mutable\n"
"@code{#t} if this stringbuf is shared\n" "@code{#t} if this stringbuf is mutable\n"
"@item stringbuf-wide\n" "@item stringbuf-wide\n"
"@code{#t} if this stringbuf's characters are stored in a\n" "@code{#t} if this stringbuf's characters are stored in a\n"
"32-bit buffer, or @code{#f} if they are stored in an 8-bit\n" "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
@ -984,11 +913,11 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
} }
e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
scm_from_size_t (STRINGBUF_LENGTH (buf))); scm_from_size_t (STRINGBUF_LENGTH (buf)));
if (STRINGBUF_SHARED (buf)) if (STRINGBUF_MUTABLE (buf))
e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
SCM_BOOL_T); SCM_BOOL_T);
else else
e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
SCM_BOOL_F); SCM_BOOL_F);
if (STRINGBUF_WIDE (buf)) if (STRINGBUF_WIDE (buf))
e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
@ -1015,8 +944,8 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
"A new string containing this symbols's stringbuf's characters\n" "A new string containing this symbols's stringbuf's characters\n"
"@item stringbuf-length\n" "@item stringbuf-length\n"
"The number of characters in this stringbuf\n" "The number of characters in this stringbuf\n"
"@item stringbuf-shared\n" "@item stringbuf-mutable\n"
"@code{#t} if this stringbuf is shared\n" "@code{#t} if this stringbuf is mutable\n"
"@item stringbuf-wide\n" "@item stringbuf-wide\n"
"@code{#t} if this stringbuf's characters are stored in a\n" "@code{#t} if this stringbuf's characters are stored in a\n"
"32-bit buffer, or @code{#f} if they are stored in an 8-bit\n" "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
@ -1057,11 +986,11 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
} }
e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
scm_from_size_t (STRINGBUF_LENGTH (buf))); scm_from_size_t (STRINGBUF_LENGTH (buf)));
if (STRINGBUF_SHARED (buf)) if (STRINGBUF_MUTABLE (buf))
e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
SCM_BOOL_T); SCM_BOOL_T);
else else
e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"),
SCM_BOOL_F); SCM_BOOL_F);
if (STRINGBUF_WIDE (buf)) if (STRINGBUF_WIDE (buf))
e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),

View file

@ -182,8 +182,8 @@ SCM_API SCM scm_makfromstrs (int argc, char **argv);
#define scm_tc7_ro_string (scm_tc7_string + 0x200) #define scm_tc7_ro_string (scm_tc7_string + 0x200)
/* Flags for shared and wide strings. */ /* Flags for shared and wide strings. */
#define SCM_I_STRINGBUF_F_SHARED 0x100
#define SCM_I_STRINGBUF_F_WIDE 0x400 #define SCM_I_STRINGBUF_F_WIDE 0x400
#define SCM_I_STRINGBUF_F_MUTABLE 0x800
SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port, SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port,
scm_print_state *pstate); scm_print_state *pstate);

View file

@ -1385,13 +1385,10 @@ should be .data or .rodata), and return the resulting linker object.
(modulo (- alignment (modulo address alignment)) alignment))) (modulo (- alignment (modulo address alignment)) alignment)))
(define tc7-vector 13) (define tc7-vector 13)
(define stringbuf-shared-flag #x100)
(define stringbuf-wide-flag #x400) (define stringbuf-wide-flag #x400)
(define tc7-stringbuf 39) (define tc7-stringbuf 39)
(define tc7-narrow-stringbuf (define tc7-narrow-stringbuf tc7-stringbuf)
(+ tc7-stringbuf stringbuf-shared-flag)) (define tc7-wide-stringbuf (+ tc7-stringbuf stringbuf-wide-flag))
(define tc7-wide-stringbuf
(+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
(define tc7-ro-string (+ 21 #x200)) (define tc7-ro-string (+ 21 #x200))
(define tc7-program 69) (define tc7-program 69)
(define tc7-bytevector 77) (define tc7-bytevector 77)

View file

@ -111,27 +111,45 @@
(not (eq? (assq-ref (%string-dump s2) 'shared) (not (eq? (assq-ref (%string-dump s2) 'shared)
s1)))) s1))))
(pass-if "ASCII substrings share stringbufs before copy-on-write" (pass-if "ASCII substrings immutable before copy-on-write"
(let* ((s1 "foobar") (let* ((s1 "foobar")
(s2 (substring s1 0 3))) (s2 (substring s1 0 3)))
(assq-ref (%string-dump s1) 'stringbuf-shared))) (and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
(not (assq-ref (%string-dump s2) 'stringbuf-mutable)))))
(pass-if "BMP substrings share stringbufs before copy-on-write" (pass-if "BMP substrings immutable before copy-on-write"
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105") (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
(s2 (substring s1 0 3))) (s2 (substring s1 0 3)))
(assq-ref (%string-dump s1) 'stringbuf-shared))) (and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
(not (assq-ref (%string-dump s2) 'stringbuf-mutable)))))
(pass-if "ASCII substrings don't share stringbufs after copy-on-write" (pass-if "ASCII base string still immutable after copy-on-write"
(let* ((s1 "foobar") (let* ((s1 "foobar")
(s2 (substring s1 0 3))) (s2 (substring s1 0 3)))
(string-set! s2 0 #\F) (string-set! s2 0 #\F)
(not (assq-ref (%string-dump s2) 'stringbuf-shared)))) (and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
(pass-if "BMP substrings don't share stringbufs after copy-on-write" (pass-if "BMP base string still immutable after copy-on-write"
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105") (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
(s2 (substring s1 0 3))) (s2 (substring s1 0 3)))
(string-set! s2 0 #\F) (string-set! s2 0 #\F)
(not (assq-ref (%string-dump s2) 'stringbuf-shared)))) (and (not (assq-ref (%string-dump s1) 'stringbuf-mutable))
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
(pass-if "ASCII substrings mutable after shared mutation"
(let* ((s1 "foobar")
(s2 (substring/shared s1 0 3)))
(string-set! s2 0 #\F)
(and (assq-ref (%string-dump s1) 'stringbuf-mutable)
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
(pass-if "BMP substrings mutable after shared mutation"
(let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
(s2 (substring/shared s1 0 3)))
(string-set! s2 0 #\F)
(and (assq-ref (%string-dump s1) 'stringbuf-mutable)
(assq-ref (%string-dump s2) 'stringbuf-mutable))))
(with-test-prefix "encodings" (with-test-prefix "encodings"