From b25a743cf98975d733b0dbb14067102aee35d4bc Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 23 Jun 2025 14:55:39 +0200 Subject: [PATCH] Get strings, symbols, stringbufs off scm_double_cell * libguile/symbols.h (scm_is_symbol, scm_to_symbol, scm_from_symbol): Define some helpers and a "struct scm_symbol". * libguile/strings-internal.h (scm_i_string_data): Remove. * libguile/print.c (write_char_in_string, write_narrow_string) (write_wide_string): Refactor to avoid per-char narrow checks. (write_character): Move up. (iprin1): Adapt to call write_narrow_string / write_wide_string. * libguile/srfi-13.c (scm_string_eq): Avoid scm_i_string_data. * libguile/strings.c (scm_is_stringbuf, scm_to_stringbuf) (scm_from_stringbuf, stringbuf_is_wide, stringbuf_is_narrow) (stringbuf_is_mutable, stringbuf_set_mutable, stringbuf_length) (as_narrow_stringbuf, as_wide_stringbuf, narrow_stringbuf_chars) (wide_stringbuf_chars, scm_to_string, scm_from_string, string_is_read_only) (string_is_shared, string_stringbuf, string_aliased_string, string_start) (string_length): New inline function helpers, to replace a pile of macros. Adapt all users. --- libguile/print.c | 193 ++++----- libguile/srfi-13.c | 15 +- libguile/strings-internal.h | 1 - libguile/strings.c | 820 ++++++++++++++++++------------------ libguile/symbols.h | 28 +- 5 files changed, 548 insertions(+), 509 deletions(-) diff --git a/libguile/print.c b/libguile/print.c index 926556d34..3dc63a176 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -77,13 +77,6 @@ #include "print.h" - - -/* Character printers. */ - -static void write_string (const void *, int, size_t, SCM); -static void write_character (scm_t_wchar, SCM); - /* {Names of immediate symbols} @@ -487,6 +480,98 @@ print_r7rs_extended_symbol (SCM sym, SCM port) scm_putc ('|', port); } +static void +write_char_in_string (scm_t_wchar ch, SCM port) +{ + /* Write CH to PORT, escaping it if it's non-graphic or not + representable in PORT's encoding. If CH needs to be escaped, + it is escaped using the in-string escape syntax. */ + if (ch == '"') + scm_c_put_latin1_chars (port, (const uint8_t *) "\\\"", 2); + else if (ch == '\\') + scm_c_put_latin1_chars (port, (const uint8_t *) "\\\\", 2); + else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P) + scm_c_put_latin1_chars (port, (const uint8_t *) "\\n", 2); + else if (ch == ' ' || ch == '\n' + || (uc_is_general_category_withtable (ch, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_N | + UC_CATEGORY_MASK_P | + UC_CATEGORY_MASK_S) + && scm_c_can_put_char (port, ch))) + scm_c_put_char (port, ch); + else + scm_c_put_escaped_char (port, ch); +} + +static void +write_narrow_string (const char *str, size_t len, SCM port) +{ + scm_c_put_char (port, (uint8_t) '"'); + + for (size_t i = 0; i < len; ++i) + write_char_in_string ((unsigned char) str[i], port); + + scm_c_put_char (port, (uint8_t) '"'); +} + +static void +write_wide_string (const scm_t_wchar *str, size_t len, SCM port) +{ + scm_c_put_char (port, (uint8_t) '"'); + + for (size_t i = 0; i < len; ++i) + write_char_in_string (str[i], port); + + scm_c_put_char (port, (uint8_t) '"'); +} + +/* Write CH to PORT, escaping it if it's non-graphic or not + representable in PORT's encoding. The character escape syntax is + used. */ +static void +write_character (scm_t_wchar ch, SCM port) +{ + scm_puts ("#\\", port); + + /* Pretty-print a combining characters over dotted circles, if + possible, to make them more visible. */ + if (uc_combining_class (ch) != UC_CCC_NR + && scm_c_can_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE) + && scm_c_can_put_char (port, ch)) + { + scm_c_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE); + scm_c_put_char (port, ch); + } + else if (uc_is_general_category_withtable (ch, + UC_CATEGORY_MASK_L | + UC_CATEGORY_MASK_M | + UC_CATEGORY_MASK_N | + UC_CATEGORY_MASK_P | + UC_CATEGORY_MASK_S) + && scm_c_can_put_char (port, ch)) + /* CH is graphic and encodeable; display it. */ + scm_c_put_char (port, ch); + else + /* CH isn't graphic or cannot be represented in PORT's encoding. */ + { + /* Represent CH using the character escape syntax. */ + const char *name; + + name = scm_i_charname (SCM_MAKE_CHAR (ch)); + if (name != NULL) + scm_puts (name, port); + else if (!SCM_R6RS_ESCAPES_P) + scm_intprint (ch, 8, port); + else + { + scm_puts ("x", port); + scm_intprint (ch, 16, port); + } + } +} + /* FIXME: allow R6RS hex escapes instead of #{...}# or |...|. */ static void print_symbol (SCM sym, SCM port) @@ -629,9 +714,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) size_t len = scm_i_string_length (exp); if (pstate->writingp) - write_string (scm_i_string_data (exp), - scm_i_is_narrow_string (exp), - len, port); + { + if (scm_i_is_narrow_string (exp)) + write_narrow_string (scm_i_string_chars (exp), len, port); + else + write_wide_string (scm_i_string_wide_chars (exp), len, port); + } else scm_c_put_string (port, exp, 0, len); } @@ -822,91 +910,6 @@ scm_prin1 (SCM exp, SCM port, int writingp) scm_dynwind_end (); } -static void -write_string (const void *str, int narrow_p, size_t len, SCM port) -{ - size_t i; - - scm_c_put_char (port, (uint8_t) '"'); - - for (i = 0; i < len; ++i) - { - scm_t_wchar ch; - if (narrow_p) - ch = (scm_t_wchar) ((unsigned char *) (str))[i]; - else - ch = ((scm_t_wchar *) (str))[i]; - - /* Write CH to PORT, escaping it if it's non-graphic or not - representable in PORT's encoding. If CH needs to be escaped, - it is escaped using the in-string escape syntax. */ - if (ch == '"') - scm_c_put_latin1_chars (port, (const uint8_t *) "\\\"", 2); - else if (ch == '\\') - scm_c_put_latin1_chars (port, (const uint8_t *) "\\\\", 2); - else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P) - scm_c_put_latin1_chars (port, (const uint8_t *) "\\n", 2); - else if (ch == ' ' || ch == '\n' - || (uc_is_general_category_withtable (ch, - UC_CATEGORY_MASK_L | - UC_CATEGORY_MASK_M | - UC_CATEGORY_MASK_N | - UC_CATEGORY_MASK_P | - UC_CATEGORY_MASK_S) - && scm_c_can_put_char (port, ch))) - scm_c_put_char (port, ch); - else - scm_c_put_escaped_char (port, ch); - } - - scm_c_put_char (port, (uint8_t) '"'); -} - -/* Write CH to PORT, escaping it if it's non-graphic or not - representable in PORT's encoding. The character escape syntax is - used. */ -static void -write_character (scm_t_wchar ch, SCM port) -{ - scm_puts ("#\\", port); - - /* Pretty-print a combining characters over dotted circles, if - possible, to make them more visible. */ - if (uc_combining_class (ch) != UC_CCC_NR - && scm_c_can_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE) - && scm_c_can_put_char (port, ch)) - { - scm_c_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE); - scm_c_put_char (port, ch); - } - else if (uc_is_general_category_withtable (ch, - UC_CATEGORY_MASK_L | - UC_CATEGORY_MASK_M | - UC_CATEGORY_MASK_N | - UC_CATEGORY_MASK_P | - UC_CATEGORY_MASK_S) - && scm_c_can_put_char (port, ch)) - /* CH is graphic and encodeable; display it. */ - scm_c_put_char (port, ch); - else - /* CH isn't graphic or cannot be represented in PORT's encoding. */ - { - /* Represent CH using the character escape syntax. */ - const char *name; - - name = scm_i_charname (SCM_MAKE_CHAR (ch)); - if (name != NULL) - scm_puts (name, port); - else if (!SCM_R6RS_ESCAPES_P) - scm_intprint (ch, 8, port); - else - { - scm_puts ("x", port); - scm_intprint (ch, 16, port); - } - } -} - /* Print an integer. */ diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index f75f45f0c..4f0a48a31 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -1202,15 +1202,14 @@ SCM_DEFINE (scm_string_eq, "string=", 2, 4, 0, if (len1 != len2) return SCM_BOOL_F; + else if (scm_i_is_narrow_string (s1)) + return scm_from_bool (memcmp (scm_i_string_chars (s1), + scm_i_string_chars (s2), + len1) == 0); else - { - if (!scm_i_is_narrow_string (s1)) - len1 *= 4; - - return scm_from_bool (memcmp (scm_i_string_data (s1), - scm_i_string_data (s2), - len1) == 0); - } + return scm_from_bool (memcmp (scm_i_string_wide_chars (s1), + scm_i_string_wide_chars (s2), + len1 * 4) == 0); } return compare_strings (FUNC_NAME, 0, diff --git a/libguile/strings-internal.h b/libguile/strings-internal.h index 11c0c4887..e064c97ab 100644 --- a/libguile/strings-internal.h +++ b/libguile/strings-internal.h @@ -51,7 +51,6 @@ SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end); SCM_INTERNAL size_t scm_i_string_length (SCM str); SCM_INTERNAL int scm_i_string_is_mutable (SCM str); SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str); -SCM_INTERNAL const void *scm_i_string_data (SCM str); SCM_INTERNAL SCM scm_i_string_start_writing (SCM str); SCM_INTERNAL void scm_i_string_stop_writing (void); diff --git a/libguile/strings.c b/libguile/strings.c index b8196c378..c010bef78 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -82,48 +82,108 @@ SCM_SYMBOL (sym_error, "error"); is an O(n) operation as it has to create a new immutable stringbuf. There are also mutation-sharing substrings as well. */ -/* The size in words of the stringbuf header (type tag + size). */ -#define STRINGBUF_HEADER_SIZE 2U +static inline int +scm_is_stringbuf (SCM x) +{ + return SCM_HAS_TYP7 (x, scm_tc7_stringbuf); +} -#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM)) +static inline struct scm_stringbuf* +scm_to_stringbuf (SCM x) +{ + if (!scm_is_stringbuf (x)) + abort (); + return (struct scm_stringbuf *) SCM_UNPACK_POINTER (x); +} -#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE -#define STRINGBUF_F_MUTABLE SCM_I_STRINGBUF_F_MUTABLE +static inline SCM +scm_from_stringbuf (struct scm_stringbuf *x) +{ + return SCM_PACK_POINTER (x); +} -#define STRINGBUF_TAG scm_tc7_stringbuf -#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE) -#define STRINGBUF_MUTABLE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_MUTABLE) +static inline int +stringbuf_is_wide (struct scm_stringbuf *buf) +{ + return buf->tag_and_flags & SCM_I_STRINGBUF_F_WIDE; +} +static inline int +stringbuf_is_narrow (struct scm_stringbuf *buf) +{ + return !stringbuf_is_wide (buf); +} -#define STRINGBUF_SET_MUTABLE(buf) \ - SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_MUTABLE) +static inline int +stringbuf_is_mutable (struct scm_stringbuf *buf) +{ + return buf->tag_and_flags & SCM_I_STRINGBUF_F_MUTABLE; +} -#define STRINGBUF_CONTENTS(buf) ((void *) \ - SCM_CELL_OBJECT_LOC (buf, \ - STRINGBUF_HEADER_SIZE)) -#define STRINGBUF_CHARS(buf) ((unsigned char *) STRINGBUF_CONTENTS (buf)) -#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CONTENTS (buf)) +static inline void +stringbuf_set_mutable (struct scm_stringbuf *buf) +{ + buf->tag_and_flags |= SCM_I_STRINGBUF_F_MUTABLE; +} -#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf)) +static inline size_t +stringbuf_length (struct scm_stringbuf *buf) +{ + return buf->length; +} + +static struct scm_narrow_stringbuf * +as_narrow_stringbuf (struct scm_stringbuf *buf) +{ + if (stringbuf_is_wide (buf)) + abort (); + return (struct scm_narrow_stringbuf *) buf; +} + +static struct scm_wide_stringbuf * +as_wide_stringbuf (struct scm_stringbuf *buf) +{ + if (!stringbuf_is_wide (buf)) + abort (); + return (struct scm_wide_stringbuf *) buf; +} + +static inline unsigned char* +narrow_stringbuf_chars (struct scm_narrow_stringbuf *buf) +{ + char *chars = buf->contents; + return (unsigned char *) chars; +} + +static inline scm_t_wchar* +wide_stringbuf_chars (struct scm_wide_stringbuf *buf) +{ + return buf->contents; +} #ifdef SCM_STRING_LENGTH_HISTOGRAM static size_t lenhist[1001]; #endif +// FIXME: Perhaps make a documented limit. +static const size_t max_stringbuf_payload_byte_size = + ((size_t) -1) - sizeof (struct scm_stringbuf) - 32; + + +SCM_IMMUTABLE_STRINGBUF (null_stringbuf, ""); + /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded characters. */ -static SCM -make_stringbuf (size_t len) +static struct scm_narrow_stringbuf * +make_narrow_stringbuf (size_t len) { /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and scm_i_symbol_chars, all stringbufs are null-terminated. Once - SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code - has been changed for scm_i_symbol_chars, this null-termination - can be dropped. + SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code has + been changed for scm_i_symbol_chars, this null-termination can be + dropped. */ - SCM buf; - #ifdef SCM_STRING_LENGTH_HISTOGRAM if (len < 1000) lenhist[len]++; @@ -131,32 +191,26 @@ make_stringbuf (size_t len) lenhist[1000]++; #endif - /* Make sure that the total allocation size will not overflow size_t, - with ~30 extra bytes to spare to avoid an overflow within the - allocator. */ - if (INT_ADD_OVERFLOW (len, STRINGBUF_HEADER_BYTES + 32)) - scm_num_overflow ("make_stringbuf"); + if (len >= max_stringbuf_payload_byte_size) + scm_out_of_range ("make_stringbuf", scm_from_size_t (len)); - buf = SCM_PACK_POINTER - (scm_allocate_pointerless (SCM_I_CURRENT_THREAD, - STRINGBUF_HEADER_BYTES + len + 1)); + if (len == 0) + /* Remove const attr. */ + return (struct scm_narrow_stringbuf *) &null_stringbuf; - SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG); - SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len); - - STRINGBUF_CHARS (buf)[len] = 0; + struct scm_narrow_stringbuf *buf = + scm_allocate_pointerless (SCM_I_CURRENT_THREAD, sizeof (*buf) + len + 1); + buf->header.tag_and_flags = scm_tc7_stringbuf; + buf->header.length = len; return buf; } /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded characters. */ -static SCM +static struct scm_wide_stringbuf * make_wide_stringbuf (size_t len) { - SCM buf; - size_t raw_len; - #ifdef SCM_STRING_LENGTH_HISTOGRAM if (len < 1000) lenhist[len]++; @@ -164,123 +218,137 @@ make_wide_stringbuf (size_t len) lenhist[1000]++; #endif - /* Make sure that the total allocation size will not overflow size_t, - with ~30 extra bytes to spare to avoid an overflow within the - allocator. */ - if (len > (((size_t) -(STRINGBUF_HEADER_BYTES + 32 + sizeof (scm_t_wchar))) - / sizeof (scm_t_wchar))) - scm_num_overflow ("make_wide_stringbuf"); + if (len >= max_stringbuf_payload_byte_size / sizeof (scm_t_wchar)) + scm_out_of_range ("make_stringbuf", scm_from_size_t (len)); - raw_len = (len + 1) * sizeof (scm_t_wchar); - buf = SCM_PACK_POINTER - (scm_allocate_pointerless (SCM_I_CURRENT_THREAD, - STRINGBUF_HEADER_BYTES + raw_len)); - - SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE); - SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len); - - STRINGBUF_WIDE_CHARS (buf)[len] = 0; + struct scm_wide_stringbuf *buf = + scm_allocate_pointerless (SCM_I_CURRENT_THREAD, + sizeof (*buf) + (len + 1) * sizeof (scm_t_wchar)); + buf->header.tag_and_flags = scm_tc7_stringbuf | SCM_I_STRINGBUF_F_WIDE; + buf->header.length = len; return buf; } /* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded) characters from BUF. */ -static SCM -wide_stringbuf (SCM buf) +static struct scm_wide_stringbuf * +widen_stringbuf (struct scm_narrow_stringbuf *buf) { - SCM new_buf; + size_t len = stringbuf_length (&buf->header); + struct scm_wide_stringbuf *wide = make_wide_stringbuf (len); + unsigned char *src = narrow_stringbuf_chars (buf); + scm_t_wchar *dst = wide_stringbuf_chars (wide); - if (STRINGBUF_WIDE (buf)) - new_buf = buf; - else - { - size_t i, len; - scm_t_wchar *mem; + for (size_t i = 0; i < len; i++) + dst[i] = src[i]; - len = STRINGBUF_LENGTH (buf); - - new_buf = make_wide_stringbuf (len); - - mem = STRINGBUF_WIDE_CHARS (new_buf); - for (i = 0; i < len; i++) - mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i]; - mem[len] = 0; - } - - return new_buf; + return wide; } /* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded) characters from BUF, if possible. */ -static SCM -narrow_stringbuf (SCM buf) +static struct scm_narrow_stringbuf * +try_narrow_stringbuf (struct scm_wide_stringbuf *buf, size_t start, size_t len) { - SCM new_buf; + if (buf->header.length < start || (buf->header.length - start) > len) + abort (); - if (!STRINGBUF_WIDE (buf)) - new_buf = buf; - else - { - size_t i, len; - scm_t_wchar *wmem; - unsigned char *mem; + scm_t_wchar *src = wide_stringbuf_chars (buf); - len = STRINGBUF_LENGTH (buf); - wmem = STRINGBUF_WIDE_CHARS (buf); + for (size_t i = 0; i < len; i++) + if (src[i + start] > 0xFF) + /* BUF cannot be narrowed. */ + return NULL; - for (i = 0; i < len; i++) - if (wmem[i] > 0xFF) - /* BUF cannot be narrowed. */ - return buf; + struct scm_narrow_stringbuf *narrow = make_narrow_stringbuf (len); + unsigned char *dst = narrow_stringbuf_chars (narrow); - new_buf = make_stringbuf (len); + for (size_t i = 0; i < len; i++) + dst[i] = src[i + start]; - mem = STRINGBUF_CHARS (new_buf); - for (i = 0; i < len; i++) - mem[i] = (unsigned char) wmem[i]; - mem[len] = 0; - } - - return new_buf; + return narrow; } -/* Copy-on-write strings. - */ -#define STRING_TAG scm_tc7_string +static inline struct scm_string* +scm_to_string (SCM x) +{ + if (!scm_is_string (x)) + abort (); + return (struct scm_string *) SCM_UNPACK_POINTER (x); +} -#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str)) -#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str)) -#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str)) +static inline SCM +scm_from_string (struct scm_string *x) +{ + return SCM_PACK_POINTER (x); +} -#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf)) -#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start)) +static inline int +string_is_read_only (struct scm_string *str) +{ + return str->tag_and_flags == scm_tc7_ro_string; +} -#define IS_STRING(str) (SCM_HAS_TYP7 (str, STRING_TAG)) +static const scm_t_bits mutation_sharing_string_tag = scm_tc7_string + 0x100; -/* Read-only strings. - */ +static inline int +string_is_shared (struct scm_string *str) +{ + return str->tag_and_flags == mutation_sharing_string_tag; +} -#define RO_STRING_TAG scm_tc7_ro_string -#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG) +static inline struct scm_stringbuf * +string_stringbuf (struct scm_string *str) +{ + if (string_is_shared (str)) + abort (); + return str->stringbuf; +} -/* Mutation-sharing substrings - */ +static inline struct scm_string * +string_aliased_string (struct scm_string *str) +{ + if (!string_is_shared (str)) + abort (); + return str->string; +} -#define SH_STRING_TAG (scm_tc7_string + 0x100) +static inline size_t +string_start (struct scm_string *str) +{ + return str->start; +} -#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh)) -/* START and LENGTH as for STRINGs. */ +static inline size_t +string_length (struct scm_string *str) +{ + return str->length; +} -#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG) +static inline struct scm_string * +make_string (struct scm_stringbuf *buf, int read_only_p, + size_t start, size_t length) +{ + if (start > buf->length || (buf->length - start) < length) + abort (); + struct scm_string *str = scm_allocate_tagged (SCM_I_CURRENT_THREAD, + sizeof (*str)); + str->tag_and_flags = read_only_p ? scm_tc7_ro_string : scm_tc7_string; + str->stringbuf = buf; + str->start = start; + str->length = length; + return str; +} void scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate) { - SCM str = scm_double_cell (STRING_TAG, SCM_UNPACK(exp), 0, - STRINGBUF_LENGTH (exp)); + struct scm_stringbuf *buf = scm_to_stringbuf (exp); + SCM str = scm_from_string (make_string (buf, 1, 0, stringbuf_length (buf))); + scm_puts ("#", port); @@ -288,14 +356,6 @@ scm_i_print_stringbuf (SCM exp, SCM port, scm_print_state *pstate) SCM scm_nullstr; -static SCM null_stringbuf; - -static void -init_null_stringbuf (void) -{ - null_stringbuf = make_stringbuf (0); -} - /* Create a scheme string with space for LEN 8-bit Latin-1-encoded characters. CHARSP, if not NULL, will be set to location of the char array. If READ_ONLY_P, the returned string is read-only; @@ -303,24 +363,11 @@ init_null_stringbuf (void) SCM scm_i_make_string (size_t len, char **charsp, int read_only_p) { - SCM buf; - SCM res; - - if (len == 0) - { - static scm_i_pthread_once_t once = SCM_I_PTHREAD_ONCE_INIT; - scm_i_pthread_once (&once, init_null_stringbuf); - buf = null_stringbuf; - } - else - buf = make_stringbuf (len); - + struct scm_narrow_stringbuf *buf = make_narrow_stringbuf (len); if (charsp) - *charsp = (char *) STRINGBUF_CHARS (buf); - res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG, - SCM_UNPACK (buf), - (scm_t_bits) 0, (scm_t_bits) len); - return res; + *charsp = buf->contents; + + return scm_from_string (make_string (&buf->header, read_only_p, 0, len)); } /* Create a scheme string with space for LEN 32-bit UCS-4-encoded @@ -330,46 +377,72 @@ scm_i_make_string (size_t len, char **charsp, int read_only_p) SCM scm_i_make_wide_string (size_t len, scm_t_wchar **charsp, int read_only_p) { - SCM buf = make_wide_stringbuf (len); - SCM res; + struct scm_wide_stringbuf *buf = make_wide_stringbuf (len); if (charsp) - *charsp = STRINGBUF_WIDE_CHARS (buf); - res = scm_double_cell (read_only_p ? RO_STRING_TAG : STRING_TAG, - SCM_UNPACK (buf), - (scm_t_bits) 0, (scm_t_bits) len); - return res; + *charsp = buf->contents; + + return scm_from_string (make_string (&buf->header, read_only_p, 0, len)); } static void validate_substring_args (SCM str, size_t start, size_t end) { - if (!IS_STRING (str)) + if (!scm_is_string (str)) scm_wrong_type_arg_msg (NULL, 0, str, "string"); - if (start > STRING_LENGTH (str)) + struct scm_string *s = scm_to_string (str); + if (start > string_length (s)) scm_out_of_range (NULL, scm_from_size_t (start)); - if (end > STRING_LENGTH (str) || end < start) + if (end > string_length (s) || end < start) scm_out_of_range (NULL, scm_from_size_t (end)); } static inline void -get_str_buf_start (SCM *str, SCM *buf, size_t *start) +get_str_buf_start (struct scm_string **str, struct scm_stringbuf **buf, + size_t *start) { - *start = STRING_START (*str); - if (IS_SH_STRING (*str)) + *start = string_start (*str); + if (string_is_shared (*str)) { - *str = SH_STRING_STRING (*str); - *start += STRING_START (*str); + *str = string_aliased_string (*str); + *start += string_start (*str); + } + *buf = string_stringbuf (*str); +} + +static struct scm_stringbuf * +stringbuf_slice (struct scm_stringbuf *buf, size_t start, size_t len) +{ + if (stringbuf_is_wide (buf)) + { + struct scm_wide_stringbuf *old_buf = as_wide_stringbuf (buf); + struct scm_narrow_stringbuf *narrow = + try_narrow_stringbuf (old_buf, start, len); + if (narrow) + return &narrow->header; + + struct scm_wide_stringbuf *new_buf = make_wide_stringbuf (len); + u32_cpy ((uint32_t *) wide_stringbuf_chars (new_buf), + (uint32_t *) (wide_stringbuf_chars (old_buf) + start), len); + + return &new_buf->header; + } + else + { + struct scm_narrow_stringbuf *old_buf = as_narrow_stringbuf (buf); + struct scm_narrow_stringbuf *new_buf = make_narrow_stringbuf (len); + memcpy (narrow_stringbuf_chars (new_buf), + narrow_stringbuf_chars (old_buf) + start, len); + return &new_buf->header; } - *buf = STRING_STRINGBUF (*str); } static SCM -substring_with_immutable_stringbuf (SCM str, size_t start, size_t end, +substring_with_immutable_stringbuf (struct scm_string *str, + size_t start, size_t end, int force_copy_p, int read_only_p) { - SCM buf; + struct scm_stringbuf *buf; size_t str_start, len; - scm_t_bits tag = read_only_p ? RO_STRING_TAG : STRING_TAG; get_str_buf_start (&str, &buf, &str_start); len = end - start; @@ -377,70 +450,53 @@ substring_with_immutable_stringbuf (SCM str, size_t start, size_t end, if (len == 0) return scm_i_make_string (0, NULL, read_only_p); - else if (!force_copy_p && SCM_LIKELY (!STRINGBUF_MUTABLE (buf))) - return scm_double_cell (tag, SCM_UNPACK (buf), start, len); + else if (!force_copy_p && !stringbuf_is_mutable (buf)) + return scm_from_string (make_string (buf, read_only_p, start, len)); else - { - SCM new_buf, new_str; - - if (STRINGBUF_WIDE (buf)) - { - new_buf = make_wide_stringbuf (len); - u32_cpy ((uint32_t *) STRINGBUF_WIDE_CHARS (new_buf), - (uint32_t *) (STRINGBUF_WIDE_CHARS (buf) + start), len); - new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len); - scm_i_try_narrow_string (new_str); - } - else - { - new_buf = make_stringbuf (len); - memcpy (STRINGBUF_CHARS (new_buf), - STRINGBUF_CHARS (buf) + start, len); - new_str = scm_double_cell (tag, SCM_UNPACK (new_buf), 0, len); - } - - return new_str; - } + return scm_from_string + (make_string (stringbuf_slice (buf, start, len), read_only_p, 0, len)); } SCM scm_i_substring (SCM str, size_t start, size_t end) { - return substring_with_immutable_stringbuf (str, start, end, 0, 0); + return substring_with_immutable_stringbuf (scm_to_string (str), start, end, + 0, 0); } SCM scm_i_substring_read_only (SCM str, size_t start, size_t end) { - return substring_with_immutable_stringbuf (str, start, end, 0, 1); + return substring_with_immutable_stringbuf (scm_to_string (str), start, end, + 0, 1); } SCM scm_i_substring_copy (SCM str, size_t start, size_t end) { - return substring_with_immutable_stringbuf (str, start, end, 1, 0); + return substring_with_immutable_stringbuf (scm_to_string (str), start, end, + 1, 0); } static void -scm_i_string_ensure_mutable_x (SCM str) +scm_i_string_ensure_mutable_x (struct scm_string *str) { - SCM buf; - - if (IS_SH_STRING (str)) + if (string_is_shared (str)) { /* Shared-mutation strings always have mutable stringbufs. */ - buf = STRING_STRINGBUF (SH_STRING_STRING (str)); - if (!STRINGBUF_MUTABLE (buf)) + if (!stringbuf_is_mutable + (string_stringbuf (string_aliased_string (str)))) abort (); return; } - if (IS_RO_STRING (str)) - scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (str)); + if (string_is_read_only (str)) + scm_misc_error (NULL, "string is read-only: ~s", + scm_list_1 (scm_from_string (str))); - buf = STRING_STRINGBUF (str); + struct scm_stringbuf *buf = string_stringbuf (str); - if (STRINGBUF_MUTABLE (buf)) + if (stringbuf_is_mutable (buf)) return; /* Otherwise copy and mark the fresh stringbuf as mutable. Note that @@ -448,49 +504,57 @@ scm_i_string_ensure_mutable_x (SCM str) original string keep working, so that concurrent accessors on this string don't see things in an inconsistent state. */ { - SCM new_buf; - size_t len = STRINGBUF_LENGTH (buf); + size_t len = stringbuf_length (buf); - if (STRINGBUF_WIDE (buf)) + if (stringbuf_is_wide (buf)) { - new_buf = make_wide_stringbuf (len); - u32_cpy ((uint32_t *) STRINGBUF_WIDE_CHARS (new_buf), - (uint32_t *) STRINGBUF_WIDE_CHARS (buf), len); + struct scm_wide_stringbuf *new_buf = make_wide_stringbuf (len); + u32_cpy ((uint32_t *) wide_stringbuf_chars (new_buf), + (uint32_t *) wide_stringbuf_chars (as_wide_stringbuf (buf)), + len); + buf = &new_buf->header; } else { - new_buf = make_stringbuf (len); - memcpy (STRINGBUF_CHARS (new_buf), STRINGBUF_CHARS (buf), len); + struct scm_narrow_stringbuf *new_buf = make_narrow_stringbuf (len); + memcpy (narrow_stringbuf_chars (new_buf), + narrow_stringbuf_chars (as_narrow_stringbuf (buf)), + len); + buf = &new_buf->header; } - STRINGBUF_SET_MUTABLE (new_buf); - SET_STRING_STRINGBUF (str, new_buf); + stringbuf_set_mutable (buf); + str->stringbuf = buf; } } SCM scm_i_substring_shared (SCM str, size_t start, size_t end) { - if (start == 0 && end == STRING_LENGTH (str)) + struct scm_string *s = scm_to_string (str); + if (start == 0 && end == string_length (s)) return str; else if (start == end) return scm_i_make_string (0, NULL, 0); - else if (IS_RO_STRING (str)) + else if (string_is_read_only (s)) return scm_i_substring_read_only (str, start, end); - else + + size_t len = end - start; + if (string_is_shared (s)) { - size_t len = end - start; - if (IS_SH_STRING (str)) - { - start += STRING_START (str); - str = SH_STRING_STRING (str); - } - - scm_i_string_ensure_mutable_x (str); - - return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str), - (scm_t_bits)start, (scm_t_bits) len); + start += string_start (s); + s = string_aliased_string (s); } + + scm_i_string_ensure_mutable_x (s); + + struct scm_string *ret = scm_allocate_tagged (SCM_I_CURRENT_THREAD, + sizeof (struct scm_string)); + ret->tag_and_flags = mutation_sharing_string_tag; + ret->string = s; + ret->start = start; + ret->length = len; + return scm_from_string (ret); } SCM @@ -530,13 +594,13 @@ scm_c_substring_shared (SCM str, size_t start, size_t end) size_t scm_i_string_length (SCM str) { - return STRING_LENGTH (str); + return string_length (scm_to_string (str)); } int scm_i_string_is_mutable (SCM str) { - return !IS_RO_STRING (str); + return !string_is_read_only (scm_to_string (str)); } /* True if the string is 'narrow', meaning it has a 8-bit Latin-1 @@ -545,10 +609,11 @@ scm_i_string_is_mutable (SCM str) int scm_i_is_narrow_string (SCM str) { - if (IS_SH_STRING (str)) - str = SH_STRING_STRING (str); + struct scm_string *s = scm_to_string (str); + if (string_is_shared (s)) + s = string_aliased_string (s); - return !STRINGBUF_WIDE (STRING_STRINGBUF (str)); + return stringbuf_is_narrow (string_stringbuf (s)); } /* Try to coerce a string to be narrow. It if is narrow already, do @@ -558,29 +623,23 @@ scm_i_is_narrow_string (SCM str) int scm_i_try_narrow_string (SCM str) { - if (IS_SH_STRING (str)) - str = SH_STRING_STRING (str); + struct scm_string *s = scm_to_string (str); + if (string_is_shared (s)) + s = string_aliased_string (s); - SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str))); + struct scm_stringbuf *buf = string_stringbuf (s); + if (stringbuf_is_narrow (buf)) + return 1; - return scm_i_is_narrow_string (str); -} + struct scm_narrow_stringbuf *narrowed = + try_narrow_stringbuf (as_wide_stringbuf (buf), 0, buf->length); + if (narrowed) + { + s->stringbuf = &narrowed->header; + return 1; + } -/* Return a pointer to the raw data of the string, which can be either Latin-1 - or UCS-4 encoded data, depending on `scm_i_is_narrow_string (STR)'. */ -const void * -scm_i_string_data (SCM str) -{ - SCM buf; - size_t start; - const char *data; - - get_str_buf_start (&str, &buf, &start); - - data = STRINGBUF_CONTENTS (buf); - data += start * (scm_i_is_narrow_string (str) ? 1 : 4); - - return data; + return 0; } /* Returns a pointer to the 8-bit Latin-1 encoded character array of @@ -588,11 +647,16 @@ scm_i_string_data (SCM str) const char * scm_i_string_chars (SCM str) { - SCM buf; + struct scm_string *s = scm_to_string (str); + struct scm_stringbuf *buf; size_t start; - get_str_buf_start (&str, &buf, &start); - if (scm_i_is_narrow_string (str)) - return (const char *) STRINGBUF_CHARS (buf) + start; + get_str_buf_start (&s, &buf, &start); + if (stringbuf_is_narrow (buf)) + { + unsigned char *chars = + narrow_stringbuf_chars (as_narrow_stringbuf (buf)) + start; + return (const char *) chars; + } else scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s", scm_list_1 (str)); @@ -604,12 +668,12 @@ scm_i_string_chars (SCM str) const scm_t_wchar * scm_i_string_wide_chars (SCM str) { - SCM buf; + struct scm_string *s = scm_to_string (str); + struct scm_stringbuf *buf; size_t start; - - get_str_buf_start (&str, &buf, &start); - if (!scm_i_is_narrow_string (str)) - return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start; + get_str_buf_start (&s, &buf, &start); + if (stringbuf_is_wide (buf)) + return wide_stringbuf_chars (as_wide_stringbuf (buf)) + start; else scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s", scm_list_1 (str)); @@ -621,7 +685,7 @@ scm_i_string_wide_chars (SCM str) SCM scm_i_string_start_writing (SCM orig_str) { - scm_i_string_ensure_mutable_x (orig_str); + scm_i_string_ensure_mutable_x (scm_to_string (orig_str)); return orig_str; } @@ -629,12 +693,16 @@ scm_i_string_start_writing (SCM orig_str) char * scm_i_string_writable_chars (SCM str) { - SCM buf; + struct scm_string *s = scm_to_string (str); + struct scm_stringbuf *buf; size_t start; - - get_str_buf_start (&str, &buf, &start); - if (scm_i_is_narrow_string (str)) - return (char *) STRINGBUF_CHARS (buf) + start; + get_str_buf_start (&s, &buf, &start); + if (stringbuf_is_narrow (buf)) + { + unsigned char *chars = + narrow_stringbuf_chars (as_narrow_stringbuf (buf)) + start; + return (char *) chars; + } else scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s", scm_list_1 (str)); @@ -645,12 +713,12 @@ scm_i_string_writable_chars (SCM str) static scm_t_wchar * scm_i_string_writable_wide_chars (SCM str) { - SCM buf; + struct scm_string *s = scm_to_string (str); + struct scm_stringbuf *buf; size_t start; - - get_str_buf_start (&str, &buf, &start); - if (!scm_i_is_narrow_string (str)) - return STRINGBUF_WIDE_CHARS (buf) + start; + get_str_buf_start (&s, &buf, &start); + if (stringbuf_is_wide (buf)) + return wide_stringbuf_chars (as_wide_stringbuf (buf)) + start; else scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s", scm_list_1 (str)); @@ -731,24 +799,28 @@ scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr) void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr) { - if (IS_SH_STRING (str)) + struct scm_string *s = scm_to_string (str); + if (string_length (s) <= p) + abort (); + struct scm_stringbuf *buf; + size_t start; + get_str_buf_start (&s, &buf, &start); + size_t idx = p + start; + + if (stringbuf_is_wide (buf)) { - p += STRING_START (str); - str = SH_STRING_STRING (str); + wide_stringbuf_chars (as_wide_stringbuf (buf))[idx] = chr; } - - if (chr > 0xFF && scm_i_is_narrow_string (str)) - SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str))); - - if (scm_i_is_narrow_string (str)) + else if (chr > 0xFF) { - char *dst = scm_i_string_writable_chars (str); - dst[p] = chr; + struct scm_wide_stringbuf *wide = + widen_stringbuf (as_narrow_stringbuf (buf)); + s->stringbuf = &wide->header; + wide_stringbuf_chars (wide)[idx] = chr; } else { - scm_t_wchar *dst = scm_i_string_writable_wide_chars (str); - dst[p] = chr; + narrow_stringbuf_chars (as_narrow_stringbuf (buf))[idx] = chr; } } @@ -760,28 +832,26 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr) internals of strings and string-like objects confined to this file. */ -#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1 - SCM scm_i_make_symbol (SCM name, scm_t_bits flags, unsigned long hash) { - SCM buf, symbol; - size_t start, length = STRING_LENGTH (name); + struct scm_string *s = scm_to_string (name); + struct scm_stringbuf *buf; + size_t start, length = string_length (s); - get_str_buf_start (&name, &buf, &start); - if (SCM_UNLIKELY (STRINGBUF_MUTABLE (buf) - || start != 0 - || STRINGBUF_LENGTH (buf) != length)) - { - name = scm_i_substring_copy (name, 0, length); - buf = STRING_STRINGBUF (name); - } + get_str_buf_start (&s, &buf, &start); + if (stringbuf_is_mutable (buf) + || start != 0 + || stringbuf_length (buf) != length) + buf = stringbuf_slice (buf, start, length); - symbol = scm_words (scm_tc7_symbol | flags, 3); - SCM_SET_CELL_WORD_1 (symbol, SCM_UNPACK (buf)); - SCM_SET_CELL_WORD_2 (symbol, hash); + struct scm_symbol *symbol = scm_allocate_tagged (SCM_I_CURRENT_THREAD, + sizeof (*symbol)); + symbol->tag_and_flags = scm_tc7_symbol | flags; + symbol->name = buf; + symbol->hash = hash; - return symbol; + return scm_from_symbol (symbol); } /* Returns the number of characters in SYM. This may be different @@ -789,7 +859,7 @@ scm_i_make_symbol (SCM name, scm_t_bits flags, unsigned long hash) size_t scm_i_symbol_length (SCM sym) { - return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); + return stringbuf_length (scm_to_symbol (sym)->name); } size_t @@ -798,7 +868,7 @@ scm_c_symbol_length (SCM sym) { SCM_VALIDATE_SYMBOL (1, sym); - return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); + return scm_i_symbol_length (sym); } #undef FUNC_NAME @@ -807,10 +877,7 @@ scm_c_symbol_length (SCM sym) int scm_i_is_narrow_symbol (SCM sym) { - SCM buf; - - buf = SYMBOL_STRINGBUF (sym); - return !STRINGBUF_WIDE (buf); + return stringbuf_is_narrow (scm_to_symbol (sym)->name); } /* Returns a pointer to the 8-bit Latin-1 encoded character array that @@ -818,11 +885,12 @@ scm_i_is_narrow_symbol (SCM sym) const char * scm_i_symbol_chars (SCM sym) { - SCM buf; - - buf = SYMBOL_STRINGBUF (sym); - if (!STRINGBUF_WIDE (buf)) - return (const char *) STRINGBUF_CHARS (buf); + struct scm_stringbuf *buf = scm_to_symbol (sym)->name; + if (stringbuf_is_narrow (buf)) + { + unsigned char *chars = narrow_stringbuf_chars (as_narrow_stringbuf (buf)); + return (const char *) chars; + } else scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S", scm_list_1 (sym)); @@ -833,11 +901,9 @@ scm_i_symbol_chars (SCM sym) const scm_t_wchar * scm_i_symbol_wide_chars (SCM sym) { - SCM buf; - - buf = SYMBOL_STRINGBUF (sym); - if (STRINGBUF_WIDE (buf)) - return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf); + struct scm_stringbuf *buf = scm_to_symbol (sym)->name; + if (stringbuf_is_wide (buf)) + return wide_stringbuf_chars (as_wide_stringbuf (buf)); else scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S", scm_list_1 (sym)); @@ -846,9 +912,8 @@ scm_i_symbol_wide_chars (SCM sym) SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end) { - SCM buf = SYMBOL_STRINGBUF (sym); - return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf), - (scm_t_bits)start, (scm_t_bits) end - start); + struct scm_stringbuf *buf = scm_to_symbol (sym)->name; + return scm_from_string (make_string (buf, 1, 0, stringbuf_length (buf))); } /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */ @@ -893,71 +958,45 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str), #define FUNC_NAME s_scm_sys_string_dump { SCM e1, e2, e3, e4, e5, e6, e7, e8, e9; - SCM buf; + struct scm_stringbuf *buf; SCM_VALIDATE_STRING (1, str); + struct scm_string *s = scm_to_string (str); /* String info */ e1 = scm_cons (scm_from_latin1_symbol ("string"), str); e2 = scm_cons (scm_from_latin1_symbol ("start"), - scm_from_size_t (STRING_START (str))); + scm_from_size_t (string_start (s))); e3 = scm_cons (scm_from_latin1_symbol ("length"), - scm_from_size_t (STRING_LENGTH (str))); + scm_from_size_t (string_length (s))); - if (IS_SH_STRING (str)) + if (string_is_shared (s)) { e4 = scm_cons (scm_from_latin1_symbol ("shared"), - SH_STRING_STRING (str)); - buf = STRING_STRINGBUF (SH_STRING_STRING (str)); + scm_from_string (string_aliased_string (s))); + buf = string_stringbuf (string_aliased_string (s)); } else { e4 = scm_cons (scm_from_latin1_symbol ("shared"), SCM_BOOL_F); - buf = STRING_STRINGBUF (str); + buf = string_stringbuf (s); } - if (IS_RO_STRING (str)) - e5 = scm_cons (scm_from_latin1_symbol ("read-only"), - SCM_BOOL_T); - else - e5 = scm_cons (scm_from_latin1_symbol ("read-only"), - SCM_BOOL_F); + e5 = scm_cons (scm_from_latin1_symbol ("read-only"), + scm_from_bool (string_is_read_only (s))); /* Stringbuf info */ - if (!STRINGBUF_WIDE (buf)) - { - size_t len = STRINGBUF_LENGTH (buf); - char *cbuf; - SCM sbc = scm_i_make_string (len, &cbuf, 0); - memcpy (cbuf, STRINGBUF_CHARS (buf), len); - e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"), - sbc); - } - else - { - size_t len = STRINGBUF_LENGTH (buf); - scm_t_wchar *cbuf; - SCM sbc = scm_i_make_wide_string (len, &cbuf, 0); - u32_cpy ((uint32_t *) cbuf, - (uint32_t *) STRINGBUF_WIDE_CHARS (buf), len); - e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"), - sbc); - } + e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"), + scm_from_string + (make_string (stringbuf_slice (buf, 0, stringbuf_length (buf)), + 1, 0, stringbuf_length (buf)))); e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), - scm_from_size_t (STRINGBUF_LENGTH (buf))); - if (STRINGBUF_MUTABLE (buf)) - e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"), - SCM_BOOL_T); - else - e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"), - SCM_BOOL_F); - if (STRINGBUF_WIDE (buf)) - e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), - SCM_BOOL_T); - else - e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), - SCM_BOOL_F); + scm_from_size_t (stringbuf_length (buf))); + e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"), + scm_from_bool (stringbuf_is_mutable (buf))); + e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), + scm_from_bool (stringbuf_is_wide (buf))); return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED); } @@ -986,8 +1025,8 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym), "@end table") #define FUNC_NAME s_scm_sys_symbol_dump { - SCM e1, e2, e3, e4, e5, e6, e7; - SCM buf; + SCM e1, e2, e3, e4, e5; + struct scm_stringbuf *buf; SCM_VALIDATE_SYMBOL (1, sym); e1 = scm_cons (scm_from_latin1_symbol ("symbol"), sym); @@ -995,44 +1034,16 @@ SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym), scm_from_ulong (scm_i_symbol_hash (sym))); e3 = scm_cons (scm_from_latin1_symbol ("interned"), scm_symbol_interned_p (sym)); - buf = SYMBOL_STRINGBUF (sym); /* Stringbuf info */ - if (!STRINGBUF_WIDE (buf)) - { - size_t len = STRINGBUF_LENGTH (buf); - char *cbuf; - SCM sbc = scm_i_make_string (len, &cbuf, 0); - memcpy (cbuf, STRINGBUF_CHARS (buf), len); - e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"), - sbc); - } - else - { - size_t len = STRINGBUF_LENGTH (buf); - scm_t_wchar *cbuf; - SCM sbc = scm_i_make_wide_string (len, &cbuf, 0); - u32_cpy ((uint32_t *) cbuf, - (uint32_t *) STRINGBUF_WIDE_CHARS (buf), len); - e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"), - sbc); - } - e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), - scm_from_size_t (STRINGBUF_LENGTH (buf))); - if (STRINGBUF_MUTABLE (buf)) - e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"), - SCM_BOOL_T); - else - e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-mutable"), - SCM_BOOL_F); - if (STRINGBUF_WIDE (buf)) - e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), - SCM_BOOL_T); - else - e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), - SCM_BOOL_F); - return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED); - + buf = scm_to_symbol (sym)->name; + size_t len = stringbuf_length (buf); + e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"), + scm_from_string + (make_string (stringbuf_slice (buf, 0, len), 1, 0, len))); + e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"), + scm_from_bool (stringbuf_is_wide (buf))); + return scm_list_n (e1, e2, e3, e4, e5, SCM_UNDEFINED); } #undef FUNC_NAME @@ -1059,7 +1070,7 @@ SCM_DEFINE (scm_string_p, "string?", 1, 0, 0, "Return @code{#t} if @var{obj} is a string, else @code{#f}.") #define FUNC_NAME s_scm_string_p { - return scm_from_bool (IS_STRING (obj)); + return scm_from_bool (scm_is_string (obj)); } #undef FUNC_NAME @@ -1156,7 +1167,8 @@ SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0, /* Given that make-string is mostly used by Scheme to prepare a mutable string buffer, let's go ahead and mark this as mutable to avoid a copy when this buffer is next written to. */ - STRINGBUF_SET_MUTABLE (STRING_STRINGBUF (ret)); + if (!scm_is_eq (k, SCM_INUM0)) + stringbuf_set_mutable (string_stringbuf (scm_to_string (ret))); return ret; } #undef FUNC_NAME @@ -1189,7 +1201,7 @@ SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0, #define FUNC_NAME s_scm_string_length { SCM_VALIDATE_STRING (1, string); - return scm_from_size_t (STRING_LENGTH (string)); + return scm_from_size_t (string_length (scm_to_string (string))); } #undef FUNC_NAME @@ -1210,9 +1222,9 @@ SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0, size_t scm_c_string_length (SCM string) { - if (!IS_STRING (string)) + if (!scm_is_string (string)) scm_wrong_type_arg_msg (NULL, 0, string, "string"); - return STRING_LENGTH (string); + return string_length (scm_to_string (string)); } SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0, @@ -1938,7 +1950,7 @@ scm_to_latin1_stringn (SCM str, size_t *lenp) if (lenp) *lenp = len; - result = scm_strndup (scm_i_string_data (str), len); + result = scm_strndup (scm_i_string_chars (str), len); } else result = scm_to_stringn (str, lenp, NULL, diff --git a/libguile/symbols.h b/libguile/symbols.h index 0c4ae546f..005cf967d 100644 --- a/libguile/symbols.h +++ b/libguile/symbols.h @@ -30,7 +30,33 @@ -#define scm_is_symbol(x) (SCM_HAS_TYP7 (x, scm_tc7_symbol)) +struct scm_symbol +{ + scm_t_bits tag_and_flags; + struct scm_stringbuf *name; + scm_t_bits hash; +}; + +static inline int +scm_is_symbol (SCM x) +{ + return SCM_HAS_TYP7 (x, scm_tc7_symbol); +} + +static inline struct scm_symbol* +scm_to_symbol (SCM x) +{ + if (!scm_is_symbol (x)) + abort (); + return (struct scm_symbol *) SCM_UNPACK_POINTER (x); +} + +static inline SCM +scm_from_symbol (struct scm_symbol *x) +{ + return SCM_PACK_POINTER (x); +} + #define scm_i_symbol_hash(x) ((unsigned long) SCM_CELL_WORD_2 (x)) #define scm_i_symbol_is_interned(x) \ (!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED))