1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-01 15:20:34 +02:00

Get strings, symbols, stringbufs off scm_double_cell

* libguile/symbols.h (scm_is_symbol, scm_to_symbol, scm_from_symbol):
Define some helpers and a "struct scm_symbol".
* libguile/strings-internal.h (scm_i_string_data): Remove.
* libguile/print.c (write_char_in_string, write_narrow_string)
(write_wide_string): Refactor to avoid per-char narrow checks.
(write_character): Move up.
(iprin1): Adapt to call write_narrow_string / write_wide_string.
* libguile/srfi-13.c (scm_string_eq): Avoid scm_i_string_data.
* libguile/strings.c (scm_is_stringbuf, scm_to_stringbuf)
(scm_from_stringbuf, stringbuf_is_wide, stringbuf_is_narrow)
(stringbuf_is_mutable, stringbuf_set_mutable, stringbuf_length)
(as_narrow_stringbuf, as_wide_stringbuf, narrow_stringbuf_chars)
(wide_stringbuf_chars, scm_to_string, scm_from_string, string_is_read_only)
(string_is_shared, string_stringbuf, string_aliased_string, string_start)
(string_length): New inline function helpers, to replace a pile of
macros.  Adapt all users.
This commit is contained in:
Andy Wingo 2025-06-23 14:55:39 +02:00
parent c8cd88b533
commit b25a743cf9
5 changed files with 548 additions and 509 deletions

View file

@ -77,13 +77,6 @@
#include "print.h"
/* Character printers. */
static void write_string (const void *, int, size_t, SCM);
static void write_character (scm_t_wchar, SCM);
/* {Names of immediate symbols}
@ -487,6 +480,98 @@ print_r7rs_extended_symbol (SCM sym, SCM port)
scm_putc ('|', port);
}
static void
write_char_in_string (scm_t_wchar ch, SCM port)
{
/* Write CH to PORT, escaping it if it's non-graphic or not
representable in PORT's encoding. If CH needs to be escaped,
it is escaped using the in-string escape syntax. */
if (ch == '"')
scm_c_put_latin1_chars (port, (const uint8_t *) "\\\"", 2);
else if (ch == '\\')
scm_c_put_latin1_chars (port, (const uint8_t *) "\\\\", 2);
else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P)
scm_c_put_latin1_chars (port, (const uint8_t *) "\\n", 2);
else if (ch == ' ' || ch == '\n'
|| (uc_is_general_category_withtable (ch,
UC_CATEGORY_MASK_L |
UC_CATEGORY_MASK_M |
UC_CATEGORY_MASK_N |
UC_CATEGORY_MASK_P |
UC_CATEGORY_MASK_S)
&& scm_c_can_put_char (port, ch)))
scm_c_put_char (port, ch);
else
scm_c_put_escaped_char (port, ch);
}
static void
write_narrow_string (const char *str, size_t len, SCM port)
{
scm_c_put_char (port, (uint8_t) '"');
for (size_t i = 0; i < len; ++i)
write_char_in_string ((unsigned char) str[i], port);
scm_c_put_char (port, (uint8_t) '"');
}
static void
write_wide_string (const scm_t_wchar *str, size_t len, SCM port)
{
scm_c_put_char (port, (uint8_t) '"');
for (size_t i = 0; i < len; ++i)
write_char_in_string (str[i], port);
scm_c_put_char (port, (uint8_t) '"');
}
/* Write CH to PORT, escaping it if it's non-graphic or not
representable in PORT's encoding. The character escape syntax is
used. */
static void
write_character (scm_t_wchar ch, SCM port)
{
scm_puts ("#\\", port);
/* Pretty-print a combining characters over dotted circles, if
possible, to make them more visible. */
if (uc_combining_class (ch) != UC_CCC_NR
&& scm_c_can_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE)
&& scm_c_can_put_char (port, ch))
{
scm_c_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE);
scm_c_put_char (port, ch);
}
else if (uc_is_general_category_withtable (ch,
UC_CATEGORY_MASK_L |
UC_CATEGORY_MASK_M |
UC_CATEGORY_MASK_N |
UC_CATEGORY_MASK_P |
UC_CATEGORY_MASK_S)
&& scm_c_can_put_char (port, ch))
/* CH is graphic and encodeable; display it. */
scm_c_put_char (port, ch);
else
/* CH isn't graphic or cannot be represented in PORT's encoding. */
{
/* Represent CH using the character escape syntax. */
const char *name;
name = scm_i_charname (SCM_MAKE_CHAR (ch));
if (name != NULL)
scm_puts (name, port);
else if (!SCM_R6RS_ESCAPES_P)
scm_intprint (ch, 8, port);
else
{
scm_puts ("x", port);
scm_intprint (ch, 16, port);
}
}
}
/* FIXME: allow R6RS hex escapes instead of #{...}# or |...|. */
static void
print_symbol (SCM sym, SCM port)
@ -629,9 +714,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
size_t len = scm_i_string_length (exp);
if (pstate->writingp)
write_string (scm_i_string_data (exp),
scm_i_is_narrow_string (exp),
len, port);
{
if (scm_i_is_narrow_string (exp))
write_narrow_string (scm_i_string_chars (exp), len, port);
else
write_wide_string (scm_i_string_wide_chars (exp), len, port);
}
else
scm_c_put_string (port, exp, 0, len);
}
@ -822,91 +910,6 @@ scm_prin1 (SCM exp, SCM port, int writingp)
scm_dynwind_end ();
}
static void
write_string (const void *str, int narrow_p, size_t len, SCM port)
{
size_t i;
scm_c_put_char (port, (uint8_t) '"');
for (i = 0; i < len; ++i)
{
scm_t_wchar ch;
if (narrow_p)
ch = (scm_t_wchar) ((unsigned char *) (str))[i];
else
ch = ((scm_t_wchar *) (str))[i];
/* Write CH to PORT, escaping it if it's non-graphic or not
representable in PORT's encoding. If CH needs to be escaped,
it is escaped using the in-string escape syntax. */
if (ch == '"')
scm_c_put_latin1_chars (port, (const uint8_t *) "\\\"", 2);
else if (ch == '\\')
scm_c_put_latin1_chars (port, (const uint8_t *) "\\\\", 2);
else if (ch == '\n' && SCM_PRINT_ESCAPE_NEWLINES_P)
scm_c_put_latin1_chars (port, (const uint8_t *) "\\n", 2);
else if (ch == ' ' || ch == '\n'
|| (uc_is_general_category_withtable (ch,
UC_CATEGORY_MASK_L |
UC_CATEGORY_MASK_M |
UC_CATEGORY_MASK_N |
UC_CATEGORY_MASK_P |
UC_CATEGORY_MASK_S)
&& scm_c_can_put_char (port, ch)))
scm_c_put_char (port, ch);
else
scm_c_put_escaped_char (port, ch);
}
scm_c_put_char (port, (uint8_t) '"');
}
/* Write CH to PORT, escaping it if it's non-graphic or not
representable in PORT's encoding. The character escape syntax is
used. */
static void
write_character (scm_t_wchar ch, SCM port)
{
scm_puts ("#\\", port);
/* Pretty-print a combining characters over dotted circles, if
possible, to make them more visible. */
if (uc_combining_class (ch) != UC_CCC_NR
&& scm_c_can_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE)
&& scm_c_can_put_char (port, ch))
{
scm_c_put_char (port, SCM_CODEPOINT_DOTTED_CIRCLE);
scm_c_put_char (port, ch);
}
else if (uc_is_general_category_withtable (ch,
UC_CATEGORY_MASK_L |
UC_CATEGORY_MASK_M |
UC_CATEGORY_MASK_N |
UC_CATEGORY_MASK_P |
UC_CATEGORY_MASK_S)
&& scm_c_can_put_char (port, ch))
/* CH is graphic and encodeable; display it. */
scm_c_put_char (port, ch);
else
/* CH isn't graphic or cannot be represented in PORT's encoding. */
{
/* Represent CH using the character escape syntax. */
const char *name;
name = scm_i_charname (SCM_MAKE_CHAR (ch));
if (name != NULL)
scm_puts (name, port);
else if (!SCM_R6RS_ESCAPES_P)
scm_intprint (ch, 8, port);
else
{
scm_puts ("x", port);
scm_intprint (ch, 16, port);
}
}
}
/* Print an integer.
*/

View file

@ -1202,15 +1202,14 @@ SCM_DEFINE (scm_string_eq, "string=", 2, 4, 0,
if (len1 != len2)
return SCM_BOOL_F;
else if (scm_i_is_narrow_string (s1))
return scm_from_bool (memcmp (scm_i_string_chars (s1),
scm_i_string_chars (s2),
len1) == 0);
else
{
if (!scm_i_is_narrow_string (s1))
len1 *= 4;
return scm_from_bool (memcmp (scm_i_string_data (s1),
scm_i_string_data (s2),
len1) == 0);
}
return scm_from_bool (memcmp (scm_i_string_wide_chars (s1),
scm_i_string_wide_chars (s2),
len1 * 4) == 0);
}
return compare_strings (FUNC_NAME, 0,

View file

@ -51,7 +51,6 @@ SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end);
SCM_INTERNAL size_t scm_i_string_length (SCM str);
SCM_INTERNAL int scm_i_string_is_mutable (SCM str);
SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str);
SCM_INTERNAL const void *scm_i_string_data (SCM str);
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void);

File diff suppressed because it is too large Load diff

View file

@ -30,7 +30,33 @@
#define scm_is_symbol(x) (SCM_HAS_TYP7 (x, scm_tc7_symbol))
struct scm_symbol
{
scm_t_bits tag_and_flags;
struct scm_stringbuf *name;
scm_t_bits hash;
};
static inline int
scm_is_symbol (SCM x)
{
return SCM_HAS_TYP7 (x, scm_tc7_symbol);
}
static inline struct scm_symbol*
scm_to_symbol (SCM x)
{
if (!scm_is_symbol (x))
abort ();
return (struct scm_symbol *) SCM_UNPACK_POINTER (x);
}
static inline SCM
scm_from_symbol (struct scm_symbol *x)
{
return SCM_PACK_POINTER (x);
}
#define scm_i_symbol_hash(x) ((unsigned long) SCM_CELL_WORD_2 (x))
#define scm_i_symbol_is_interned(x) \
(!(SCM_CELL_WORD_0 (x) & SCM_I_F_SYMBOL_UNINTERNED))