1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

refactor scm_i_print_symbol_name

* libguile/print.c (symbol_has_extended_read_syntax)
  (print_normal_symbol, print_extended_symbol, scm_i_print_symbol_name):
  Factor scm_i_print_symbol_name into separate routines.  Add comments.
  There are a number of bugs here.
This commit is contained in:
Andy Wingo 2011-04-11 11:52:35 +02:00
parent adf43b3f08
commit 15671c6e7f

View file

@ -309,15 +309,10 @@ print_circref (SCM port, scm_print_state *pstate, SCM ref)
/* Print the name of a symbol. */
static int
quote_keywordish_symbol (SCM symbol)
quote_keywordish_symbols (void)
{
SCM option;
SCM option = SCM_PRINT_KEYWORD_STYLE;
if (scm_i_symbol_ref (symbol, 0) != ':'
&& scm_i_symbol_ref (symbol, scm_i_symbol_length (symbol) - 1) != ':')
return 0;
option = SCM_PRINT_KEYWORD_STYLE;
if (scm_is_false (option))
return 0;
if (scm_is_eq (option, sym_reader))
@ -325,42 +320,40 @@ quote_keywordish_symbol (SCM symbol)
return 1;
}
void
scm_i_print_symbol_name (SCM str, SCM port)
static int
symbol_has_extended_read_syntax (SCM sym)
{
/* This points to the first character that has not yet been written to the
* port. */
size_t pos = 0;
/* This points to the character we're currently looking at. */
size_t end;
/* If the name contains weird characters, we'll escape them with
* backslashes and set this flag; it indicates that we should surround the
* name with "#{" and "}#". */
int weird = 0;
/* Backslashes are not sufficient to make a name weird, but if a name is
* weird because of other characters, backslahes need to be escaped too.
* The first time we see a backslash, we set maybe_weird, and mw_pos points
* to the backslash. Then if the name turns out to be weird, we re-process
* everything starting from mw_pos.
* We could instead make backslashes always weird. This is not necessary
* to ensure that the output is (read)-able, but it would make this code
* simpler and faster. */
int maybe_weird = 0;
size_t mw_pos = 0;
size_t len = scm_i_symbol_length (str);
scm_t_wchar str0 = scm_i_symbol_ref (str, 0);
size_t pos, len = scm_i_symbol_length (sym);
scm_t_wchar c;
if (len == 0 || str0 == '\'' || str0 == '`' || str0 == ','
|| quote_keywordish_symbol (str)
|| (str0 == '.' && len == 1)
|| scm_is_true (scm_i_string_to_number (scm_symbol_to_string (str), 10)))
/* The empty symbol. */
if (len == 0)
return 1;
c = scm_i_symbol_ref (sym, 0);
/* Single dot; conflicts with dotted-pair notation. */
if (len == 1 && c == '.')
return 1;
/* Other initial-character constraints. */
if (c == '\'' || c == '`' || c == ',')
return 1;
/* Keywords can be identified by trailing colons too. */
if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':')
return quote_keywordish_symbols ();
/* Number-ish symbols. */
if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
return 1;
/* Otherwise assume everything is fine, unless one of these chars is
present. This is incorrect, but it's the way Guile has done it for
quite some time. */
for (pos = 0; pos < len; pos++)
{
scm_lfwrite ("#{", 2, port);
weird = 1;
}
for (end = pos; end < len; ++end)
switch (scm_i_symbol_ref (str, end))
switch (scm_i_symbol_ref (sym, pos))
{
#ifdef BRACKETS_AS_PARENS
case '[':
@ -373,45 +366,76 @@ scm_i_print_symbol_name (SCM str, SCM port)
case '#':
case SCM_WHITE_SPACES:
case SCM_LINE_INCREMENTORS:
weird_handler:
if (maybe_weird)
{
end = mw_pos;
maybe_weird = 0;
}
if (!weird)
{
scm_lfwrite ("#{", 2, port);
weird = 1;
}
if (pos < end)
scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
{
char buf[2];
buf[0] = '\\';
buf[1] = (char) (unsigned char) scm_i_symbol_ref (str, end);
scm_lfwrite (buf, 2, port);
}
pos = end + 1;
break;
case '\\':
if (weird)
goto weird_handler;
if (!maybe_weird)
{
maybe_weird = 1;
mw_pos = pos;
}
break;
return 1;
default:
break;
}
if (pos < end)
scm_lfwrite_substr (scm_symbol_to_string (str), pos, end, port);
if (weird)
}
return 0;
}
static void
print_normal_symbol (SCM sym, SCM port)
{
scm_display (scm_symbol_to_string (sym), port);
}
/* This is not the right logic, because it doesn't do anything special
for }# within a symbol, and there is no read logic to handle
escapes. We'll fix that in a future patch. */
static void
print_extended_symbol (SCM sym, SCM port)
{
size_t pos, len;
scm_t_string_failed_conversion_handler strategy;
len = scm_i_symbol_length (sym);
strategy = scm_i_get_conversion_strategy (port);
scm_lfwrite ("#{", 2, port);
for (pos = 0; pos < len; pos++)
{
scm_t_wchar c = scm_i_symbol_ref (sym, pos);
switch (c)
{
#ifdef BRACKETS_AS_PARENS
case '[':
case ']':
#endif
case '(':
case ')':
case '"':
case ';':
case '#':
case SCM_WHITE_SPACES:
case SCM_LINE_INCREMENTORS:
display_character ('\\', port, iconveh_question_mark);
/* fall through */
default:
if (!display_character (c, port, strategy))
scm_encoding_error ("print_extended_symbol", errno,
"cannot convert to output locale",
port, SCM_MAKE_CHAR (c));
break;
}
}
scm_lfwrite ("}#", 2, port);
}
/* FIXME: allow R6RS hex escapes instead of #{...}#. */
void
scm_i_print_symbol_name (SCM sym, SCM port)
{
if (symbol_has_extended_read_syntax (sym))
print_extended_symbol (sym, port);
else
print_normal_symbol (sym, port);
}
void
scm_print_symbol_name (const char *str, size_t len, SCM port)
{