mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-11 06:20:23 +02:00
symbols with odd characters print better in #{}#
* libguile/print.c (symbol_has_extended_read_syntax): Use a more general, unicode-appropriate algorithm. Hopefully doesn't cause any current #{}# cases to be unescaped. (print_extended_symbol): Use more appropriate unicode algorithm, and emit unicode hex escapes instead of our own lame escapes. * test-suite/tests/symbols.test: Add tests.
This commit is contained in:
parent
d9527cfafd
commit
2e9fc9fc73
2 changed files with 41 additions and 42 deletions
|
@ -320,6 +320,18 @@ quote_keywordish_symbols (void)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define INITIAL_IDENTIFIER_MASK \
|
||||||
|
(UC_CATEGORY_MASK_Lu | UC_CATEGORY_MASK_Ll | UC_CATEGORY_MASK_Lt \
|
||||||
|
| UC_CATEGORY_MASK_Lm | UC_CATEGORY_MASK_Lo | UC_CATEGORY_MASK_Mn \
|
||||||
|
| UC_CATEGORY_MASK_Nl | UC_CATEGORY_MASK_No | UC_CATEGORY_MASK_Pd \
|
||||||
|
| UC_CATEGORY_MASK_Pc | UC_CATEGORY_MASK_Po | UC_CATEGORY_MASK_Sc \
|
||||||
|
| UC_CATEGORY_MASK_Sm | UC_CATEGORY_MASK_Sk | UC_CATEGORY_MASK_So \
|
||||||
|
| UC_CATEGORY_MASK_Co)
|
||||||
|
|
||||||
|
#define SUBSEQUENT_IDENTIFIER_MASK \
|
||||||
|
(INITIAL_IDENTIFIER_MASK \
|
||||||
|
| UC_CATEGORY_MASK_Nd | UC_CATEGORY_MASK_Mc | UC_CATEGORY_MASK_Me)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
symbol_has_extended_read_syntax (SCM sym)
|
symbol_has_extended_read_syntax (SCM sym)
|
||||||
{
|
{
|
||||||
|
@ -337,7 +349,7 @@ symbol_has_extended_read_syntax (SCM sym)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/* Other initial-character constraints. */
|
/* Other initial-character constraints. */
|
||||||
if (c == '\'' || c == '`' || c == ',')
|
if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#')
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/* Keywords can be identified by trailing colons too. */
|
/* Keywords can be identified by trailing colons too. */
|
||||||
|
@ -348,28 +360,20 @@ symbol_has_extended_read_syntax (SCM sym)
|
||||||
if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
|
if (scm_is_true (scm_i_string_to_number (scm_symbol_to_string (sym), 10)))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
/* Otherwise assume everything is fine, unless one of these chars is
|
/* Other disallowed first characters. */
|
||||||
present. This is incorrect, but it's the way Guile has done it for
|
if (!uc_is_general_category_withtable (c, INITIAL_IDENTIFIER_MASK))
|
||||||
quite some time. */
|
return 1;
|
||||||
for (pos = 0; pos < len; pos++)
|
|
||||||
|
/* Otherwise, any character that's in the identifier category mask is
|
||||||
|
fine to pass through as-is, provided it's not one of the ASCII
|
||||||
|
delimiters like `;'. */
|
||||||
|
for (pos = 1; pos < len; pos++)
|
||||||
{
|
{
|
||||||
switch (scm_i_symbol_ref (sym, pos))
|
c = scm_i_symbol_ref (sym, pos);
|
||||||
{
|
if (!uc_is_general_category_withtable (c, SUBSEQUENT_IDENTIFIER_MASK))
|
||||||
#ifdef BRACKETS_AS_PARENS
|
return 1;
|
||||||
case '[':
|
else if (c == '"' || c == ';' || c == '#')
|
||||||
case ']':
|
return 1;
|
||||||
#endif
|
|
||||||
case '(':
|
|
||||||
case ')':
|
|
||||||
case '"':
|
|
||||||
case ';':
|
|
||||||
case '#':
|
|
||||||
case SCM_WHITE_SPACES:
|
|
||||||
case SCM_LINE_INCREMENTORS:
|
|
||||||
return 1;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -381,9 +385,6 @@ print_normal_symbol (SCM sym, SCM port)
|
||||||
scm_display (scm_symbol_to_string (sym), port);
|
scm_display (scm_symbol_to_string (sym), port);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This is not the right logic, because it doesn't do anything special
|
|
||||||
for }# within a symbol, and there is no read logic to handle
|
|
||||||
escapes. We'll fix that in a future patch. */
|
|
||||||
static void
|
static void
|
||||||
print_extended_symbol (SCM sym, SCM port)
|
print_extended_symbol (SCM sym, SCM port)
|
||||||
{
|
{
|
||||||
|
@ -399,27 +400,20 @@ print_extended_symbol (SCM sym, SCM port)
|
||||||
{
|
{
|
||||||
scm_t_wchar c = scm_i_symbol_ref (sym, pos);
|
scm_t_wchar c = scm_i_symbol_ref (sym, pos);
|
||||||
|
|
||||||
switch (c)
|
if (uc_is_general_category_withtable (c,
|
||||||
|
SUBSEQUENT_IDENTIFIER_MASK
|
||||||
|
| UC_CATEGORY_MASK_Zs))
|
||||||
{
|
{
|
||||||
#ifdef BRACKETS_AS_PARENS
|
|
||||||
case '[':
|
|
||||||
case ']':
|
|
||||||
#endif
|
|
||||||
case '(':
|
|
||||||
case ')':
|
|
||||||
case '"':
|
|
||||||
case ';':
|
|
||||||
case '#':
|
|
||||||
case SCM_WHITE_SPACES:
|
|
||||||
case SCM_LINE_INCREMENTORS:
|
|
||||||
display_character ('\\', port, iconveh_question_mark);
|
|
||||||
/* fall through */
|
|
||||||
default:
|
|
||||||
if (!display_character (c, port, strategy))
|
if (!display_character (c, port, strategy))
|
||||||
scm_encoding_error ("print_extended_symbol", errno,
|
scm_encoding_error ("print_extended_symbol", errno,
|
||||||
"cannot convert to output locale",
|
"cannot convert to output locale",
|
||||||
port, SCM_MAKE_CHAR (c));
|
port, SCM_MAKE_CHAR (c));
|
||||||
break;
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
display_string ("\\x", 1, 2, port, iconveh_question_mark);
|
||||||
|
scm_intprint (c, 16, port);
|
||||||
|
display_character (';', port, iconveh_question_mark);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
;;;; symbols.test --- test suite for Guile's symbols -*- scheme -*-
|
;;;; symbols.test --- test suite for Guile's symbols -*- scheme -*-
|
||||||
;;;;
|
;;;;
|
||||||
;;;; Copyright (C) 2001, 2006, 2008, 2009 Free Software Foundation, Inc.
|
;;;; Copyright (C) 2001, 2006, 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||||
;;;;
|
;;;;
|
||||||
;;;; This library is free software; you can redistribute it and/or
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
;;;; modify it under the terms of the GNU Lesser General Public
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
@ -151,3 +151,8 @@
|
||||||
(pass-if "accepts embedded NULs"
|
(pass-if "accepts embedded NULs"
|
||||||
(> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6)))
|
(> (string-length (symbol->string (gensym "foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0foo\0bar\0braz\0"))) 6)))
|
||||||
|
|
||||||
|
(with-test-prefix "extended read syntax"
|
||||||
|
(pass-if (equal? "#{}#" (object->string (string->symbol ""))))
|
||||||
|
(pass-if (equal? "a" (object->string (string->symbol "a"))))
|
||||||
|
(pass-if (equal? "#{a b}#" (object->string (string->symbol "a b"))))
|
||||||
|
(pass-if (equal? "#{\\x7d;}#" (object->string (string->symbol "}")))))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue