mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-18 17:50:29 +02:00
Reader option for R6RS hex escapes
This adds a reader option 'r6rs-hex-escapes that modifies the behavior of numeric escapes in characters and strings. When enabled, variable-length character hex escapes (#\xNNN) are allowed and become the default output format for numerically-escaped characters. Also, string hex escapes switch to a semicolon-terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE
* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define
* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes
* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests
* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests
This commit is contained in:
parent
8470b3f45b
commit
dea901d66e
5 changed files with 274 additions and 68 deletions
106
libguile/print.c
106
libguile/print.c
|
@ -409,6 +409,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display);
|
|||
|
||||
static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
|
||||
|
||||
|
||||
/* Write the character code I to PORT as a numeric escape.  When
   SCM_R6RS_ESCAPES_P is set, the output is the letter `x' followed by
   I in base 16; otherwise it is I in base 8.  Any prefix (such as
   `#\') is the caller's responsibility.  */
#define PRINT_CHAR_ESCAPE(i, port)              \
  do                                            \
    {                                           \
      if (SCM_R6RS_ESCAPES_P)                   \
        {                                       \
          scm_puts ("x", port);                 \
          scm_intprint (i, 16, port);           \
        }                                       \
      else                                      \
        scm_intprint (i, 8, port);              \
    }                                           \
  while (0)
|
||||
|
||||
|
||||
void
|
||||
scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
||||
{
|
||||
|
@ -488,7 +504,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
PRINT_CHAR_ESCAPE (i, port);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -507,12 +523,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
PRINT_CHAR_ESCAPE (i, port);
|
||||
}
|
||||
}
|
||||
else
|
||||
/* Character is a non-graphical character. */
|
||||
scm_intprint (i, 8, port);
|
||||
PRINT_CHAR_ESCAPE (i, port);
|
||||
}
|
||||
else
|
||||
scm_i_charprint (i, port);
|
||||
|
@ -579,9 +595,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
case scm_tc7_string:
|
||||
if (SCM_WRITINGP (pstate))
|
||||
{
|
||||
size_t i, j, len;
|
||||
size_t i, len;
|
||||
static char const hex[] = "0123456789abcdef";
|
||||
char buf[8];
|
||||
char buf[9];
|
||||
|
||||
|
||||
scm_putc ('"', port);
|
||||
|
@ -647,37 +663,61 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
{
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding or is not graphic. */
|
||||
if (ch <= 0xFF)
|
||||
if (!SCM_R6RS_ESCAPES_P)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'x';
|
||||
buf[2] = hex[ch / 16];
|
||||
buf[3] = hex[ch % 16];
|
||||
scm_lfwrite (buf, 4, port);
|
||||
if (ch <= 0xFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'x';
|
||||
buf[2] = hex[ch / 16];
|
||||
buf[3] = hex[ch % 16];
|
||||
scm_lfwrite (buf, 4, port);
|
||||
}
|
||||
else if (ch <= 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'u';
|
||||
buf[2] = hex[(ch & 0xF000) >> 12];
|
||||
buf[3] = hex[(ch & 0xF00) >> 8];
|
||||
buf[4] = hex[(ch & 0xF0) >> 4];
|
||||
buf[5] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 6, port);
|
||||
}
|
||||
else if (ch > 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'U';
|
||||
buf[2] = hex[(ch & 0xF00000) >> 20];
|
||||
buf[3] = hex[(ch & 0xF0000) >> 16];
|
||||
buf[4] = hex[(ch & 0xF000) >> 12];
|
||||
buf[5] = hex[(ch & 0xF00) >> 8];
|
||||
buf[6] = hex[(ch & 0xF0) >> 4];
|
||||
buf[7] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 8, port);
|
||||
}
|
||||
}
|
||||
else if (ch <= 0xFFFF)
|
||||
else
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'u';
|
||||
buf[2] = hex[(ch & 0xF000) >> 12];
|
||||
buf[3] = hex[(ch & 0xF00) >> 8];
|
||||
buf[4] = hex[(ch & 0xF0) >> 4];
|
||||
buf[5] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 6, port);
|
||||
j = i + 1;
|
||||
}
|
||||
else if (ch > 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'U';
|
||||
buf[2] = hex[(ch & 0xF00000) >> 20];
|
||||
buf[3] = hex[(ch & 0xF0000) >> 16];
|
||||
buf[4] = hex[(ch & 0xF000) >> 12];
|
||||
buf[5] = hex[(ch & 0xF00) >> 8];
|
||||
buf[6] = hex[(ch & 0xF0) >> 4];
|
||||
buf[7] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 8, port);
|
||||
j = i + 1;
|
||||
scm_t_wchar ch2 = ch;
|
||||
|
||||
/* Print an R6RS variable-length hex escape: "\xNNNN;"
|
||||
*/
|
||||
int i = 8;
|
||||
buf[i] = ';';
|
||||
i --;
|
||||
if (ch == 0)
|
||||
buf[i--] = '0';
|
||||
else
|
||||
while (ch2 > 0)
|
||||
{
|
||||
buf[i] = hex[ch2 & 0xF];
|
||||
ch2 >>= 4;
|
||||
i --;
|
||||
}
|
||||
buf[i] = 'x';
|
||||
i --;
|
||||
buf[i] = '\\';
|
||||
scm_lfwrite (buf + i, 9 - i, port);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
|
|||
/* Indices of the trailing entries of scm_read_opts, and the total
   number of read options.  The "r6rs-hex-escapes" entry is placed after
   the two Elisp-only entries in the option table, so its index depends
   on whether those entries are compiled in: it is always the last
   option, i.e. SCM_N_READ_OPTIONS - 1.  Using a fixed index of 6 when
   SCM_ENABLE_ELISP is off would read past the end of the table.  */
#if SCM_ENABLE_ELISP
#define SCM_ELISP_VECTORS_P    scm_read_opts[4].val
#define SCM_ESCAPED_PARENS_P   scm_read_opts[5].val
#define SCM_R6RS_ESCAPES_P     scm_read_opts[6].val
#define SCM_N_READ_OPTIONS 7
#else
#define SCM_R6RS_ESCAPES_P     scm_read_opts[4].val
#define SCM_N_READ_OPTIONS 5
#endif
|
||||
|
||||
#endif /* PRIVATE_OPTIONS */
|
||||
|
|
|
@ -76,6 +76,8 @@ scm_t_option scm_read_opts[] = {
|
|||
{ SCM_OPTION_BOOLEAN, "elisp-strings", 0,
|
||||
"Support `\\(' and `\\)' in strings."},
|
||||
#endif
|
||||
{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
|
||||
"Use R6RS variable-length character and string hex escapes."},
|
||||
{ 0, },
|
||||
};
|
||||
|
||||
|
@ -412,32 +414,37 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
|
|||
|
||||
|
||||
/* Read up to NDIGITS hexadecimal digits from the variable PORT and put
   their value into the variable C.  If TERMINATOR is non-null, stop
   early when the TERMINATOR character is read after at least one digit;
   the terminator is consumed but does not contribute to C.

   The expansion site must provide the variables `c' and `port' and the
   labels `str_eof' (jumped to on end of file) and `bad_escaped' (jumped
   to on a non-hex character, with C set to the offending character).

   NOTE: if NDIGITS digits are read before TERMINATOR is seen, the loop
   ends without reading or checking for TERMINATOR, and the accumulated
   value is not range-checked.  */
#define SCM_READ_HEX_ESCAPE(ndigits, terminator)                   \
  do                                                               \
    {                                                              \
      scm_t_wchar a;                                               \
      size_t i = 0;                                                \
      c = 0;                                                       \
      while (i < (ndigits))                                        \
        {                                                          \
          a = scm_getc (port);                                     \
          if (a == EOF)                                            \
            goto str_eof;                                          \
          /* A terminator only counts once a digit has been read. */ \
          if ((terminator)                                         \
              && (a == (scm_t_wchar) (terminator))                 \
              && (i > 0))                                          \
            break;                                                 \
          if ('0' <= a && a <= '9')                                \
            a -= '0';                                              \
          else if ('A' <= a && a <= 'F')                           \
            a = a - 'A' + 10;                                      \
          else if ('a' <= a && a <= 'f')                           \
            a = a - 'a' + 10;                                      \
          else                                                     \
            {                                                      \
              /* Hand the bad character to the error path. */      \
              c = a;                                               \
              goto bad_escaped;                                    \
            }                                                      \
          c = c * 16 + a;                                          \
          i ++;                                                    \
        }                                                          \
    } while (0)
|
||||
|
||||
static SCM
|
||||
|
@ -511,13 +518,16 @@ scm_read_string (int chr, SCM port)
|
|||
c = '\010';
|
||||
break;
|
||||
case 'x':
|
||||
SCM_READ_HEX_ESCAPE (2);
|
||||
if (SCM_R6RS_ESCAPES_P)
|
||||
SCM_READ_HEX_ESCAPE (10, ';');
|
||||
else
|
||||
SCM_READ_HEX_ESCAPE (2, '\0');
|
||||
break;
|
||||
case 'u':
|
||||
SCM_READ_HEX_ESCAPE (4);
|
||||
SCM_READ_HEX_ESCAPE (4, '\0');
|
||||
break;
|
||||
case 'U':
|
||||
SCM_READ_HEX_ESCAPE (6);
|
||||
SCM_READ_HEX_ESCAPE (6, '\0');
|
||||
break;
|
||||
default:
|
||||
bad_escaped:
|
||||
|
@ -828,6 +838,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
|
|||
}
|
||||
}
|
||||
|
||||
if (cp == 'x' && (charname_len > 1) && SCM_R6RS_ESCAPES_P)
|
||||
{
|
||||
SCM p;
|
||||
scm_t_wchar chr;
|
||||
|
||||
/* Convert from hex, skipping the initial 'x' character in CHARNAME */
|
||||
p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
|
||||
scm_from_uint (16));
|
||||
if (SCM_I_INUMP (p))
|
||||
{
|
||||
scm_t_wchar c = SCM_I_INUM (p);
|
||||
if (SCM_IS_UNICODE_CHAR (c))
|
||||
return SCM_MAKE_CHAR (c);
|
||||
else
|
||||
scm_i_input_error (FUNC_NAME, port,
|
||||
"out-of-range hex character escape: ~a",
|
||||
scm_list_1 (charname));
|
||||
}
|
||||
}
|
||||
|
||||
/* The names of characters should never have non-Latin1
|
||||
characters. */
|
||||
if (scm_i_is_narrow_string (charname)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue