mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-10 14:00:21 +02:00
Reader option for R6RS hex escapes
This adds a reader option 'r6rs-hex-escapes that modifies the behavior of numeric escapes in characters and strings. When enabled, variable-length character hex escapes (#\xNNN) are allowed and become the default output format for numerically-escaped characters. Also, string hex escapes switch to a semicolon terminated hex escape (\xNNNN;). * libguile/print.c (PRINT_CHAR_ESCAPE): new macro (iprin1): use new macro PRINT_CHAR_ESCAPE * libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define * libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter (scm_read_string): parse R6RS hex string escapes (scm_read_character): parse R6RS hex character escapes * test-suite/tests/chars.test (with-read-options): new procedure (R6RS hex escapes): new tests * test-suite/tests/strings.test (with-read-options): new procedure (R6RS hex escapes): new tests
This commit is contained in:
parent
8470b3f45b
commit
dea901d66e
5 changed files with 274 additions and 68 deletions
106
libguile/print.c
106
libguile/print.c
|
@ -409,6 +409,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display);
|
|||
|
||||
static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
|
||||
|
||||
|
||||
/* Print a character as an octal or hex escape. */
|
||||
#define PRINT_CHAR_ESCAPE(i, port) \
|
||||
do \
|
||||
{ \
|
||||
if (!SCM_R6RS_ESCAPES_P) \
|
||||
scm_intprint (i, 8, port); \
|
||||
else \
|
||||
{ \
|
||||
scm_puts ("x", port); \
|
||||
scm_intprint (i, 16, port); \
|
||||
} \
|
||||
} \
|
||||
while (0)
|
||||
|
||||
|
||||
void
|
||||
scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
||||
{
|
||||
|
@ -488,7 +504,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
PRINT_CHAR_ESCAPE (i, port);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -507,12 +523,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
PRINT_CHAR_ESCAPE (i, port);
|
||||
}
|
||||
}
|
||||
else
|
||||
/* Character is a non-graphical character. */
|
||||
scm_intprint (i, 8, port);
|
||||
PRINT_CHAR_ESCAPE (i, port);
|
||||
}
|
||||
else
|
||||
scm_i_charprint (i, port);
|
||||
|
@ -579,9 +595,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
case scm_tc7_string:
|
||||
if (SCM_WRITINGP (pstate))
|
||||
{
|
||||
size_t i, j, len;
|
||||
size_t i, len;
|
||||
static char const hex[] = "0123456789abcdef";
|
||||
char buf[8];
|
||||
char buf[9];
|
||||
|
||||
|
||||
scm_putc ('"', port);
|
||||
|
@ -647,37 +663,61 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
{
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding or is not graphic. */
|
||||
if (ch <= 0xFF)
|
||||
if (!SCM_R6RS_ESCAPES_P)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'x';
|
||||
buf[2] = hex[ch / 16];
|
||||
buf[3] = hex[ch % 16];
|
||||
scm_lfwrite (buf, 4, port);
|
||||
if (ch <= 0xFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'x';
|
||||
buf[2] = hex[ch / 16];
|
||||
buf[3] = hex[ch % 16];
|
||||
scm_lfwrite (buf, 4, port);
|
||||
}
|
||||
else if (ch <= 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'u';
|
||||
buf[2] = hex[(ch & 0xF000) >> 12];
|
||||
buf[3] = hex[(ch & 0xF00) >> 8];
|
||||
buf[4] = hex[(ch & 0xF0) >> 4];
|
||||
buf[5] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 6, port);
|
||||
}
|
||||
else if (ch > 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'U';
|
||||
buf[2] = hex[(ch & 0xF00000) >> 20];
|
||||
buf[3] = hex[(ch & 0xF0000) >> 16];
|
||||
buf[4] = hex[(ch & 0xF000) >> 12];
|
||||
buf[5] = hex[(ch & 0xF00) >> 8];
|
||||
buf[6] = hex[(ch & 0xF0) >> 4];
|
||||
buf[7] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 8, port);
|
||||
}
|
||||
}
|
||||
else if (ch <= 0xFFFF)
|
||||
else
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'u';
|
||||
buf[2] = hex[(ch & 0xF000) >> 12];
|
||||
buf[3] = hex[(ch & 0xF00) >> 8];
|
||||
buf[4] = hex[(ch & 0xF0) >> 4];
|
||||
buf[5] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 6, port);
|
||||
j = i + 1;
|
||||
}
|
||||
else if (ch > 0xFFFF)
|
||||
{
|
||||
buf[0] = '\\';
|
||||
buf[1] = 'U';
|
||||
buf[2] = hex[(ch & 0xF00000) >> 20];
|
||||
buf[3] = hex[(ch & 0xF0000) >> 16];
|
||||
buf[4] = hex[(ch & 0xF000) >> 12];
|
||||
buf[5] = hex[(ch & 0xF00) >> 8];
|
||||
buf[6] = hex[(ch & 0xF0) >> 4];
|
||||
buf[7] = hex[(ch & 0xF)];
|
||||
scm_lfwrite (buf, 8, port);
|
||||
j = i + 1;
|
||||
scm_t_wchar ch2 = ch;
|
||||
|
||||
/* Print an R6RS variable-length hex escape: "\xNNNN;"
|
||||
*/
|
||||
int i = 8;
|
||||
buf[i] = ';';
|
||||
i --;
|
||||
if (ch == 0)
|
||||
buf[i--] = '0';
|
||||
else
|
||||
while (ch2 > 0)
|
||||
{
|
||||
buf[i] = hex[ch2 & 0xF];
|
||||
ch2 >>= 4;
|
||||
i --;
|
||||
}
|
||||
buf[i] = 'x';
|
||||
i --;
|
||||
buf[i] = '\\';
|
||||
scm_lfwrite (buf + i, 9 - i, port);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
|
|||
#if SCM_ENABLE_ELISP
|
||||
#define SCM_ELISP_VECTORS_P scm_read_opts[4].val
|
||||
#define SCM_ESCAPED_PARENS_P scm_read_opts[5].val
|
||||
#define SCM_N_READ_OPTIONS 6
|
||||
#endif
|
||||
#define SCM_R6RS_ESCAPES_P scm_read_opts[6].val
|
||||
|
||||
#if SCM_ENABLE_ELISP
|
||||
#define SCM_N_READ_OPTIONS 7
|
||||
#else
|
||||
#define SCM_N_READ_OPTIONS 4
|
||||
#define SCM_N_READ_OPTIONS 5
|
||||
#endif
|
||||
|
||||
#endif /* PRIVATE_OPTIONS */
|
||||
|
|
|
@ -76,6 +76,8 @@ scm_t_option scm_read_opts[] = {
|
|||
{ SCM_OPTION_BOOLEAN, "elisp-strings", 0,
|
||||
"Support `\\(' and `\\)' in strings."},
|
||||
#endif
|
||||
{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
|
||||
"Use R6RS variable-length character and string hex escapes."},
|
||||
{ 0, },
|
||||
};
|
||||
|
||||
|
@ -412,32 +414,37 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
|
|||
|
||||
|
||||
/* Read a hexadecimal number NDIGITS in length. Put its value into the variable
|
||||
C. */
|
||||
#define SCM_READ_HEX_ESCAPE(ndigits) \
|
||||
do \
|
||||
{ \
|
||||
scm_t_wchar a; \
|
||||
size_t i = 0; \
|
||||
c = 0; \
|
||||
while (i < ndigits) \
|
||||
{ \
|
||||
a = scm_getc (port); \
|
||||
if (a == EOF) \
|
||||
goto str_eof; \
|
||||
if ('0' <= a && a <= '9') \
|
||||
a -= '0'; \
|
||||
else if ('A' <= a && a <= 'F') \
|
||||
a = a - 'A' + 10; \
|
||||
else if ('a' <= a && a <= 'f') \
|
||||
a = a - 'a' + 10; \
|
||||
else \
|
||||
{ \
|
||||
c = a; \
|
||||
goto bad_escaped; \
|
||||
} \
|
||||
c = c * 16 + a; \
|
||||
i ++; \
|
||||
} \
|
||||
C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
|
||||
found. */
|
||||
#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
|
||||
do \
|
||||
{ \
|
||||
scm_t_wchar a; \
|
||||
size_t i = 0; \
|
||||
c = 0; \
|
||||
while (i < ndigits) \
|
||||
{ \
|
||||
a = scm_getc (port); \
|
||||
if (a == EOF) \
|
||||
goto str_eof; \
|
||||
if (terminator \
|
||||
&& (a == (scm_t_wchar) terminator) \
|
||||
&& (i > 0)) \
|
||||
break; \
|
||||
if ('0' <= a && a <= '9') \
|
||||
a -= '0'; \
|
||||
else if ('A' <= a && a <= 'F') \
|
||||
a = a - 'A' + 10; \
|
||||
else if ('a' <= a && a <= 'f') \
|
||||
a = a - 'a' + 10; \
|
||||
else \
|
||||
{ \
|
||||
c = a; \
|
||||
goto bad_escaped; \
|
||||
} \
|
||||
c = c * 16 + a; \
|
||||
i ++; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
static SCM
|
||||
|
@ -511,13 +518,16 @@ scm_read_string (int chr, SCM port)
|
|||
c = '\010';
|
||||
break;
|
||||
case 'x':
|
||||
SCM_READ_HEX_ESCAPE (2);
|
||||
if (SCM_R6RS_ESCAPES_P)
|
||||
SCM_READ_HEX_ESCAPE (10, ';');
|
||||
else
|
||||
SCM_READ_HEX_ESCAPE (2, '\0');
|
||||
break;
|
||||
case 'u':
|
||||
SCM_READ_HEX_ESCAPE (4);
|
||||
SCM_READ_HEX_ESCAPE (4, '\0');
|
||||
break;
|
||||
case 'U':
|
||||
SCM_READ_HEX_ESCAPE (6);
|
||||
SCM_READ_HEX_ESCAPE (6, '\0');
|
||||
break;
|
||||
default:
|
||||
bad_escaped:
|
||||
|
@ -828,6 +838,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
|
|||
}
|
||||
}
|
||||
|
||||
if (cp == 'x' && (charname_len > 1) && SCM_R6RS_ESCAPES_P)
|
||||
{
|
||||
SCM p;
|
||||
scm_t_wchar chr;
|
||||
|
||||
/* Convert from hex, skipping the initial 'x' character in CHARNAME */
|
||||
p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
|
||||
scm_from_uint (16));
|
||||
if (SCM_I_INUMP (p))
|
||||
{
|
||||
scm_t_wchar c = SCM_I_INUM (p);
|
||||
if (SCM_IS_UNICODE_CHAR (c))
|
||||
return SCM_MAKE_CHAR (c);
|
||||
else
|
||||
scm_i_input_error (FUNC_NAME, port,
|
||||
"out-of-range hex character escape: ~a",
|
||||
scm_list_1 (charname));
|
||||
}
|
||||
}
|
||||
|
||||
/* The names of characters should never have non-Latin1
|
||||
characters. */
|
||||
if (scm_i_is_narrow_string (charname)
|
||||
|
|
|
@ -29,6 +29,16 @@
|
|||
(cons #t "out-of-range"))
|
||||
|
||||
|
||||
;; Run THUNK in the context of the reader options OPTS
|
||||
(define (with-read-options opts thunk)
|
||||
(let ((saved-options (read-options)))
|
||||
(dynamic-wind
|
||||
(lambda ()
|
||||
(read-options opts))
|
||||
thunk
|
||||
(lambda ()
|
||||
(read-options saved-options)))))
|
||||
|
||||
(with-test-prefix "basic char handling"
|
||||
|
||||
(with-test-prefix "evaluator"
|
||||
|
@ -313,3 +323,37 @@
|
|||
(with-output-to-string (lambda () (write #\soh)))
|
||||
"#\\soh"))))
|
||||
|
||||
(with-test-prefix "R6RS hex escapes"
|
||||
|
||||
(pass-if "one-digit hex escape"
|
||||
(eqv? (with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "#\\xA" read)))
|
||||
(integer->char #x0A)))
|
||||
|
||||
(pass-if "two-digit hex escape"
|
||||
(eqv? (with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "#\\xFF" read)))
|
||||
(integer->char #xFF)))
|
||||
|
||||
(pass-if "four-digit hex escape"
|
||||
(eqv? (with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "#\\x00FF" read)))
|
||||
(integer->char #xFF)))
|
||||
|
||||
(pass-if "eight-digit hex escape"
|
||||
(eqv? (with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "#\\x00006587" read)))
|
||||
(integer->char #x6587)))
|
||||
(pass-if "write R6RS escapes"
|
||||
(string=?
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-output-to-string
|
||||
(lambda ()
|
||||
(write (integer->char #x80))))))
|
||||
"#\\x80")))
|
||||
|
||||
|
|
|
@ -2,23 +2,24 @@
|
|||
;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
|
||||
;;;;
|
||||
;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;;
|
||||
;;;; This library is free software; you can redistribute it and/or
|
||||
;;;; modify it under the terms of the GNU Lesser General Public
|
||||
;;;; License as published by the Free Software Foundation; either
|
||||
;;;; version 3 of the License, or (at your option) any later version.
|
||||
;;;;
|
||||
;;;;
|
||||
;;;; This library is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
;;;; Lesser General Public License for more details.
|
||||
;;;;
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;;; License along with this library; if not, write to the Free Software
|
||||
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib))
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:read-only-string
|
||||
(cons 'misc-error "^string is read-only"))
|
||||
|
@ -29,6 +30,16 @@
|
|||
(define exception:wrong-type-arg
|
||||
(cons #t "Wrong type"))
|
||||
|
||||
;; Run THUNK in the context of the reader options OPTS
|
||||
(define (with-read-options opts thunk)
|
||||
(let ((saved-options (read-options)))
|
||||
(dynamic-wind
|
||||
(lambda ()
|
||||
(read-options opts))
|
||||
thunk
|
||||
(lambda ()
|
||||
(read-options saved-options)))))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
@ -229,6 +240,83 @@
|
|||
(pass-if "Guile extensions backslash escapes"
|
||||
(string=? "\0" (string #\nul))))
|
||||
|
||||
|
||||
(with-test-prefix "R6RS hex escapes"
|
||||
|
||||
(pass-if-exception "non-hex char in two-digit hex-escape"
|
||||
exception:illegal-escape
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "\"\\x0g;\"" read))))
|
||||
|
||||
(pass-if-exception "non-hex char in four-digit hex-escape"
|
||||
exception:illegal-escape
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "\"\\x000g;\"" read))))
|
||||
|
||||
(pass-if-exception "non-hex char in six-digit hex-escape"
|
||||
exception:illegal-escape
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "\"\\x00000g;\"" read))))
|
||||
|
||||
(pass-if-exception "no semicolon at termination of one-digit hex-escape"
|
||||
exception:illegal-escape
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "\"\\x0\"" read))))
|
||||
|
||||
(pass-if-exception "no semicolon at termination of three-digit hex-escape"
|
||||
exception:illegal-escape
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "\"\\x000\"" read))))
|
||||
|
||||
(pass-if "two-digit hex escape"
|
||||
(eqv?
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(string-ref (with-input-from-string "\"--\\xff;--\"" read) 2)))
|
||||
(integer->char #xff)))
|
||||
|
||||
(pass-if "four-digit hex escape"
|
||||
(eqv?
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2)))
|
||||
(integer->char #x0100)))
|
||||
|
||||
(pass-if "six-digit hex escape"
|
||||
(eqv?
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2)))
|
||||
(integer->char #x010300)))
|
||||
|
||||
(pass-if "escaped characters match non-escaped ASCII characters"
|
||||
(string=?
|
||||
(with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read)))
|
||||
"ABC"))
|
||||
|
||||
(pass-if "write R6RS escapes"
|
||||
|
||||
(let* ((s1 (apply string
|
||||
(map integer->char '(#x8 ; backspace
|
||||
#x20 ; space
|
||||
#x30 ; zero
|
||||
#x40 ; at sign
|
||||
))))
|
||||
(s2 (with-read-options '(r6rs-hex-escapes)
|
||||
(lambda ()
|
||||
(with-output-to-string
|
||||
(lambda () (write s1)))))))
|
||||
(lset= eqv?
|
||||
(string->list s2)
|
||||
(list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\")))))
|
||||
|
||||
;;
|
||||
;; string?
|
||||
;;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue