1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-10 14:00:21 +02:00

Reader option for R6RS hex escapes

This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings.  When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters.  Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE

* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define

* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes

* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests
This commit is contained in:
Michael Gran 2010-01-12 21:02:41 -08:00
parent 8470b3f45b
commit dea901d66e
5 changed files with 274 additions and 68 deletions

View file

@ -409,6 +409,22 @@ SCM_GPROC(s_display, "display", 1, 1, 0, scm_display, g_display);
static void iprin1 (SCM exp, SCM port, scm_print_state *pstate);
/* Print a character as an octal or hex escape. */
#define PRINT_CHAR_ESCAPE(i, port) \
do \
{ \
if (!SCM_R6RS_ESCAPES_P) \
scm_intprint (i, 8, port); \
else \
{ \
scm_puts ("x", port); \
scm_intprint (i, 16, port); \
} \
} \
while (0)
void
scm_iprin1 (SCM exp, SCM port, scm_print_state *pstate)
{
@ -488,7 +504,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
else
/* Character is graphic but unrepresentable in
this port's encoding. */
scm_intprint (i, 8, port);
PRINT_CHAR_ESCAPE (i, port);
}
else
{
@ -507,12 +523,12 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
else
/* Character is graphic but unrepresentable in
this port's encoding. */
scm_intprint (i, 8, port);
PRINT_CHAR_ESCAPE (i, port);
}
}
else
/* Character is a non-graphical character. */
scm_intprint (i, 8, port);
PRINT_CHAR_ESCAPE (i, port);
}
else
scm_i_charprint (i, port);
@ -579,9 +595,9 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
size_t i, j, len;
size_t i, len;
static char const hex[] = "0123456789abcdef";
char buf[8];
char buf[9];
scm_putc ('"', port);
@ -647,37 +663,61 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
{
/* Character is graphic but unrepresentable in
this port's encoding or is not graphic. */
if (ch <= 0xFF)
if (!SCM_R6RS_ESCAPES_P)
{
buf[0] = '\\';
buf[1] = 'x';
buf[2] = hex[ch / 16];
buf[3] = hex[ch % 16];
scm_lfwrite (buf, 4, port);
if (ch <= 0xFF)
{
buf[0] = '\\';
buf[1] = 'x';
buf[2] = hex[ch / 16];
buf[3] = hex[ch % 16];
scm_lfwrite (buf, 4, port);
}
else if (ch <= 0xFFFF)
{
buf[0] = '\\';
buf[1] = 'u';
buf[2] = hex[(ch & 0xF000) >> 12];
buf[3] = hex[(ch & 0xF00) >> 8];
buf[4] = hex[(ch & 0xF0) >> 4];
buf[5] = hex[(ch & 0xF)];
scm_lfwrite (buf, 6, port);
}
else if (ch > 0xFFFF)
{
buf[0] = '\\';
buf[1] = 'U';
buf[2] = hex[(ch & 0xF00000) >> 20];
buf[3] = hex[(ch & 0xF0000) >> 16];
buf[4] = hex[(ch & 0xF000) >> 12];
buf[5] = hex[(ch & 0xF00) >> 8];
buf[6] = hex[(ch & 0xF0) >> 4];
buf[7] = hex[(ch & 0xF)];
scm_lfwrite (buf, 8, port);
}
}
else if (ch <= 0xFFFF)
else
{
buf[0] = '\\';
buf[1] = 'u';
buf[2] = hex[(ch & 0xF000) >> 12];
buf[3] = hex[(ch & 0xF00) >> 8];
buf[4] = hex[(ch & 0xF0) >> 4];
buf[5] = hex[(ch & 0xF)];
scm_lfwrite (buf, 6, port);
j = i + 1;
}
else if (ch > 0xFFFF)
{
buf[0] = '\\';
buf[1] = 'U';
buf[2] = hex[(ch & 0xF00000) >> 20];
buf[3] = hex[(ch & 0xF0000) >> 16];
buf[4] = hex[(ch & 0xF000) >> 12];
buf[5] = hex[(ch & 0xF00) >> 8];
buf[6] = hex[(ch & 0xF0) >> 4];
buf[7] = hex[(ch & 0xF)];
scm_lfwrite (buf, 8, port);
j = i + 1;
scm_t_wchar ch2 = ch;
/* Print an R6RS variable-length hex escape: "\xNNNN;"
*/
int i = 8;
buf[i] = ';';
i --;
if (ch == 0)
buf[i--] = '0';
else
while (ch2 > 0)
{
buf[i] = hex[ch2 & 0xF];
ch2 >>= 4;
i --;
}
buf[i] = 'x';
i --;
buf[i] = '\\';
scm_lfwrite (buf + i, 9 - i, port);
}
}
}

View file

@ -94,9 +94,13 @@ SCM_API scm_t_option scm_read_opts[];
#if SCM_ENABLE_ELISP
#define SCM_ELISP_VECTORS_P scm_read_opts[4].val
#define SCM_ESCAPED_PARENS_P scm_read_opts[5].val
#define SCM_N_READ_OPTIONS 6
#endif
#define SCM_R6RS_ESCAPES_P scm_read_opts[6].val
#if SCM_ENABLE_ELISP
#define SCM_N_READ_OPTIONS 7
#else
#define SCM_N_READ_OPTIONS 4
#define SCM_N_READ_OPTIONS 5
#endif
#endif /* PRIVATE_OPTIONS */

View file

@ -76,6 +76,8 @@ scm_t_option scm_read_opts[] = {
{ SCM_OPTION_BOOLEAN, "elisp-strings", 0,
"Support `\\(' and `\\)' in strings."},
#endif
{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
"Use R6RS variable-length character and string hex escapes."},
{ 0, },
};
@ -412,32 +414,37 @@ scm_read_sexp (scm_t_wchar chr, SCM port)
/* Read a hexadecimal number NDIGITS in length. Put its value into the variable
C. */
#define SCM_READ_HEX_ESCAPE(ndigits) \
do \
{ \
scm_t_wchar a; \
size_t i = 0; \
c = 0; \
while (i < ndigits) \
{ \
a = scm_getc (port); \
if (a == EOF) \
goto str_eof; \
if ('0' <= a && a <= '9') \
a -= '0'; \
else if ('A' <= a && a <= 'F') \
a = a - 'A' + 10; \
else if ('a' <= a && a <= 'f') \
a = a - 'a' + 10; \
else \
{ \
c = a; \
goto bad_escaped; \
} \
c = c * 16 + a; \
i ++; \
} \
C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
found. */
#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
do \
{ \
scm_t_wchar a; \
size_t i = 0; \
c = 0; \
while (i < ndigits) \
{ \
a = scm_getc (port); \
if (a == EOF) \
goto str_eof; \
if (terminator \
&& (a == (scm_t_wchar) terminator) \
&& (i > 0)) \
break; \
if ('0' <= a && a <= '9') \
a -= '0'; \
else if ('A' <= a && a <= 'F') \
a = a - 'A' + 10; \
else if ('a' <= a && a <= 'f') \
a = a - 'a' + 10; \
else \
{ \
c = a; \
goto bad_escaped; \
} \
c = c * 16 + a; \
i ++; \
} \
} while (0)
static SCM
@ -511,13 +518,16 @@ scm_read_string (int chr, SCM port)
c = '\010';
break;
case 'x':
SCM_READ_HEX_ESCAPE (2);
if (SCM_R6RS_ESCAPES_P)
SCM_READ_HEX_ESCAPE (10, ';');
else
SCM_READ_HEX_ESCAPE (2, '\0');
break;
case 'u':
SCM_READ_HEX_ESCAPE (4);
SCM_READ_HEX_ESCAPE (4, '\0');
break;
case 'U':
SCM_READ_HEX_ESCAPE (6);
SCM_READ_HEX_ESCAPE (6, '\0');
break;
default:
bad_escaped:
@ -828,6 +838,26 @@ scm_read_character (scm_t_wchar chr, SCM port)
}
}
if (cp == 'x' && (charname_len > 1) && SCM_R6RS_ESCAPES_P)
{
SCM p;
scm_t_wchar chr;
/* Convert from hex, skipping the initial 'x' character in CHARNAME */
p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
scm_from_uint (16));
if (SCM_I_INUMP (p))
{
scm_t_wchar c = SCM_I_INUM (p);
if (SCM_IS_UNICODE_CHAR (c))
return SCM_MAKE_CHAR (c);
else
scm_i_input_error (FUNC_NAME, port,
"out-of-range hex character escape: ~a",
scm_list_1 (charname));
}
}
/* The names of characters should never have non-Latin1
characters. */
if (scm_i_is_narrow_string (charname)

View file

@ -29,6 +29,16 @@
(cons #t "out-of-range"))
;; Run THUNK in the context of the reader options OPTS
(define (with-read-options opts thunk)
(let ((saved-options (read-options)))
(dynamic-wind
(lambda ()
(read-options opts))
thunk
(lambda ()
(read-options saved-options)))))
(with-test-prefix "basic char handling"
(with-test-prefix "evaluator"
@ -313,3 +323,37 @@
(with-output-to-string (lambda () (write #\soh)))
"#\\soh"))))
(with-test-prefix "R6RS hex escapes"
(pass-if "one-digit hex escape"
(eqv? (with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "#\\xA" read)))
(integer->char #x0A)))
(pass-if "two-digit hex escape"
(eqv? (with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "#\\xFF" read)))
(integer->char #xFF)))
(pass-if "four-digit hex escape"
(eqv? (with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "#\\x00FF" read)))
(integer->char #xFF)))
(pass-if "eight-digit hex escape"
(eqv? (with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "#\\x00006587" read)))
(integer->char #x6587)))
(pass-if "write R6RS escapes"
(string=?
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-output-to-string
(lambda ()
(write (integer->char #x80))))))
"#\\x80")))

View file

@ -2,23 +2,24 @@
;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
;;;;
;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
;;;;
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
(define-module (test-strings)
#:use-module (test-suite lib))
#:use-module (test-suite lib)
#:use-module (srfi srfi-1))
(define exception:read-only-string
(cons 'misc-error "^string is read-only"))
@ -29,6 +30,16 @@
(define exception:wrong-type-arg
(cons #t "Wrong type"))
;; Run THUNK in the context of the reader options OPTS
(define (with-read-options opts thunk)
(let ((saved-options (read-options)))
(dynamic-wind
(lambda ()
(read-options opts))
thunk
(lambda ()
(read-options saved-options)))))
;; Create a string from integer char values, eg. (string-ints 65) => "A"
(define (string-ints . args)
(apply string (map integer->char args)))
@ -229,6 +240,83 @@
(pass-if "Guile extensions backslash escapes"
(string=? "\0" (string #\nul))))
(with-test-prefix "R6RS hex escapes"
(pass-if-exception "non-hex char in two-digit hex-escape"
exception:illegal-escape
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "\"\\x0g;\"" read))))
(pass-if-exception "non-hex char in four-digit hex-escape"
exception:illegal-escape
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "\"\\x000g;\"" read))))
(pass-if-exception "non-hex char in six-digit hex-escape"
exception:illegal-escape
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "\"\\x00000g;\"" read))))
(pass-if-exception "no semicolon at termination of one-digit hex-escape"
exception:illegal-escape
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "\"\\x0\"" read))))
(pass-if-exception "no semicolon at termination of three-digit hex-escape"
exception:illegal-escape
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "\"\\x000\"" read))))
(pass-if "two-digit hex escape"
(eqv?
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(string-ref (with-input-from-string "\"--\\xff;--\"" read) 2)))
(integer->char #xff)))
(pass-if "four-digit hex escape"
(eqv?
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(string-ref (with-input-from-string "\"--\\x0100;--\"" read) 2)))
(integer->char #x0100)))
(pass-if "six-digit hex escape"
(eqv?
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(string-ref (with-input-from-string "\"--\\x010300;--\"" read) 2)))
(integer->char #x010300)))
(pass-if "escaped characters match non-escaped ASCII characters"
(string=?
(with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-input-from-string "\"\\x41;\\x0042;\\x000043;\"" read)))
"ABC"))
(pass-if "write R6RS escapes"
(let* ((s1 (apply string
(map integer->char '(#x8 ; backspace
#x20 ; space
#x30 ; zero
#x40 ; at sign
))))
(s2 (with-read-options '(r6rs-hex-escapes)
(lambda ()
(with-output-to-string
(lambda () (write s1)))))))
(lset= eqv?
(string->list s2)
(list #\" #\\ #\x #\8 #\; #\space #\0 #\@ #\")))))
;;
;; string?
;;