1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00

read: Support R7RS |...| symbol notation.

* libguile/private-options.h (SCM_R7RS_SYMBOLS_P): New macro.
  (SCM_N_READ_OPTIONS): Increment.

* libguile/read.c (scm_read_opts): Add entry for 'r7rs-symbols'.
  (t_read_opts): Add field for 'r7rs_symbols_p'.
  (scm_read_string_like_syntax): New function based on earlier
  'scm_read_string' that handles either string literals or R7RS quoted
  symbols (delimited by vertical bars), depending on the value of 'chr'.
  (scm_read_string): Reimplement based on 'scm_read_string_like_syntax'.
  (scm_read_r7rs_symbol): New static function.

* doc/ref/api-data.texi (Symbol Read Syntax): Briefly describe the R7RS
  symbol syntax, mention the 'r7rs-symbols' read option, and give some
  examples.

* doc/ref/api-evaluation.texi (Scheme Read): Mention the 'r7rs-symbols'
  read option.

* test-suite/tests/reader.test ("reading"): Add test.
This commit is contained in:
Mark H Weaver 2014-01-12 07:55:22 -05:00
parent b306fae0ab
commit dc59631d30
5 changed files with 58 additions and 10 deletions

View file

@ -5575,6 +5575,16 @@ Although Guile provides this extended read syntax for symbols,
widespread usage of it is discouraged because it is not portable and not widespread usage of it is discouraged because it is not portable and not
very readable. very readable.
Alternatively, if you enable the @code{r7rs-symbols} read option (see
@pxref{Scheme Read}), you can write arbitrary symbols using the same
notation used for strings, except delimited by vertical bars instead of
double quotes.
@example
|foo bar|
|\x3BB; is a greek lambda|
|\| is a vertical bar|
@end example
@node Symbol Uninterned @node Symbol Uninterned
@subsubsection Uninterned Symbols @subsubsection Uninterned Symbols

View file

@ -1,7 +1,7 @@
@c -*-texinfo-*- @c -*-texinfo-*-
@c This is part of the GNU Guile Reference Manual. @c This is part of the GNU Guile Reference Manual.
@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, 2010, 2011, 2012, 2013 @c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009,
@c Free Software Foundation, Inc. @c 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
@c See the file guile.texi for copying conditions. @c See the file guile.texi for copying conditions.
@node Read/Load/Eval/Compile @node Read/Load/Eval/Compile
@ -340,6 +340,7 @@ square-brackets yes Treat `[' and `]' as parentheses, for R6RS compatibility
hungry-eol-escapes no In strings, consume leading whitespace after an hungry-eol-escapes no In strings, consume leading whitespace after an
escaped end-of-line. escaped end-of-line.
curly-infix no Support SRFI-105 curly infix expressions. curly-infix no Support SRFI-105 curly infix expressions.
r7rs-symbols no Support R7RS |...| symbol notation.
@end smalllisp @end smalllisp
Note that Guile also includes a preliminary mechanism for setting read Note that Guile also includes a preliminary mechanism for setting read
@ -377,6 +378,9 @@ For example, to make @code{read} fold all symbols to their lower case
For more information on the effect of the @code{r6rs-hex-escapes} and For more information on the effect of the @code{r6rs-hex-escapes} and
@code{hungry-eol-escapes} options, see (@pxref{String Syntax}). @code{hungry-eol-escapes} options, see (@pxref{String Syntax}).
For more information on the @code{r7rs-symbols} option, see
(@pxref{Symbol Read Syntax}).
@node Scheme Write @node Scheme Write
@subsection Writing Scheme Values @subsection Writing Scheme Values

View file

@ -4,7 +4,7 @@
* We put this in a private header, since layout of data structures * We put this in a private header, since layout of data structures
* is an implementation detail that we want to hide. * is an implementation detail that we want to hide.
* *
* Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc. * Copyright (C) 2007, 2009, 2010, 2011, 2014 Free Software Foundation, Inc.
* *
* This library is free software; you can redistribute it and/or * This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License * modify it under the terms of the GNU Lesser General Public License
@ -68,7 +68,8 @@ SCM_INTERNAL scm_t_option scm_read_opts[];
#define SCM_SQUARE_BRACKETS_P scm_read_opts[5].val #define SCM_SQUARE_BRACKETS_P scm_read_opts[5].val
#define SCM_HUNGRY_EOL_ESCAPES_P scm_read_opts[6].val #define SCM_HUNGRY_EOL_ESCAPES_P scm_read_opts[6].val
#define SCM_CURLY_INFIX_P scm_read_opts[7].val #define SCM_CURLY_INFIX_P scm_read_opts[7].val
#define SCM_R7RS_SYMBOLS_P scm_read_opts[8].val
#define SCM_N_READ_OPTIONS 8 #define SCM_N_READ_OPTIONS 9
#endif /* PRIVATE_OPTIONS */ #endif /* PRIVATE_OPTIONS */

View file

@ -88,6 +88,8 @@ scm_t_option scm_read_opts[] =
"In strings, consume leading whitespace after an escaped end-of-line."}, "In strings, consume leading whitespace after an escaped end-of-line."},
{ SCM_OPTION_BOOLEAN, "curly-infix", 0, { SCM_OPTION_BOOLEAN, "curly-infix", 0,
"Support SRFI-105 curly infix expressions."}, "Support SRFI-105 curly infix expressions."},
{ SCM_OPTION_BOOLEAN, "r7rs-symbols", 0,
"Support R7RS |...| symbol notation."},
{ 0, }, { 0, },
}; };
@ -113,6 +115,7 @@ struct t_read_opts
unsigned int hungry_eol_escapes_p : 1; unsigned int hungry_eol_escapes_p : 1;
unsigned int curly_infix_p : 1; unsigned int curly_infix_p : 1;
unsigned int neoteric_p : 1; unsigned int neoteric_p : 1;
unsigned int r7rs_symbols_p : 1;
}; };
typedef struct t_read_opts scm_t_read_opts; typedef struct t_read_opts scm_t_read_opts;
@ -588,8 +591,11 @@ skip_intraline_whitespace (SCM port)
scm_ungetc (c, port); scm_ungetc (c, port);
} }
/* Read either a double-quoted string or an R7RS-style symbol delimited
by vertical lines, depending on the value of 'chr' ('"' or '|').
Regardless, the result is always returned as a string. */
static SCM static SCM
scm_read_string (int chr, SCM port, scm_t_read_opts *opts) scm_read_string_like_syntax (int chr, SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_lreadr" #define FUNC_NAME "scm_lreadr"
{ {
/* For strings smaller than C_STR, this function creates only one Scheme /* For strings smaller than C_STR, this function creates only one Scheme
@ -603,13 +609,16 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
long line = SCM_LINUM (port); long line = SCM_LINUM (port);
int column = SCM_COL (port) - 1; int column = SCM_COL (port) - 1;
while ('"' != (c = scm_getc (port))) while (chr != (c = scm_getc (port)))
{ {
if (c == EOF) if (c == EOF)
{ {
str_eof: str_eof:
scm_i_input_error (FUNC_NAME, port, scm_i_input_error (FUNC_NAME, port,
"end of file in string constant", SCM_EOL); (chr == '|'
? "end of file in symbol"
: "end of file in string constant"),
SCM_EOL);
} }
if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE) if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
@ -624,7 +633,6 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
{ {
case EOF: case EOF:
goto str_eof; goto str_eof;
case '"':
case '|': case '|':
case '\\': case '\\':
break; break;
@ -657,7 +665,7 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
c = '\010'; c = '\010';
break; break;
case 'x': case 'x':
if (opts->r6rs_escapes_p) if (opts->r6rs_escapes_p || chr == '|')
SCM_READ_HEX_ESCAPE (10, ';'); SCM_READ_HEX_ESCAPE (10, ';');
else else
SCM_READ_HEX_ESCAPE (2, '\0'); SCM_READ_HEX_ESCAPE (2, '\0');
@ -675,6 +683,8 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
break; break;
} }
default: default:
if (c == chr)
break;
bad_escaped: bad_escaped:
scm_i_input_error (FUNC_NAME, port, scm_i_input_error (FUNC_NAME, port,
"illegal character in escape sequence: ~S", "illegal character in escape sequence: ~S",
@ -700,6 +710,17 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
} }
#undef FUNC_NAME #undef FUNC_NAME
static SCM
scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
{
return scm_read_string_like_syntax (chr, port, opts);
}
static SCM
scm_read_r7rs_symbol (int chr, SCM port, scm_t_read_opts *opts)
{
return scm_string_to_symbol (scm_read_string_like_syntax (chr, port, opts));
}
static SCM static SCM
scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
@ -1788,6 +1809,11 @@ read_inner_expression (SCM port, scm_t_read_opts *opts)
return (scm_read_sexp (chr, port, opts)); return (scm_read_sexp (chr, port, opts));
case '"': case '"':
return (scm_read_string (chr, port, opts)); return (scm_read_string (chr, port, opts));
case '|':
if (opts->r7rs_symbols_p)
return scm_read_r7rs_symbol (chr, port, opts);
else
return scm_read_mixed_case_symbol (chr, port, opts);
case '\'': case '\'':
case '`': case '`':
case ',': case ',':
@ -2204,9 +2230,10 @@ SCM_SYMBOL (sym_port_read_options, "port-read-options");
#define READ_OPTION_SQUARE_BRACKETS_P 10 #define READ_OPTION_SQUARE_BRACKETS_P 10
#define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
#define READ_OPTION_CURLY_INFIX_P 14 #define READ_OPTION_CURLY_INFIX_P 14
#define READ_OPTION_R7RS_SYMBOLS_P 16
/* The total width in bits of the per-port overrides */ /* The total width in bits of the per-port overrides */
#define READ_OPTIONS_NUM_BITS 16 #define READ_OPTIONS_NUM_BITS 18
#define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1) #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
#define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
@ -2304,6 +2331,7 @@ init_read_options (SCM port, scm_t_read_opts *opts)
RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p); RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p); RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p);
#undef RESOLVE_BOOLEAN_OPTION #undef RESOLVE_BOOLEAN_OPTION

View file

@ -238,6 +238,11 @@
(with-read-options '(case-insensitive) (with-read-options '(case-insensitive)
(lambda () (lambda ()
(read-string "GuiLe"))))) (read-string "GuiLe")))))
(pass-if-equal "r7rs-symbols"
(list 'a (string->symbol "Hello, this is | a \"test\"") 'b)
(with-read-options '(r7rs-symbols)
(lambda ()
(read-string "(a |H\\x65;llo, this is \\| a \"test\"| b)"))))
(pass-if "prefix keywords" (pass-if "prefix keywords"
(eq? #:keyword (eq? #:keyword
(with-read-options '(keywords prefix case-insensitive) (with-read-options '(keywords prefix case-insensitive)