From b306fae0abe38aac6fede98727a47f57a4ba992f Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Tue, 14 Jan 2014 13:26:30 -0500 Subject: [PATCH 1/3] Document that we support srfi-62 and add it to %cond-expand-features. * module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-62. * doc/ref/srfi-modules.texi (SRFI-0): Add srfi-62 to the list of core features. (SRFI-62): New node. --- doc/ref/srfi-modules.texi | 9 +++++++++ module/ice-9/boot-9.scm | 1 + 2 files changed, 10 insertions(+) diff --git a/doc/ref/srfi-modules.texi b/doc/ref/srfi-modules.texi index 7b3d21aeb..6da2b9e88 100644 --- a/doc/ref/srfi-modules.texi +++ b/doc/ref/srfi-modules.texi @@ -51,6 +51,7 @@ get the relevant SRFI documents from the SRFI home page * SRFI-55:: Requiring Features. * SRFI-60:: Integers as bits. * SRFI-61:: A more general `cond' clause +* SRFI-62:: S-expression comments. * SRFI-67:: Compare procedures * SRFI-69:: Basic hash tables. * SRFI-88:: Keyword objects. @@ -154,6 +155,7 @@ srfi-30 srfi-39 srfi-55 srfi-61 +srfi-62 srfi-105 @end example @@ -4836,6 +4838,13 @@ success. SRFI 61 is implemented in the Guile core; there's no module needed to get SRFI-61 itself. Extended @code{cond} is documented in @ref{Conditionals,, Simple Conditional Evaluation}. +@node SRFI-62 +@subsection SRFI-62 - S-expression comments. +@cindex SRFI-62 + +Starting from version 2.0, Guile's @code{read} supports SRFI-62/R7RS +S-expression comments by default. + @node SRFI-67 @subsection SRFI-67 - Compare procedures @cindex SRFI-67 diff --git a/module/ice-9/boot-9.scm b/module/ice-9/boot-9.scm index 5ee90736d..733c2b1a4 100644 --- a/module/ice-9/boot-9.scm +++ b/module/ice-9/boot-9.scm @@ -4043,6 +4043,7 @@ when none is available, reading FILE-NAME with READER." srfi-39 ;; parameterize srfi-55 ;; require-extension srfi-61 ;; general cond clause + srfi-62 ;; s-expression comments srfi-105 ;; curly infix expressions )) From dc59631d3094ad39bba5e40d5c36200fb99023f9 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Sun, 12 Jan 2014 07:55:22 -0500 Subject: [PATCH 2/3] read: Support R7RS |...| symbol notation. * libguile/private-options.h (SCM_R7RS_SYMBOLS_P): New macro. (SCM_N_READ_OPTIONS): Increment. * libguile/read.c (scm_read_opts): Add entry for 'r7rs-symbols'. (t_read_opts): Add field for 'r7rs_symbols_p'. (scm_read_string_like_syntax): New function based on earlier 'scm_read_string' that handles either string literals or R7RS quoted symbols (delimited by vertical bars), depending on the value of 'chr'. (scm_read_string): Reimplement based on 'scm_read_string_like_syntax'. (scm_read_r7rs_symbol): New static function. * doc/ref/api-data.texi (Symbol Read Syntax): Briefly describe the R7RS symbol syntax, mention the 'r7rs-symbols' read option, and give some examples. * doc/ref/api-evaluation.texi (Scheme Read): Mention the 'r7rs-symbols' read option. * test-suite/tests/reader.test ("reading"): Add test. --- doc/ref/api-data.texi | 10 +++++++++ doc/ref/api-evaluation.texi | 8 ++++++-- libguile/private-options.h | 5 +++-- libguile/read.c | 40 ++++++++++++++++++++++++++++++------ test-suite/tests/reader.test | 5 +++++ 5 files changed, 58 insertions(+), 10 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index e711402f4..9fd353d75 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -5575,6 +5575,16 @@ Although Guile provides this extended read syntax for symbols, widespread usage of it is discouraged because it is not portable and not very readable. +Alternatively, if you enable the @code{r7rs-symbols} read option (see +@pxref{Scheme Read}), you can write arbitrary symbols using the same +notation used for strings, except delimited by vertical bars instead of +double quotes. + +@example +|foo bar| +|\x3BB; is a greek lambda| +|\| is a vertical bar| +@end example @node Symbol Uninterned @subsubsection Uninterned Symbols diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi index 63b1d6059..32d14e1d4 100644 --- a/doc/ref/api-evaluation.texi +++ b/doc/ref/api-evaluation.texi @@ -1,7 +1,7 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, 2010, 2011, 2012, 2013 -@c Free Software Foundation, Inc. +@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2009, +@c 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @node Read/Load/Eval/Compile @@ -340,6 +340,7 @@ square-brackets yes Treat `[' and `]' as parentheses, for R6RS compatibility hungry-eol-escapes no In strings, consume leading whitespace after an escaped end-of-line. curly-infix no Support SRFI-105 curly infix expressions. +r7rs-symbols no Support R7RS |...| symbol notation. @end smalllisp Note that Guile also includes a preliminary mechanism for setting read @@ -377,6 +378,9 @@ For example, to make @code{read} fold all symbols to their lower case For more information on the effect of the @code{r6rs-hex-escapes} and @code{hungry-eol-escapes} options, see (@pxref{String Syntax}). +For more information on the @code{r7rs-symbols} option, see +(@pxref{Symbol Read Syntax}). + @node Scheme Write @subsection Writing Scheme Values diff --git a/libguile/private-options.h b/libguile/private-options.h index 4f580a640..1a4ad0fb4 100644 --- a/libguile/private-options.h +++ b/libguile/private-options.h @@ -4,7 +4,7 @@ * We put this in a private header, since layout of data structures * is an implementation detail that we want to hide. * - * Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2007, 2009, 2010, 2011, 2014 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -68,7 +68,8 @@ SCM_INTERNAL scm_t_option scm_read_opts[]; #define SCM_SQUARE_BRACKETS_P scm_read_opts[5].val #define SCM_HUNGRY_EOL_ESCAPES_P scm_read_opts[6].val #define SCM_CURLY_INFIX_P scm_read_opts[7].val +#define SCM_R7RS_SYMBOLS_P scm_read_opts[8].val -#define SCM_N_READ_OPTIONS 8 +#define SCM_N_READ_OPTIONS 9 #endif /* PRIVATE_OPTIONS */ diff --git a/libguile/read.c b/libguile/read.c index e862c206e..f7edc4f8a 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -88,6 +88,8 @@ scm_t_option scm_read_opts[] = "In strings, consume leading whitespace after an escaped end-of-line."}, { SCM_OPTION_BOOLEAN, "curly-infix", 0, "Support SRFI-105 curly infix expressions."}, + { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0, + "Support R7RS |...| symbol notation."}, { 0, }, }; @@ -113,6 +115,7 @@ struct t_read_opts unsigned int hungry_eol_escapes_p : 1; unsigned int curly_infix_p : 1; unsigned int neoteric_p : 1; + unsigned int r7rs_symbols_p : 1; }; typedef struct t_read_opts scm_t_read_opts; @@ -588,8 +591,11 @@ skip_intraline_whitespace (SCM port) scm_ungetc (c, port); } +/* Read either a double-quoted string or an R7RS-style symbol delimited + by vertical lines, depending on the value of 'chr' ('"' or '|'). + Regardless, the result is always returned as a string. */ static SCM -scm_read_string (int chr, SCM port, scm_t_read_opts *opts) +scm_read_string_like_syntax (int chr, SCM port, scm_t_read_opts *opts) #define FUNC_NAME "scm_lreadr" { /* For strings smaller than C_STR, this function creates only one Scheme @@ -603,13 +609,16 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - while ('"' != (c = scm_getc (port))) + while (chr != (c = scm_getc (port))) { if (c == EOF) { str_eof: scm_i_input_error (FUNC_NAME, port, - "end of file in string constant", SCM_EOL); + (chr == '|' + ? "end of file in symbol" + : "end of file in string constant"), + SCM_EOL); } if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE) @@ -624,7 +633,6 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) { case EOF: goto str_eof; - case '"': case '|': case '\\': break; @@ -657,7 +665,7 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) c = '\010'; break; case 'x': - if (opts->r6rs_escapes_p) + if (opts->r6rs_escapes_p || chr == '|') SCM_READ_HEX_ESCAPE (10, ';'); else SCM_READ_HEX_ESCAPE (2, '\0'); @@ -675,6 +683,8 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) break; } default: + if (c == chr) + break; bad_escaped: scm_i_input_error (FUNC_NAME, port, "illegal character in escape sequence: ~S", @@ -700,6 +710,17 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) } #undef FUNC_NAME +static SCM +scm_read_string (int chr, SCM port, scm_t_read_opts *opts) +{ + return scm_read_string_like_syntax (chr, port, opts); +} + +static SCM +scm_read_r7rs_symbol (int chr, SCM port, scm_t_read_opts *opts) +{ + return scm_string_to_symbol (scm_read_string_like_syntax (chr, port, opts)); +} static SCM scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) @@ -1788,6 +1809,11 @@ read_inner_expression (SCM port, scm_t_read_opts *opts) return (scm_read_sexp (chr, port, opts)); case '"': return (scm_read_string (chr, port, opts)); + case '|': + if (opts->r7rs_symbols_p) + return scm_read_r7rs_symbol (chr, port, opts); + else + return scm_read_mixed_case_symbol (chr, port, opts); case '\'': case '`': case ',': @@ -2204,9 +2230,10 @@ SCM_SYMBOL (sym_port_read_options, "port-read-options"); #define READ_OPTION_SQUARE_BRACKETS_P 10 #define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12 #define READ_OPTION_CURLY_INFIX_P 14 +#define READ_OPTION_R7RS_SYMBOLS_P 16 /* The total width in bits of the per-port overrides */ -#define READ_OPTIONS_NUM_BITS 16 +#define READ_OPTIONS_NUM_BITS 18 #define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1) #define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL @@ -2304,6 +2331,7 @@ init_read_options (SCM port, scm_t_read_opts *opts) RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p); RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p); RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p); + RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p); #undef RESOLVE_BOOLEAN_OPTION diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index e0126fe40..18c0293b8 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -238,6 +238,11 @@ (with-read-options '(case-insensitive) (lambda () (read-string "GuiLe"))))) + (pass-if-equal "r7rs-symbols" + (list 'a (string->symbol "Hello, this is | a \"test\"") 'b) + (with-read-options '(r7rs-symbols) + (lambda () + (read-string "(a |H\\x65;llo, this is \\| a \"test\"| b)")))) (pass-if "prefix keywords" (eq? #:keyword (with-read-options '(keywords prefix case-insensitive) From 6e504a7b44a9a25787bdfb2d86fdddd3029f4ba9 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Tue, 14 Jan 2014 17:38:30 -0500 Subject: [PATCH 3/3] print: Support R7RS |...| symbol notation. * libguile/print.c (scm_print_opts): Add 'r7rs-symbols' print option. (symbol_has_extended_read_syntax): If the 'r7rs-symbols' option is enabled, then disallow '|' and '\' from bare symbols. (print_extended_symbol): Use 'scm_lfwrite' and 'scm_putc' instead of 'display_string' and 'display_character' when printing ASCII literals. (print_r7rs_extended_symbol): New static function. (scm_i_print_symbol_name): If the 'r7rs-symbols' option is enabled, use 'print_r7rs_extended_symbol' instead of 'print_extended_symbol'. * libguile/private-options.h (SCM_PRINT_R7RS_SYMBOLS_P): New macro. (SCM_N_PRINT_OPTIONS): Increment. * doc/ref/api-evaluation.texi (Scheme Write): Mention 'r7rs-symbols' print option. * test-suite/tests/print.test ("write"): Add tests. --- doc/ref/api-evaluation.texi | 2 ++ libguile/print.c | 72 +++++++++++++++++++++++++++++++++---- libguile/private-options.h | 3 +- test-suite/tests/print.test | 56 ++++++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 9 deletions(-) diff --git a/doc/ref/api-evaluation.texi b/doc/ref/api-evaluation.texi index 32d14e1d4..4a5b3d16a 100644 --- a/doc/ref/api-evaluation.texi +++ b/doc/ref/api-evaluation.texi @@ -440,6 +440,8 @@ quote-keywordish-symbols reader How to print symbols that have a colon not '#f'. escape-newlines yes Render newlines as \n when printing using `write'. +r7rs-symbols no Escape symbols using R7RS |...| symbol + notation. @end smalllisp These options may be modified with the print-set! syntax. diff --git a/libguile/print.c b/libguile/print.c index 4e68fd6c4..71bb89fde 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995-1999, 2000, 2001, 2002, 2003, 2004, 2006, 2008, - * 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. + * 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -115,6 +115,8 @@ scm_t_option scm_print_opts[] = { "'reader' quotes them when the reader option 'keywords' is not '#f'." }, { SCM_OPTION_BOOLEAN, "escape-newlines", 1, "Render newlines as \\n when printing using `write'." }, + { SCM_OPTION_BOOLEAN, "r7rs-symbols", 0, + "Escape symbols using R7RS |...| symbol notation." }, { 0 }, }; @@ -357,6 +359,10 @@ symbol_has_extended_read_syntax (SCM sym) /* Other initial-character constraints. */ if (c == '\'' || c == '`' || c == ',' || c == '"' || c == ';' || c == '#') return 1; + + /* R7RS allows neither '|' nor '\' in bare symbols. */ + if ((c == '|' || c == '\\') && SCM_PRINT_R7RS_SYMBOLS_P) + return 1; /* Keywords can be identified by trailing colons too. */ if (c == ':' || scm_i_symbol_ref (sym, len - 1) == ':') @@ -380,6 +386,9 @@ symbol_has_extended_read_syntax (SCM sym) return 1; else if (c == '"' || c == ';' || c == '#') return 1; + else if ((c == '|' || c == '\\') && SCM_PRINT_R7RS_SYMBOLS_P) + /* R7RS allows neither '|' nor '\' in bare symbols. */ + return 1; } return 0; @@ -418,23 +427,72 @@ print_extended_symbol (SCM sym, SCM port) } else { - display_string ("\\x", 1, 2, port, iconveh_question_mark); + scm_lfwrite ("\\x", 2, port); scm_intprint (c, 16, port); - display_character (';', port, iconveh_question_mark); + scm_putc (';', port); } } scm_lfwrite ("}#", 2, port); } -/* FIXME: allow R6RS hex escapes instead of #{...}#. */ +static void +print_r7rs_extended_symbol (SCM sym, SCM port) +{ + size_t pos, len; + scm_t_string_failed_conversion_handler strategy; + + len = scm_i_symbol_length (sym); + strategy = PORT_CONVERSION_HANDLER (port); + + scm_putc ('|', port); + + for (pos = 0; pos < len; pos++) + { + scm_t_wchar c = scm_i_symbol_ref (sym, pos); + + switch (c) + { + case '\a': scm_lfwrite ("\\a", 2, port); break; + case '\b': scm_lfwrite ("\\b", 2, port); break; + case '\t': scm_lfwrite ("\\t", 2, port); break; + case '\n': scm_lfwrite ("\\n", 2, port); break; + case '\r': scm_lfwrite ("\\r", 2, port); break; + case '|': scm_lfwrite ("\\|", 2, port); break; + case '\\': scm_lfwrite ("\\x5c;", 5, port); break; + default: + if (uc_is_general_category_withtable (c, + SUBSEQUENT_IDENTIFIER_MASK + | UC_CATEGORY_MASK_Zs)) + { + if (!display_character (c, port, strategy)) + scm_encoding_error ("print_r7rs_extended_symbol", errno, + "cannot convert to output locale", + port, SCM_MAKE_CHAR (c)); + } + else + { + scm_lfwrite ("\\x", 2, port); + scm_intprint (c, 16, port); + scm_putc (';', port); + } + break; + } + } + + scm_putc ('|', port); +} + +/* FIXME: allow R6RS hex escapes instead of #{...}# or |...|. */ void scm_i_print_symbol_name (SCM sym, SCM port) { - if (symbol_has_extended_read_syntax (sym)) - print_extended_symbol (sym, port); - else + if (!symbol_has_extended_read_syntax (sym)) print_normal_symbol (sym, port); + else if (SCM_PRINT_R7RS_SYMBOLS_P) + print_r7rs_extended_symbol (sym, port); + else + print_extended_symbol (sym, port); } void diff --git a/libguile/private-options.h b/libguile/private-options.h index 1a4ad0fb4..a3a0c2b94 100644 --- a/libguile/private-options.h +++ b/libguile/private-options.h @@ -52,7 +52,8 @@ SCM_INTERNAL scm_t_option scm_print_opts[]; #define SCM_PRINT_KEYWORD_STYLE_I 2 #define SCM_PRINT_KEYWORD_STYLE (SCM_PACK (scm_print_opts[2].val)) #define SCM_PRINT_ESCAPE_NEWLINES_P scm_print_opts[3].val -#define SCM_N_PRINT_OPTIONS 4 +#define SCM_PRINT_R7RS_SYMBOLS_P scm_print_opts[4].val +#define SCM_N_PRINT_OPTIONS 5 /* diff --git a/test-suite/tests/print.test b/test-suite/tests/print.test index e60a40f7d..a33776c91 100644 --- a/test-suite/tests/print.test +++ b/test-suite/tests/print.test @@ -1,6 +1,6 @@ ;;;; -*- coding: utf-8; mode: scheme; -*- ;;;; -;;;; Copyright (C) 2010 Free Software Foundation, Inc. +;;;; Copyright (C) 2010, 2014 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -29,6 +29,60 @@ (lambda () (pretty-print 'exp))))))) +(define (with-print-options opts thunk) + (let ((saved-options (print-options))) + (dynamic-wind + (lambda () + (print-options opts)) + thunk + (lambda () + (print-options saved-options))))) + +(define-syntax-rule (write-with-options opts x) + (with-print-options opts (lambda () + (with-output-to-string + (lambda () + (write x)))))) + + +(with-test-prefix "write" + + (with-test-prefix "r7rs-symbols" + + (pass-if-equal "basic" + "|foo bar|" + (write-with-options '(r7rs-symbols) + (string->symbol "foo bar"))) + + (pass-if-equal "escapes" + "|bar \\| backslash \\x5c; alarm \\a backspace \\b tab \\t newline \\n cr \\r null \\x0; del \\x7f;|" + (write-with-options + '(r7rs-symbols) + (string->symbol + (string-append + "bar | backslash \\ alarm \a backspace \b tab \t newline \n cr \r null \0 del " + (string #\del))))) + + (pass-if-equal "starts with bar" + "|\\|foo|" + (write-with-options '(r7rs-symbols) + (string->symbol "|foo"))) + + (pass-if-equal "ends with bar" + "|foo\\||" + (write-with-options '(r7rs-symbols) + (string->symbol "foo|"))) + + (pass-if-equal "starts with backslash" + "|\\x5c;foo|" + (write-with-options '(r7rs-symbols) + (string->symbol "\\foo"))) + + (pass-if-equal "ends with backslash" + "|foo\\x5c;|" + (write-with-options '(r7rs-symbols) + (string->symbol "foo\\"))))) + (with-test-prefix "pretty-print"