From 58996e37bba53ae91e6ecff56aa2bb155047bc1e Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Wed, 8 Feb 2012 03:14:17 -0500 Subject: [PATCH 1/9] Remove incorrect comment in read.c * libguile/read.c (scm_read_sharp): Remove incorrect comment that claims that scm_read_boolean might return a SRFI-4 vector. --- libguile/read.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libguile/read.c b/libguile/read.c index 616672418..cf5d8177a 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -1,5 +1,5 @@ -/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software - * Foundation, Inc. +/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006, + * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -1332,7 +1332,6 @@ scm_read_sharp (scm_t_wchar chr, SCM port) case 't': case 'T': case 'F': - /* This one may return either a boolean or an SRFI-4 vector. */ return (scm_read_boolean (chr, port)); case ':': return (scm_read_keyword (chr, port)); From cfd15439b2d2b7a9410e379dc60c21e9010eccfc Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Wed, 8 Feb 2012 03:00:15 -0500 Subject: [PATCH 2/9] Remove inline and register attributes from read.c * libguile/read.c: Remove all 'inline' and 'register' attributes. --- libguile/read.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/libguile/read.c b/libguile/read.c index cf5d8177a..d026f0565 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -142,13 +142,13 @@ SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0, characters to procedures. */ static SCM *scm_i_read_hash_procedures; -static inline SCM +static SCM scm_i_read_hash_procedures_ref (void) { return scm_fluid_ref (*scm_i_read_hash_procedures); } -static inline void +static void scm_i_read_hash_procedures_set_x (SCM value) { scm_fluid_set_x (*scm_i_read_hash_procedures, value); @@ -197,7 +197,7 @@ scm_i_read_hash_procedures_set_x (SCM value) || ((_chr) == 'd') || ((_chr) == 'l')) /* Read an SCSH block comment. */ -static inline SCM scm_read_scsh_block_comment (scm_t_wchar, SCM); +static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM); static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM); static SCM scm_read_commented_expression (scm_t_wchar, SCM); static SCM scm_read_shebang (scm_t_wchar, SCM); @@ -207,7 +207,7 @@ static SCM scm_get_hash_procedure (int); result in the pre-allocated buffer BUF. Return zero if the whole token has fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of bytes actually read. */ -static inline int +static int read_token (SCM port, char *buf, const size_t buf_size, size_t *read) { *read = 0; @@ -286,7 +286,7 @@ read_complete_token (SCM port, char *buffer, const size_t buffer_size, static int flush_ws (SCM port, const char *eoferr) { - register scm_t_wchar c; + scm_t_wchar c; while (1) switch (c = scm_getc (port)) { @@ -836,7 +836,7 @@ scm_read_syntax (int chr, SCM port) return p; } -static inline SCM +static SCM scm_read_nil (int chr, SCM port) { SCM id = scm_read_mixed_case_symbol (chr, port); @@ -849,7 +849,7 @@ scm_read_nil (int chr, SCM port) return SCM_ELISP_NIL; } -static inline SCM +static SCM scm_read_semicolon_comment (int chr, SCM port) { int c; @@ -990,7 +990,7 @@ scm_read_character (scm_t_wchar chr, SCM port) } #undef FUNC_NAME -static inline SCM +static SCM scm_read_keyword (int chr, SCM port) { SCM symbol; @@ -1009,7 +1009,7 @@ scm_read_keyword (int chr, SCM port) return (scm_symbol_to_keyword (symbol)); } -static inline SCM +static SCM scm_read_vector (int chr, SCM port) { /* Note: We call `scm_read_sexp ()' rather than READER here in order to @@ -1019,7 +1019,7 @@ scm_read_vector (int chr, SCM port) return (scm_vector (scm_read_sexp (chr, port))); } -static inline SCM +static SCM scm_read_srfi4_vector (int chr, SCM port) { return scm_i_read_array (port, chr); @@ -1069,7 +1069,7 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port) return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)); } -static inline SCM +static SCM scm_read_scsh_block_comment (scm_t_wchar chr, SCM port) { int bang_seen = 0; @@ -1414,7 +1414,7 @@ scm_read_expression (SCM port) { while (1) { - register scm_t_wchar chr; + scm_t_wchar chr; chr = scm_getc (port); From d6cb0203cb58ea352b4e9de5eea4325e379c175c Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Wed, 8 Feb 2012 03:10:11 -0500 Subject: [PATCH 3/9] Add and use maybe_annotate_source helper in read.c * libguile/read.c (maybe_annotate_source): New static helper function. (scm_read_sexp, scm_read_quote, scm_read_syntax): Use 'maybe_annotate_source'. --- libguile/read.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/libguile/read.c b/libguile/read.c index d026f0565..4cdde4ab2 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -359,6 +359,14 @@ static SCM scm_read_expression (SCM port); static SCM scm_read_sharp (int chr, SCM port); +static SCM +maybe_annotate_source (SCM x, SCM port, long line, int column) +{ + if (SCM_RECORD_POSITIONS_P) + scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port)); + return x; +} + static SCM scm_read_sexp (scm_t_wchar chr, SCM port) #define FUNC_NAME "scm_i_lreadparen" @@ -423,10 +431,7 @@ scm_read_sexp (scm_t_wchar chr, SCM port) } exit: - if (SCM_RECORD_POSITIONS_P) - scm_i_set_source_properties_x (ans, line, column, SCM_FILENAME (port)); - - return ans; + return maybe_annotate_source (ans, port, line, column); } #undef FUNC_NAME @@ -780,10 +785,7 @@ scm_read_quote (int chr, SCM port) } p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); - if (SCM_RECORD_POSITIONS_P) - scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port)); - - return p; + return maybe_annotate_source (p, port, line, column); } SCM_SYMBOL (sym_syntax, "syntax"); @@ -830,10 +832,7 @@ scm_read_syntax (int chr, SCM port) } p = scm_cons2 (p, scm_read_expression (port), SCM_EOL); - if (SCM_RECORD_POSITIONS_P) - scm_i_set_source_properties_x (p, line, column, SCM_FILENAME (port)); - - return p; + return maybe_annotate_source (p, port, line, column); } static SCM From d5b75b6c803e746e6ec019951716bf4ff2ebc84b Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Wed, 8 Feb 2012 15:29:10 -0500 Subject: [PATCH 4/9] Optimize empty substring case of scm_i_substring_copy * libguile/strings.c (scm_i_substring_copy): When asked to create an empty substring, use 'scm_i_make_string' to make use of its optimization for empty strings that reuses the global null_stringbuf. --- libguile/strings.c | 49 +++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/libguile/strings.c b/libguile/strings.c index a5960bc4e..69632d697 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -372,31 +372,36 @@ scm_i_substring_read_only (SCM str, size_t start, size_t end) SCM scm_i_substring_copy (SCM str, size_t start, size_t end) { - size_t len = end - start; - SCM buf, my_buf, substr; - size_t str_start; - int wide = 0; - get_str_buf_start (&str, &buf, &str_start); - if (scm_i_is_narrow_string (str)) - { - my_buf = make_stringbuf (len); - memcpy (STRINGBUF_CHARS (my_buf), - STRINGBUF_CHARS (buf) + str_start + start, len); - } + if (start == end) + return scm_i_make_string (0, NULL, 0); else { - my_buf = make_wide_stringbuf (len); - u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf), - (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start - + start), len); - wide = 1; + size_t len = end - start; + SCM buf, my_buf, substr; + size_t str_start; + int wide = 0; + get_str_buf_start (&str, &buf, &str_start); + if (scm_i_is_narrow_string (str)) + { + my_buf = make_stringbuf (len); + memcpy (STRINGBUF_CHARS (my_buf), + STRINGBUF_CHARS (buf) + str_start + start, len); + } + else + { + my_buf = make_wide_stringbuf (len); + u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf), + (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start + + start), len); + wide = 1; + } + scm_remember_upto_here_1 (buf); + substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf), + (scm_t_bits) 0, (scm_t_bits) len); + if (wide) + scm_i_try_narrow_string (substr); + return substr; } - scm_remember_upto_here_1 (buf); - substr = scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf), - (scm_t_bits) 0, (scm_t_bits) len); - if (wide) - scm_i_try_narrow_string (substr); - return substr; } SCM From 043850d984c184a1e642a60a38723e63bf3be73a Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Wed, 8 Feb 2012 15:32:55 -0500 Subject: [PATCH 5/9] Unoptimize 'read' to return freshly allocated empty strings * libguile/read.c (scm_read_string): Return a freshly allocated string every time, even for empty strings. The motivation is to allow source properties to be added to all strings. Previously, the shared global 'scm_nullstr' was returned for empty strings. Note that empty strings still share a common global 'null_stringbuf'. * test-suite/tests/srfi-13.test (substring/shared): Fix tests to reflect the fact that empty string literals are no longer guaranteed to be 'eq?' to each other. --- libguile/read.c | 8 +------- test-suite/tests/srfi-13.test | 14 ++++++++------ 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/libguile/read.c b/libguile/read.c index 4cdde4ab2..dcd37e1d9 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -580,13 +580,7 @@ scm_read_string (int chr, SCM port) scm_i_string_set_x (str, c_str_len++, c); scm_i_string_stop_writing (); } - - if (c_str_len > 0) - { - return scm_i_substring_copy (str, 0, c_str_len); - } - - return scm_nullstr; + return scm_i_substring_copy (str, 0, c_str_len); } #undef FUNC_NAME diff --git a/test-suite/tests/srfi-13.test b/test-suite/tests/srfi-13.test index 5575a700f..de6df8e52 100644 --- a/test-suite/tests/srfi-13.test +++ b/test-suite/tests/srfi-13.test @@ -1,7 +1,7 @@ ;;;; srfi-13.test --- Test suite for Guile's SRFI-13 functions. -*- scheme -*- ;;;; Martin Grabmueller, 2001-05-07 ;;;; -;;;; Copyright (C) 2001, 2004, 2005, 2006, 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 2001, 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -561,13 +561,15 @@ (with-test-prefix "substring/shared" (pass-if "empty string" - (eq? "" (substring/shared "" 0))) - - (pass-if "non-empty string" - (string=? "foo" (substring/shared "foo-bar" 0 3))) + (let ((s "")) + (eq? s (substring/shared s 0)))) (pass-if "non-empty string, not eq?" - (string=? "foo-bar" (substring/shared "foo-bar" 0 7)))) + (string=? "foo" (substring/shared "foo-bar" 0 3))) + + (pass-if "shared copy of non-empty string is eq?" + (let ((s "foo-bar")) + (eq? s (substring/shared s 0 7))))) (with-test-prefix "string-copy!" From b131b233ff9530546ca7afbb4daa682b65015e8b Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Wed, 8 Feb 2012 15:51:38 -0500 Subject: [PATCH 6/9] Add source properties to many more types of data * libguile/read.c (scm_read_array): New internal helper that calls scm_i_read_array and sets its source property if the 'positions' reader option is set. (scm_read_string): Set source properties on strings if the 'positions' reader option is set. (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the 'line' and 'column' of the first character of the datum being read. Set source properties if the 'positions' reader option is set. (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp. * doc/ref/api-debug.texi (Source Properties): Update manual. --- doc/ref/api-debug.texi | 14 +++++----- libguile/read.c | 59 ++++++++++++++++++++++++++++-------------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/doc/ref/api-debug.texi b/doc/ref/api-debug.texi index cf9ea5aca..c5fbe5629 100644 --- a/doc/ref/api-debug.texi +++ b/doc/ref/api-debug.texi @@ -238,11 +238,11 @@ that, if an error occurs when evaluating the transformed expression, Guile's debugger can point back to the file and location where the expression originated. -The way that source properties are stored means that Guile can only -associate source properties with parenthesized expressions, and not, for -example, with individual symbols, numbers or strings. The difference -can be seen by typing @code{(xxx)} and @code{xxx} at the Guile prompt -(where the variable @code{xxx} has not been defined): +The way that source properties are stored means that Guile cannot +associate source properties with individual numbers, symbols, +characters, booleans, or keywords. This can be seen by typing +@code{(xxx)} and @code{xxx} at the Guile prompt (where the variable +@code{xxx} has not been defined): @example scheme@@(guile-user)> (xxx) @@ -288,8 +288,8 @@ Return the property specified by @var{key} from @var{obj}'s source properties. @end deffn -If the @code{positions} reader option is enabled, each parenthesized -expression will have values set for the @code{filename}, @code{line} and +If the @code{positions} reader option is enabled, supported expressions +will have values set for the @code{filename}, @code{line} and @code{column} properties. Source properties are also associated with syntax objects. Procedural diff --git a/libguile/read.c b/libguile/read.c index dcd37e1d9..4b1975008 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -356,7 +356,7 @@ flush_ws (SCM port, const char *eoferr) /* Token readers. */ static SCM scm_read_expression (SCM port); -static SCM scm_read_sharp (int chr, SCM port); +static SCM scm_read_sharp (int chr, SCM port, long line, int column); static SCM @@ -497,6 +497,10 @@ scm_read_string (int chr, SCM port) unsigned c_str_len = 0; scm_t_wchar c; + /* Need to capture line and column numbers here. */ + long line = SCM_LINUM (port); + int column = SCM_COL (port) - 1; + str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0); while ('"' != (c = scm_getc (port))) { @@ -580,7 +584,8 @@ scm_read_string (int chr, SCM port) scm_i_string_set_x (str, c_str_len++, c); scm_i_string_stop_writing (); } - return scm_i_substring_copy (str, 0, c_str_len); + return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len), + port, line, column); } #undef FUNC_NAME @@ -1003,23 +1008,34 @@ scm_read_keyword (int chr, SCM port) } static SCM -scm_read_vector (int chr, SCM port) +scm_read_vector (int chr, SCM port, long line, int column) { /* Note: We call `scm_read_sexp ()' rather than READER here in order to guarantee that it's going to do what we want. After all, this is an implementation detail of `scm_read_vector ()', not a desirable property. */ - return (scm_vector (scm_read_sexp (chr, port))); + return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port)), + port, line, column); } static SCM -scm_read_srfi4_vector (int chr, SCM port) +scm_read_array (int chr, SCM port, long line, int column) { - return scm_i_read_array (port, chr); + SCM result = scm_i_read_array (port, chr); + if (scm_is_false (result)) + return result; + else + return maybe_annotate_source (result, port, line, column); } static SCM -scm_read_bytevector (scm_t_wchar chr, SCM port) +scm_read_srfi4_vector (int chr, SCM port, long line, int column) +{ + return scm_read_array (chr, port, line, column); +} + +static SCM +scm_read_bytevector (scm_t_wchar chr, SCM port, long line, int column) { chr = scm_getc (port); if (chr != 'u') @@ -1033,7 +1049,9 @@ scm_read_bytevector (scm_t_wchar chr, SCM port) if (chr != '(') goto syntax; - return scm_u8_list_to_bytevector (scm_read_sexp (chr, port)); + return maybe_annotate_source + (scm_u8_list_to_bytevector (scm_read_sexp (chr, port)), + port, line, column); syntax: scm_i_input_error ("read_bytevector", port, @@ -1043,7 +1061,7 @@ scm_read_bytevector (scm_t_wchar chr, SCM port) } static SCM -scm_read_guile_bit_vector (scm_t_wchar chr, SCM port) +scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, long line, int column) { /* Read the `#*10101'-style read syntax for bit vectors in Guile. This is terribly inefficient but who cares? */ @@ -1059,7 +1077,9 @@ scm_read_guile_bit_vector (scm_t_wchar chr, SCM port) if (chr != EOF) scm_ungetc (chr, port); - return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)); + return maybe_annotate_source + (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)), + port, line, column); } static SCM @@ -1295,7 +1315,7 @@ scm_read_sharp_extension (int chr, SCM port) /* The reader for the sharp `#' character. It basically dispatches reads among the above token readers. */ static SCM -scm_read_sharp (scm_t_wchar chr, SCM port) +scm_read_sharp (scm_t_wchar chr, SCM port, long line, int column) #define FUNC_NAME "scm_lreadr" { SCM result; @@ -1311,17 +1331,17 @@ scm_read_sharp (scm_t_wchar chr, SCM port) case '\\': return (scm_read_character (chr, port)); case '(': - return (scm_read_vector (chr, port)); + return (scm_read_vector (chr, port, line, column)); case 's': case 'u': case 'f': case 'c': /* This one may return either a boolean or an SRFI-4 vector. */ - return (scm_read_srfi4_vector (chr, port)); + return (scm_read_srfi4_vector (chr, port, line, column)); case 'v': - return (scm_read_bytevector (chr, port)); + return (scm_read_bytevector (chr, port, line, column)); case '*': - return (scm_read_guile_bit_vector (chr, port)); + return (scm_read_guile_bit_vector (chr, port, line, column)); case 't': case 'T': case 'F': @@ -1338,7 +1358,7 @@ scm_read_sharp (scm_t_wchar chr, SCM port) case 'h': case 'l': #endif - return (scm_i_read_array (port, chr)); + return (scm_read_array (chr, port, line, column)); case 'i': case 'e': @@ -1350,7 +1370,7 @@ scm_read_sharp (scm_t_wchar chr, SCM port) if (next_c != EOF) scm_ungetc (next_c, port); if (next_c == '(') - return scm_i_read_array (port, chr); + return scm_read_array (chr, port, line, column); /* Fall through. */ } #endif @@ -1433,8 +1453,9 @@ scm_read_expression (SCM port) return (scm_read_quote (chr, port)); case '#': { - SCM result; - result = scm_read_sharp (chr, port); + long line = SCM_LINUM (port); + int column = SCM_COL (port) - 1; + SCM result = scm_read_sharp (chr, port, line, column); if (scm_is_eq (result, SCM_UNSPECIFIED)) /* We read a comment or some such. */ break; From e7cf0457d7c71acd2c597d1644328960f136e4bc Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Tue, 7 Feb 2012 19:40:29 -0500 Subject: [PATCH 7/9] Support => within case, and improve error messages for cond and case * module/ice-9/boot-9.scm (cond, case): Reimplement using syntax-case, with improved error messages and support for '=>' within 'case' as mandated by the R7RS. Add warnings for duplicate case datums and case datums that cannot be meaningfully compared using 'eqv?'. * module/system/base/message.scm (%warning-types): Add 'bad-case-datum' and 'duplicate-case-datum' warning types. * test-suite/tests/syntax.test (cond, case): Update tests to reflect improved error reporting. Add tests for '=>' within 'case'. * test-suite/tests/tree-il.test (partial evaluation): Update tests to reflect changes in how 'case' is expanded. * doc/ref/api-control.texi (Conditionals): Document '=>' within 'case'. --- doc/ref/api-control.texi | 19 +++- module/ice-9/boot-9.scm | 192 +++++++++++++++++++++++---------- module/system/base/message.scm | 14 +++ test-suite/tests/syntax.test | 77 ++++++++++--- test-suite/tests/tree-il.test | 16 ++- 5 files changed, 234 insertions(+), 84 deletions(-) diff --git a/doc/ref/api-control.texi b/doc/ref/api-control.texi index fc5935070..ca7ad4af6 100644 --- a/doc/ref/api-control.texi +++ b/doc/ref/api-control.texi @@ -212,18 +212,30 @@ result of the @code{cond}-expression. @end deffn @deffn syntax case key clause1 clause2 @dots{} -@var{key} may be any expression, the @var{clause}s must have the form +@var{key} may be any expression, and the @var{clause}s must have the form @lisp ((@var{datum1} @dots{}) @var{expr1} @var{expr2} @dots{}) @end lisp +or + +@lisp +((@var{datum1} @dots{}) => @var{expression}) +@end lisp + and the last @var{clause} may have the form @lisp (else @var{expr1} @var{expr2} @dots{}) @end lisp +or + +@lisp +(else => @var{expression}) +@end lisp + All @var{datum}s must be distinct. First, @var{key} is evaluated. The result of this evaluation is compared against all @var{datum} values using @code{eqv?}. When this comparison succeeds, the expression(s) following @@ -234,6 +246,11 @@ If the @var{key} matches no @var{datum} and there is an @code{else}-clause, the expressions following the @code{else} are evaluated. If there is no such clause, the result of the expression is unspecified. + +For the @code{=>} clause types, @var{expression} is evaluated and the +resulting procedure is applied to the value of @var{key}. The result of +this procedure application is then the result of the +@code{case}-expression. @end deffn diff --git a/module/ice-9/boot-9.scm b/module/ice-9/boot-9.scm index d1bbd95ff..41ce92483 100644 --- a/module/ice-9/boot-9.scm +++ b/module/ice-9/boot-9.scm @@ -411,70 +411,150 @@ If there is no handler at all, Guile prints an error and then exits." ((_ x) x) ((_ x y ...) (let ((t x)) (if t t (or y ...)))))) +(include-from-path "ice-9/quasisyntax") + (define-syntax-rule (when test stmt stmt* ...) (if test (begin stmt stmt* ...))) (define-syntax-rule (unless test stmt stmt* ...) (if (not test) (begin stmt stmt* ...))) -;; The "maybe-more" bits are something of a hack, so that we can support -;; SRFI-61. Rewrites into a standalone syntax-case macro would be -;; appreciated. (define-syntax cond - (syntax-rules (=> else) - ((_ "maybe-more" test consequent) - (if test consequent)) - - ((_ "maybe-more" test consequent clause ...) - (if test consequent (cond clause ...))) - - ((_ (else else1 else2 ...)) - (begin else1 else2 ...)) - - ((_ (test => receiver) more-clause ...) - (let ((t test)) - (cond "maybe-more" t (receiver t) more-clause ...))) - - ((_ (generator guard => receiver) more-clause ...) - (call-with-values (lambda () generator) - (lambda t - (cond "maybe-more" - (apply guard t) (apply receiver t) more-clause ...)))) - - ((_ (test => receiver ...) more-clause ...) - (syntax-violation 'cond "wrong number of receiver expressions" - '(test => receiver ...))) - ((_ (generator guard => receiver ...) more-clause ...) - (syntax-violation 'cond "wrong number of receiver expressions" - '(generator guard => receiver ...))) - - ((_ (test) more-clause ...) - (let ((t test)) - (cond "maybe-more" t t more-clause ...))) - - ((_ (test body1 body2 ...) more-clause ...) - (cond "maybe-more" - test (begin body1 body2 ...) more-clause ...)))) + (lambda (whole-expr) + (define (fold f seed xs) + (let loop ((xs xs) (seed seed)) + (if (null? xs) seed + (loop (cdr xs) (f (car xs) seed))))) + (define (reverse-map f xs) + (fold (lambda (x seed) (cons (f x) seed)) + '() xs)) + (syntax-case whole-expr () + ((_ clause clauses ...) + #`(begin + #,@(fold (lambda (clause-builder tail) + (clause-builder tail)) + #'() + (reverse-map + (lambda (clause) + (define* (bad-clause #:optional (msg "invalid clause")) + (syntax-violation 'cond msg whole-expr clause)) + (syntax-case clause (=> else) + ((else e e* ...) + (lambda (tail) + (if (null? tail) + #'((begin e e* ...)) + (bad-clause "else must be the last clause")))) + ((else . _) (bad-clause)) + ((test => receiver) + (lambda (tail) + #`((let ((t test)) + (if t + (receiver t) + #,@tail))))) + ((test => receiver ...) + (bad-clause "wrong number of receiver expressions")) + ((generator guard => receiver) + (lambda (tail) + #`((call-with-values (lambda () generator) + (lambda vals + (if (apply guard vals) + (apply receiver vals) + #,@tail)))))) + ((generator guard => receiver ...) + (bad-clause "wrong number of receiver expressions")) + ((test) + (lambda (tail) + #`((let ((t test)) + (if t t #,@tail))))) + ((test e e* ...) + (lambda (tail) + #`((if test + (begin e e* ...) + #,@tail)))) + (_ (bad-clause)))) + #'(clause clauses ...)))))))) (define-syntax case - (syntax-rules (else) - ((case (key ...) - clauses ...) - (let ((atom-key (key ...))) - (case atom-key clauses ...))) - ((case key - (else result1 result2 ...)) - (begin result1 result2 ...)) - ((case key - ((atoms ...) result1 result2 ...)) - (if (memv key '(atoms ...)) - (begin result1 result2 ...))) - ((case key - ((atoms ...) result1 result2 ...) - clause clauses ...) - (if (memv key '(atoms ...)) - (begin result1 result2 ...) - (case key clause clauses ...))))) + (lambda (whole-expr) + (define (fold f seed xs) + (let loop ((xs xs) (seed seed)) + (if (null? xs) seed + (loop (cdr xs) (f (car xs) seed))))) + (define (fold2 f a b xs) + (let loop ((xs xs) (a a) (b b)) + (if (null? xs) (values a b) + (call-with-values + (lambda () (f (car xs) a b)) + (lambda (a b) + (loop (cdr xs) a b)))))) + (define (reverse-map-with-seed f seed xs) + (fold2 (lambda (x ys seed) + (call-with-values + (lambda () (f x seed)) + (lambda (y seed) + (values (cons y ys) seed)))) + '() seed xs)) + (syntax-case whole-expr () + ((_ expr clause clauses ...) + (with-syntax ((key #'key)) + #`(let ((key expr)) + #,@(fold + (lambda (clause-builder tail) + (clause-builder tail)) + #'() + (reverse-map-with-seed + (lambda (clause seen) + (define* (bad-clause #:optional (msg "invalid clause")) + (syntax-violation 'case msg whole-expr clause)) + (syntax-case clause () + ((test . rest) + (with-syntax + ((clause-expr + (syntax-case #'rest (=>) + ((=> receiver) #'(receiver key)) + ((=> receiver ...) + (bad-clause + "wrong number of receiver expressions")) + ((e e* ...) #'(begin e e* ...)) + (_ (bad-clause))))) + (syntax-case #'test (else) + ((datums ...) + (let ((seen + (fold + (lambda (datum seen) + (define (warn-datum type) + ((@ (system base message) + warning) + type + (append (source-properties datum) + (source-properties + (syntax->datum #'test))) + datum + (syntax->datum clause) + (syntax->datum whole-expr))) + (if (memv datum seen) + (warn-datum 'duplicate-case-datum)) + (if (or (pair? datum) + (array? datum) + (generalized-vector? datum)) + (warn-datum 'bad-case-datum)) + (cons datum seen)) + seen + (map syntax->datum #'(datums ...))))) + (values (lambda (tail) + #`((if (memv key '(datums ...)) + clause-expr + #,@tail))) + seen))) + (else (values (lambda (tail) + (if (null? tail) + #'(clause-expr) + (bad-clause + "else must be the last clause"))) + seen)) + (_ (bad-clause))))) + (_ (bad-clause)))) + '() #'(clause clauses ...))))))))) (define-syntax do (syntax-rules () @@ -502,8 +582,6 @@ If there is no handler at all, Guile prints an error and then exits." (define-syntax-rule (delay exp) (make-promise (lambda () exp))) -(include-from-path "ice-9/quasisyntax") - (define-syntax current-source-location (lambda (x) (syntax-case x () diff --git a/module/system/base/message.scm b/module/system/base/message.scm index 8cf285afd..9accf712a 100644 --- a/module/system/base/message.scm +++ b/module/system/base/message.scm @@ -126,6 +126,20 @@ "~A: warning: possibly wrong number of arguments to `~A'~%" loc name)))) + (duplicate-case-datum + "report a duplicate datum in a case expression" + ,(lambda (port loc datum clause case-expr) + (emit port + "~A: warning: duplicate datum ~S in clause ~S of case expression ~S~%" + loc datum clause case-expr))) + + (bad-case-datum + "report a case datum that cannot be meaningfully compared using `eqv?'" + ,(lambda (port loc datum clause case-expr) + (emit port + "~A: warning: datum ~S cannot be meaningfully compared using `eqv?' in clause ~S of case expression ~S~%" + loc datum clause case-expr))) + (format "report wrong number of arguments to `format'" ,(lambda (port loc . rest) diff --git a/test-suite/tests/syntax.test b/test-suite/tests/syntax.test index fcc0349ba..cdaee716b 100644 --- a/test-suite/tests/syntax.test +++ b/test-suite/tests/syntax.test @@ -648,11 +648,13 @@ (pass-if-syntax-error "missing recipient" '(cond . "wrong number of receiver expressions") - (cond (#t identity =>))) + (eval '(cond (#t identity =>)) + (interaction-environment))) (pass-if-syntax-error "extra recipient" '(cond . "wrong number of receiver expressions") - (cond (#t identity => identity identity)))) + (eval '(cond (#t identity => identity identity)) + (interaction-environment)))) (with-test-prefix "bad or missing clauses" @@ -662,43 +664,48 @@ (interaction-environment))) (pass-if-syntax-error "(cond #t)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond #t) (interaction-environment))) (pass-if-syntax-error "(cond 1)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond 1) (interaction-environment))) (pass-if-syntax-error "(cond 1 2)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond 1 2) (interaction-environment))) (pass-if-syntax-error "(cond 1 2 3)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond 1 2 3) (interaction-environment))) (pass-if-syntax-error "(cond 1 2 3 4)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond 1 2 3 4) (interaction-environment))) (pass-if-syntax-error "(cond ())" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond ()) (interaction-environment))) (pass-if-syntax-error "(cond () 1)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond () 1) (interaction-environment))) (pass-if-syntax-error "(cond (1) 1)" - exception:generic-syncase-error + '(cond . "invalid clause") (eval '(cond (1) 1) + (interaction-environment))) + + (pass-if-syntax-error "(cond (else #f) (#t #t))" + '(cond . "else must be the last clause") + (eval '(cond (else #f) (#t #t)) (interaction-environment)))) (with-test-prefix "wrong number of arguments" @@ -712,10 +719,46 @@ (pass-if "clause with empty labels list" (case 1 (() #f) (else #t))) + (with-test-prefix "case handles '=> correctly" + + (pass-if "(1 2 3) => list" + (equal? (case 1 ((1 2 3) => list)) + '(1))) + + (pass-if "else => list" + (equal? (case 6 + ((1 2 3) 'wrong) + (else => list)) + '(6))) + + (with-test-prefix "bound '=> is handled correctly" + + (pass-if "(1) => 'ok" + (let ((=> 'foo)) + (eq? (case 1 ((1) => 'ok)) 'ok))) + + (pass-if "else =>" + (let ((=> 'foo)) + (eq? (case 1 (else =>)) 'foo))) + + (pass-if "else => list" + (let ((=> 'foo)) + (eq? (case 1 (else => identity)) identity)))) + + (pass-if-syntax-error "missing recipient" + '(case . "wrong number of receiver expressions") + (eval '(case 1 ((1) =>)) + (interaction-environment))) + + (pass-if-syntax-error "extra recipient" + '(case . "wrong number of receiver expressions") + (eval '(case 1 ((1) => identity identity)) + (interaction-environment)))) + (with-test-prefix "case is hygienic" (pass-if-syntax-error "bound 'else is handled correctly" - exception:generic-syncase-error + '(case . "invalid clause") (eval '(let ((else #f)) (case 1 (else #f))) (interaction-environment)))) @@ -742,22 +785,22 @@ (interaction-environment))) (pass-if-syntax-error "(case 1 \"foo\")" - exception:generic-syncase-error + '(case . "invalid clause") (eval '(case 1 "foo") (interaction-environment))) (pass-if-syntax-error "(case 1 ())" - exception:generic-syncase-error + '(case . "invalid clause") (eval '(case 1 ()) (interaction-environment))) (pass-if-syntax-error "(case 1 (\"foo\"))" - exception:generic-syncase-error + '(case . "invalid clause") (eval '(case 1 ("foo")) (interaction-environment))) (pass-if-syntax-error "(case 1 (\"foo\" \"bar\"))" - exception:generic-syncase-error + '(case . "invalid clause") (eval '(case 1 ("foo" "bar")) (interaction-environment))) @@ -767,7 +810,7 @@ (interaction-environment))) (pass-if-syntax-error "(case 1 ((2) \"bar\") (else))" - exception:generic-syncase-error + '(case . "invalid clause") (eval '(case 1 ((2) "bar") (else)) (interaction-environment))) @@ -777,7 +820,7 @@ (interaction-environment))) (pass-if-syntax-error "(case 1 (else #f) ((1) #t))" - exception:generic-syncase-error + '(case . "else must be the last clause") (eval '(case 1 (else #f) ((1) #t)) (interaction-environment))))) diff --git a/test-suite/tests/tree-il.test b/test-suite/tests/tree-il.test index 8e294a748..68827a870 100644 --- a/test-suite/tests/tree-il.test +++ b/test-suite/tests/tree-il.test @@ -1156,14 +1156,14 @@ (case foo ((3 2 1) 'a) (else 'b)) - (if (let (t) (_) ((toplevel foo)) - (if (apply (primitive eqv?) (lexical t _) (const 3)) + (let (key) (_) ((toplevel foo)) + (if (if (apply (primitive eqv?) (lexical key _) (const 3)) (const #t) - (if (apply (primitive eqv?) (lexical t _) (const 2)) + (if (apply (primitive eqv?) (lexical key _) (const 2)) (const #t) - (apply (primitive eqv?) (lexical t _) (const 1))))) - (const a) - (const b))) + (apply (primitive eqv?) (lexical key _) (const 1)))) + (const a) + (const b)))) (pass-if-peval ;; Memv with non-constant key, empty list, test context. Currently @@ -1171,9 +1171,7 @@ (case foo (() 'a) (else 'b)) - (if (begin (toplevel foo) (const #f)) - (const a) - (const b))) + (begin (toplevel foo) (const b))) ;; ;; Below are cases where constant propagation should bail out. From e3d4597469a543d97c4997b128509c2ceb13ca2b Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 9 Feb 2012 23:14:11 +0100 Subject: [PATCH 8/9] more efficient scm_to_utf8_stringn, scm_to_utf32_stringn * libguile/strings.c (scm_to_utf8_stringn): More efficient implementation than calling scm_to_stringn. (scm_to_utf32_stringn): Likewise. --- libguile/strings.c | 56 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/libguile/strings.c b/libguile/strings.c index 69632d697..71eee6c48 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1844,10 +1844,47 @@ scm_to_utf8_string (SCM str) return scm_to_utf8_stringn (str, NULL); } +static size_t +latin1_u8_strlen (const scm_t_uint8 *str, size_t len) +{ + size_t ret, i; + for (i = 0, ret = 0; i < len; i++) + ret += (str[i] < 128) ? 1 : 2; + return ret; +} + +static scm_t_uint8* +latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len, + scm_t_uint8 *u8_result, size_t *u8_lenp) +{ + size_t i, n; + size_t u8_len = latin1_u8_strlen (str, latin_len); + + if (!(u8_result && u8_lenp && *u8_lenp > u8_len)) + u8_result = scm_malloc (u8_len + 1); + if (u8_lenp) + *u8_lenp = u8_len; + + for (i = 0, n = 0; i < latin_len; i++) + n += u8_uctomb (u8_result + n, str[i], u8_len - n); + if (n != u8_len) + abort (); + u8_result[n] = 0; + + return u8_result; +} + char * scm_to_utf8_stringn (SCM str, size_t *lenp) { - return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR); + if (scm_i_is_narrow_string (str)) + return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str), + scm_i_string_length (str), + NULL, lenp); + else + return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str), + scm_i_string_length (str), + NULL, lenp); } scm_t_wchar * @@ -1865,9 +1902,20 @@ scm_to_utf32_stringn (SCM str, size_t *lenp) SCM_VALIDATE_STRING (1, str); if (scm_i_is_narrow_string (str)) - result = (scm_t_wchar *) - scm_to_stringn (str, lenp, "UTF-32", - SCM_FAILED_CONVERSION_ERROR); + { + scm_t_uint8 *codepoints; + size_t i, len; + + codepoints = (scm_t_uint8*) scm_i_string_chars (str); + len = scm_i_string_length (str); + if (lenp) + *lenp = len; + + result = scm_malloc ((len + 1) * sizeof (scm_t_wchar)); + for (i = 0; i < len; i++) + result[i] = codepoints[i]; + result[len] = 0; + } else { size_t len; From c2c3bddb1d0b2180282d78262e84c3ae7a44731f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Thu, 9 Feb 2012 23:15:25 +0100 Subject: [PATCH 9/9] more efficient scm_string_to_utf8, scm_string_to_utf32 * libguile/bytevectors.c (scm_string_to_utf8): More efficient implementation. (scm_string_to_utf32): Likewise. --- libguile/bytevectors.c | 56 ++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c index fff53550c..dc326f526 100644 --- a/libguile/bytevectors.c +++ b/libguile/bytevectors.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. +/* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -1954,33 +1954,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8", #define FUNC_NAME s_scm_string_to_utf8 { SCM utf; - uint8_t *c_utf; - size_t c_strlen, c_utf_len = 0; + scm_t_uint8 *c_utf; + size_t c_utf_len = 0; SCM_VALIDATE_STRING (1, str); - c_strlen = scm_i_string_length (str); - if (scm_i_is_narrow_string (str)) - c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark, - scm_i_string_chars (str), c_strlen, - NULL, NULL, &c_utf_len); - else - { - const scm_t_wchar *wbuf = scm_i_string_wide_chars (str); - c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len); - } - if (SCM_UNLIKELY (c_utf == NULL)) - scm_syserror (FUNC_NAME); - else - { - scm_dynwind_begin (0); - scm_dynwind_free (c_utf); - - utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8); - memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len); - - scm_dynwind_end (); - } + c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len); + utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8); + memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len); + free (c_utf); return (utf); } @@ -1997,6 +1979,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16", } #undef FUNC_NAME +static void +swap_u32 (scm_t_wchar *vals, size_t len) +{ + size_t n; + for (n = 0; n < len; n++) + vals[n] = bswap_32 (vals[n]); +} + SCM_DEFINE (scm_string_to_utf32, "string->utf32", 1, 1, 0, (SCM str, SCM endianness), @@ -2004,7 +1994,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32", "encoding of @var{str}.") #define FUNC_NAME s_scm_string_to_utf32 { - STRING_TO_UTF (32); + SCM bv; + scm_t_wchar *wchars; + size_t wchar_len, bytes_len; + + wchars = scm_to_utf32_stringn (str, &wchar_len); + bytes_len = wchar_len * sizeof (scm_t_wchar); + if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness, + scm_i_native_endianness)) + swap_u32 (wchars, wchar_len); + + bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8); + memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len); + free (wchars); + + return bv; } #undef FUNC_NAME