From d5c420a88880caf1e364af7339b66932411b5234 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Tue, 7 May 2019 02:28:26 -0400 Subject: [PATCH] Add 'scm_c_make_char' and use it where appropriate. This reverts the change to SCM_MAKE_CHAR made in the previous commit 63818453ad226cd3c2d1fd8ade12e3d7c1d43c05, which used an arithmetic trick to avoid evaluating its argument more than once. Here, we restore the previous implementation of SCM_MAKE_CHAR, which evaluates its argument twice. Instead, we introduce a new inlinable function 'scm_c_make_char' and replace uses of SCM_MAKE_CHAR with calls to 'scm_c_make_char' where appropriate. * libguile/chars.h (scm_c_make_char): New inline function. * libguile/inline.c: Include chars.h. * libguile/srfi-13.c (REF_IN_CHARSET, scm_string_any, scm_string_every) (scm_string_trim, scm_string_trim_right, scm_string_trim_both) (scm_string_index, scm_string_index_right, scm_string_skip) (scm_string_skip_right, scm_string_count, string_titlecase_x) (string_reverse_x, scm_string_fold, scm_string_fold_right) (scm_string_for_each, scm_string_filter, scm_string_delete): Use 'scm_c_make_char' instead of 'SCM_MAKE_CHAR' in cases where the argument calls a function. 
* libguile/chars.c (scm_char_upcase, scm_char_downcase, scm_char_titlecase), libguile/ports.c (scm_port_decode_char), libguile/print.c (scm_simple_format), libguile/read.c (scm_read_character), libguile/strings.c (scm_string_ref, scm_c_string_ref): Likewise. --- libguile/chars.c | 8 ++++---- libguile/chars.h | 29 ++++++++++++++++++++--------- libguile/inline.c | 3 ++- libguile/ports.c | 2 +- libguile/print.c | 4 ++-- libguile/read.c | 2 +- libguile/srfi-13.c | 38 +++++++++++++++++++------------------- libguile/strings.c | 8 ++++---- 8 files changed, 53 insertions(+), 41 deletions(-) diff --git a/libguile/chars.c b/libguile/chars.c index 21df24730..fe55f9e2e 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -1,4 +1,4 @@ -/* Copyright 1995-1996,1998,2000-2001,2004,2006,2008-2011,2014,2018 +/* Copyright 1995-1996,1998,2000-2001,2004,2006,2008-2011,2014,2018-2019 Free Software Foundation, Inc. This file is part of Guile. @@ -460,7 +460,7 @@ SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0, #define FUNC_NAME s_scm_char_upcase { SCM_VALIDATE_CHAR (1, chr); - return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr))); + return scm_c_make_char (scm_c_upcase (SCM_CHAR (chr))); } #undef FUNC_NAME @@ -471,7 +471,7 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0, #define FUNC_NAME s_scm_char_downcase { SCM_VALIDATE_CHAR (1, chr); - return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr))); + return scm_c_make_char (scm_c_downcase (SCM_CHAR(chr))); } #undef FUNC_NAME @@ -481,7 +481,7 @@ SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0, #define FUNC_NAME s_scm_char_titlecase { SCM_VALIDATE_CHAR (1, chr); - return SCM_MAKE_CHAR (scm_c_titlecase (SCM_CHAR(chr))); + return scm_c_make_char (scm_c_titlecase (SCM_CHAR(chr))); } #undef FUNC_NAME diff --git a/libguile/chars.h b/libguile/chars.h index 8cf8a11ef..f6d4c6354 100644 --- a/libguile/chars.h +++ b/libguile/chars.h @@ -22,7 +22,8 @@ -#include +#include "libguile/error.h" +#include "libguile/inline.h" /* Immediate
Characters @@ -34,14 +35,13 @@ (0 to 255) to Latin-1 codepoints (0 to 255) while allowing higher codepoints (256 to 1114111) to pass through unchanged. - To avoid evaluating X more than once, we use an arithmetic trick: we - compute (X mod 2^N) mod (2^N - 256), which is equal to the required - mapping in the range -256 .. (2^N - 257). Here, N is the number of - bits in scm_t_bits. Note that (scm_t_bits) (x) implicitly computes - (X mod 2^N), and (scm_t_bits) -256 equals (2^N - 256). GCC is able - to optimize away these operations in practice. */ -#define SCM_MAKE_CHAR(x) \ - (SCM_MAKE_ITAG8 ((scm_t_bits) (x) % (scm_t_bits) -256, scm_tc8_char)) + This macro evaluates X twice, which may lead to side effects if used + incorrectly. It's also likely to be inefficient if X calls a + procedure. Use 'scm_c_make_char' in those cases. */ +#define SCM_MAKE_CHAR(x) \ + ((x) <= 1 \ + ? SCM_MAKE_ITAG8 ((scm_t_bits) (unsigned char) (x), scm_tc8_char) \ + : SCM_MAKE_ITAG8 ((scm_t_bits) (x), scm_tc8_char)) #define SCM_CODEPOINT_DOTTED_CIRCLE (0x25cc) #define SCM_CODEPOINT_SURROGATE_START (0xd800) @@ -86,12 +86,23 @@ SCM_API SCM scm_char_upcase (SCM chr); SCM_API SCM scm_char_downcase (SCM chr); SCM_API SCM scm_char_titlecase (SCM chr); SCM_API SCM scm_char_general_category (SCM chr); + +SCM_INLINE SCM scm_c_make_char (scm_t_wchar c); SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c); + SCM_INTERNAL const char *scm_i_charname (SCM chr); SCM_INTERNAL SCM scm_i_charname_to_char (const char *charname, size_t charname_len); SCM_INTERNAL void scm_init_chars (void); +#if SCM_CAN_INLINE || defined SCM_INLINE_C_IMPLEMENTING_INLINES +SCM_INLINE_IMPLEMENTATION SCM +scm_c_make_char (scm_t_wchar c) +{ + return SCM_MAKE_CHAR(c); +} +#endif + #endif /* SCM_CHARS_H */ diff --git a/libguile/inline.c b/libguile/inline.c index 6ab14c2dd..900b3253e 100644 --- a/libguile/inline.c +++ 
b/libguile/inline.c @@ -1,4 +1,4 @@ -/* Copyright 2001,2006,2008,2011-2013,2018 +/* Copyright 2001,2006,2008,2011-2013,2018-2019 Free Software Foundation, Inc. This file is part of Guile. @@ -25,6 +25,7 @@ #define SCM_INLINE_C_IMPLEMENTING_INLINES 1 #include "array-handle.h" +#include "chars.h" #include "gc.h" #include "pairs.h" #include "ports.h" diff --git a/libguile/ports.c b/libguile/ports.c index 4a2e384db..0ec4c1411 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -1921,7 +1921,7 @@ SCM_DEFINE (scm_port_decode_char, "port-decode-char", 4, 0, 0, output. */ return SCM_BOOL_F; - return SCM_MAKE_CHAR (utf8_to_codepoint (utf8_buf, output_size)); + return scm_c_make_char (utf8_to_codepoint (utf8_buf, output_size)); } } #undef FUNC_NAME diff --git a/libguile/print.c b/libguile/print.c index ecda3fb6e..b10f0f8a8 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -1184,14 +1184,14 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1, continue; default: SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use (ice-9 format) instead", - scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p)))); + scm_list_1 (scm_c_make_char (scm_i_string_ref (message, p)))); } if (!scm_is_pair (args)) SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A", - scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p)))); + scm_list_1 (scm_c_make_char (scm_i_string_ref (message, p)))); scm_lfwrite_substr (message, start, p - 1, port); /* we pass destination here */ diff --git a/libguile/read.c b/libguile/read.c index 019ffff7c..f146f0ef0 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -1100,7 +1100,7 @@ scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts) /* Ignore dotted circles, which may be used to keep combining characters from combining with the backslash in #\charname. 
*/ if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2) - return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1)); + return scm_c_make_char (scm_i_string_ref (charname, 1)); if (cp >= '0' && cp < '8') { diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index 3de8b5d3e..97c372674 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -60,7 +60,7 @@ } while (0) #define REF_IN_CHARSET(s, i, cs) \ - (scm_is_true (scm_char_set_contains_p ((cs), SCM_MAKE_CHAR (scm_i_string_ref (s, i))))) + (scm_is_true (scm_char_set_contains_p ((cs), scm_c_make_char (scm_i_string_ref (s, i))))) SCM_DEFINE (scm_string_null_p, "string-null?", 1, 0, 0, (SCM str), @@ -140,7 +140,7 @@ SCM_DEFINE (scm_string_any, "string-any-c-code", 2, 2, 0, while (cstart < cend) { res = scm_call_1 (char_pred, - SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_true (res)) break; cstart++; @@ -207,7 +207,7 @@ SCM_DEFINE (scm_string_every, "string-every-c-code", 2, 2, 0, while (cstart < cend) { res = scm_call_1 (char_pred, - SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_false (res)) break; cstart++; @@ -765,7 +765,7 @@ SCM_DEFINE (scm_string_trim, "string-trim", 1, 3, 0, { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_false (res)) break; cstart++; @@ -841,7 +841,7 @@ SCM_DEFINE (scm_string_trim_right, "string-trim-right", 1, 3, 0, { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cend - 1))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cend - 1))); if (scm_is_false (res)) break; cend--; @@ -935,7 +935,7 @@ SCM_DEFINE (scm_string_trim_both, "string-trim-both", 1, 3, 0, { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + res = scm_call_1 (char_pred, 
scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_false (res)) break; cstart++; @@ -944,7 +944,7 @@ SCM_DEFINE (scm_string_trim_both, "string-trim-both", 1, 3, 0, { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cend - 1))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cend - 1))); if (scm_is_false (res)) break; cend--; @@ -1705,7 +1705,7 @@ SCM_DEFINE (scm_string_index, "string-index", 2, 2, 0, while (cstart < cend) { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_true (res)) goto found; cstart++; @@ -1771,7 +1771,7 @@ SCM_DEFINE (scm_string_index_right, "string-index-right", 2, 2, 0, { SCM res; cend--; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cend))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cend))); if (scm_is_true (res)) goto found; } @@ -1857,7 +1857,7 @@ SCM_DEFINE (scm_string_skip, "string-skip", 2, 2, 0, while (cstart < cend) { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_false (res)) goto found; cstart++; @@ -1924,7 +1924,7 @@ SCM_DEFINE (scm_string_skip_right, "string-skip-right", 2, 2, 0, { SCM res; cend--; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cend))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cend))); if (scm_is_false (res)) goto found; } @@ -1990,7 +1990,7 @@ SCM_DEFINE (scm_string_count, "string-count", 2, 2, 0, while (cstart < cend) { SCM res; - res = scm_call_1 (char_pred, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + res = scm_call_1 (char_pred, scm_c_make_char (scm_i_string_ref (s, cstart))); if (scm_is_true (res)) count++; cstart++; @@ -2247,7 +2247,7 @@ string_titlecase_x (SCM str, size_t start, size_t end) str = 
scm_i_string_start_writing (str); for(i = start; i < end; i++) { - ch = SCM_MAKE_CHAR (scm_i_string_ref (str, i)); + ch = scm_c_make_char (scm_i_string_ref (str, i)); if (scm_is_true (scm_char_alphabetic_p (ch))) { if (!in_word) @@ -2345,7 +2345,7 @@ string_reverse_x (SCM str, size_t cstart, size_t cend) cend--; while (cstart < cend) { - tmp = SCM_MAKE_CHAR (scm_i_string_ref (str, cstart)); + tmp = scm_c_make_char (scm_i_string_ref (str, cstart)); scm_i_string_set_x (str, cstart, scm_i_string_ref (str, cend)); scm_i_string_set_x (str, cend, SCM_CHAR (tmp)); cstart++; @@ -2582,7 +2582,7 @@ SCM_DEFINE (scm_string_fold, "string-fold", 3, 2, 0, result = knil; while (cstart < cend) { - result = scm_call_2 (kons, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart)), result); + result = scm_call_2 (kons, scm_c_make_char (scm_i_string_ref (s, cstart)), result); cstart++; } @@ -2610,7 +2610,7 @@ SCM_DEFINE (scm_string_fold_right, "string-fold-right", 3, 2, 0, result = knil; while (cstart < cend) { - result = scm_call_2 (kons, SCM_MAKE_CHAR (scm_i_string_ref (s, cend-1)), result); + result = scm_call_2 (kons, scm_c_make_char (scm_i_string_ref (s, cend-1)), result); cend--; } @@ -2767,7 +2767,7 @@ SCM_DEFINE (scm_string_for_each, "string-for-each", 2, 2, 0, 4, end, cend); while (cstart < cend) { - scm_call_1 (proc, SCM_MAKE_CHAR (scm_i_string_ref (s, cstart))); + scm_call_1 (proc, scm_c_make_char (scm_i_string_ref (s, cstart))); cstart++; } @@ -3208,7 +3208,7 @@ SCM_DEFINE (scm_string_filter, "string-filter", 2, 2, 0, while (idx < cend) { SCM res, ch; - ch = SCM_MAKE_CHAR (scm_i_string_ref (s, idx)); + ch = scm_c_make_char (scm_i_string_ref (s, idx)); res = scm_call_1 (char_pred, ch); if (scm_is_true (res)) ls = scm_cons (ch, ls); @@ -3343,7 +3343,7 @@ SCM_DEFINE (scm_string_delete, "string-delete", 2, 2, 0, idx = cstart; while (idx < cend) { - SCM res, ch = SCM_MAKE_CHAR (scm_i_string_ref (s, idx)); + SCM res, ch = scm_c_make_char (scm_i_string_ref (s, idx)); res = scm_call_1 
(char_pred, ch); if (scm_is_false (res)) ls = scm_cons (ch, ls); diff --git a/libguile/strings.c b/libguile/strings.c index b366f5b37..e6ae5cbd5 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1214,9 +1214,9 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0, scm_out_of_range (NULL, k); if (scm_i_is_narrow_string (str)) - return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]); + return scm_c_make_char (scm_i_string_chars (str)[idx]); else - return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]); + return scm_c_make_char (scm_i_string_wide_chars (str)[idx]); } #undef FUNC_NAME @@ -1226,9 +1226,9 @@ scm_c_string_ref (SCM str, size_t p) if (p >= scm_i_string_length (str)) scm_out_of_range (NULL, scm_from_size_t (p)); if (scm_i_is_narrow_string (str)) - return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]); + return scm_c_make_char (scm_i_string_chars (str)[p]); else - return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]); + return scm_c_make_char (scm_i_string_wide_chars (str)[p]); }