From 47f2bce5ae332c274270882c259776b8e4ea12f7 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Sun, 31 Mar 2013 21:12:31 -0400 Subject: [PATCH 1/5] Avoid unnecessary peeks for EOF in r6rs-ports. * libguile/r6rs-ports.c (scm_get_bytevector_n, scm_get_bytevector_n_x, scm_get_bytevector_some, scm_get_bytevector_all): Avoid peeking for EOF when we already know. --- libguile/r6rs-ports.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c index d5fcd2076..7ee7a69f0 100644 --- a/libguile/r6rs-ports.c +++ b/libguile/r6rs-ports.c @@ -496,16 +496,11 @@ SCM_DEFINE (scm_get_bytevector_n, "get-bytevector-n", 2, 0, 0, /* Don't invoke `scm_c_read ()' since it may block. */ c_read = 0; - if ((c_read == 0) && (c_count > 0)) + if (c_read < c_count) { - if (scm_peek_byte_or_eof (port) == EOF) - result = SCM_EOF_VAL; + if (c_read == 0) + result = SCM_EOF_VAL; else - result = scm_null_bytevector; - } - else - { - if (c_read < c_count) result = scm_c_shrink_bytevector (result, c_read); } @@ -543,13 +538,8 @@ SCM_DEFINE (scm_get_bytevector_n_x, "get-bytevector-n!", 4, 0, 0, /* Don't invoke `scm_c_read ()' since it may block. */ c_read = 0; - if ((c_read == 0) && (c_count > 0)) - { - if (scm_peek_byte_or_eof (port) == EOF) - result = SCM_EOF_VAL; - else - result = SCM_I_MAKINUM (0); - } + if (c_read == 0 && c_count > 0) + result = SCM_EOF_VAL; else result = scm_from_size_t (c_read); @@ -599,11 +589,12 @@ SCM_DEFINE (scm_get_bytevector_some, "get-bytevector-some", 1, 0, 0, c_bv[c_total] = (char) c_chr; c_total++; } + else + break; } /* XXX: We want to check for the availability of a byte, but that's what `scm_char_ready_p' actually does. */ - while (scm_is_true (scm_char_ready_p (port)) - && (scm_peek_byte_or_eof (port) != EOF)); + while (scm_is_true (scm_char_ready_p (port))); if (c_total == 0) { @@ -662,7 +653,7 @@ SCM_DEFINE (scm_get_bytevector_all, "get-bytevector-all", 1, 0, 0, c_read = scm_c_read (port, c_bv + c_total, c_count); c_total += c_read, c_count -= c_read; } - while (scm_peek_byte_or_eof (port) != EOF); + while (c_count == 0); if (c_total == 0) { From 3b80c3585221a899c9a6b87e742d267655bb0abc Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 1 Apr 2013 01:21:30 -0400 Subject: [PATCH 2/5] Fix typos in 'string-join' docs: 'string-infix' -> 'strict-infix'. * doc/ref/api-data.texi (String Constructors): In definition of 'string-join', change 'string-infix' -> 'strict-infix'. * libguile/srfi-13.c (scm_string_join): In docstring, change 'string-infix' -> 'strict-infix'. --- doc/ref/api-data.texi | 2 +- libguile/srfi-13.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 17baed27d..0a5a2de77 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -3148,7 +3148,7 @@ placed between the strings, and defaults to the symbol @item infix Insert the separator between list elements. An empty string will produce an empty list. -@item string-infix +@item strict-infix Like @code{infix}, but will raise an error if given the empty list. @item suffix diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index 97c5a1d64..81065da43 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -384,7 +384,7 @@ SCM_DEFINE (scm_string_join, "string-join", 1, 2, 0, "@item infix\n" "Insert the separator between list elements. An empty string\n" "will produce an empty list.\n" - "@item string-infix\n" + "@item strict-infix\n" "Like @code{infix}, but will raise an error if given the empty\n" "list.\n" "@item suffix\n" From 2f13a46672a347be0f7bb1b360a213f6d6ce0288 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 1 Apr 2013 03:37:36 -0400 Subject: [PATCH 3/5] Make 'string-append' more efficient and robust. * libguile/strings.c (scm_string_append): Check for numerical overflow while computing the length of the result. Double-check that we don't overflow the result string, and that it is the correct length in the end (in case another thread changed the list). When copying a narrow string to a wide result, avoid calling 'scm_i_string_length' and 'scm_i_string_chars' on each character. --- libguile/strings.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/libguile/strings.c b/libguile/strings.c index 5d0db2301..1b241e52c 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -1401,7 +1401,8 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, #define FUNC_NAME s_scm_string_append { SCM res; - size_t len = 0; + size_t total = 0; + size_t len; int wide = 0; SCM l, s; size_t i; @@ -1416,15 +1417,18 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, { s = SCM_CAR (l); SCM_VALIDATE_STRING (SCM_ARGn, s); - len += scm_i_string_length (s); + len = scm_i_string_length (s); + if (((size_t) -1) - total < len) + scm_num_overflow (s_scm_string_append); + total += len; if (!scm_i_is_narrow_string (s)) wide = 1; } data.narrow = NULL; if (!wide) - res = scm_i_make_string (len, &data.narrow, 0); + res = scm_i_make_string (total, &data.narrow, 0); else - res = scm_i_make_wide_string (len, &data.wide, 0); + res = scm_i_make_wide_string (total, &data.wide, 0); for (l = args; !scm_is_null (l); l = SCM_CDR (l)) { @@ -1432,6 +1436,8 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, s = SCM_CAR (l); SCM_VALIDATE_STRING (SCM_ARGn, s); len = scm_i_string_length (s); + if (len > total) + SCM_MISC_ERROR ("list changed during string-append", SCM_EOL); if (!wide) { memcpy (data.narrow, scm_i_string_chars (s), len); @@ -1441,16 +1447,20 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, { if (scm_i_is_narrow_string (s)) { - for (i = 0; i < scm_i_string_length (s); i++) - data.wide[i] = (unsigned char) scm_i_string_chars (s)[i]; + const char *src = scm_i_string_chars (s); + for (i = 0; i < len; i++) + data.wide[i] = (unsigned char) src[i]; } else u32_cpy ((scm_t_uint32 *) data.wide, (scm_t_uint32 *) scm_i_string_wide_chars (s), len); data.wide += len; } + total -= len; scm_remember_upto_here_1 (s); } + if (total != 0) + SCM_MISC_ERROR ("list changed during string-append", SCM_EOL); return res; } #undef FUNC_NAME From 786ab4258fbf605f46287da5e7550d3ab4b68589 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 1 Apr 2013 03:47:29 -0400 Subject: [PATCH 4/5] Optimize 'string-join'. * libguile/srfi-13.c (string-join): Rewrite to call 'scm_string_append' only once. --- libguile/srfi-13.c | 123 +++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 65 deletions(-) diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index 81065da43..e4268879e 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -394,91 +394,84 @@ SCM_DEFINE (scm_string_join, "string-join", 1, 2, 0, "@end table") #define FUNC_NAME s_scm_string_join { -#define GRAM_INFIX 0 -#define GRAM_STRICT_INFIX 1 -#define GRAM_SUFFIX 2 -#define GRAM_PREFIX 3 - SCM tmp; - SCM result; - int gram = GRAM_INFIX; - size_t del_len = 0; - long strings = scm_ilength (ls); + SCM append_list = SCM_EOL; + long list_len = scm_ilength (ls); + size_t delimiter_len = 0; /* Validate the string list. */ - if (strings < 0) + if (list_len < 0) SCM_WRONG_TYPE_ARG (1, ls); /* Validate the delimiter and record its length. */ if (SCM_UNBNDP (delimiter)) { delimiter = scm_from_locale_string (" "); - del_len = 1; + delimiter_len = 1; } else { SCM_VALIDATE_STRING (2, delimiter); - del_len = scm_i_string_length (delimiter); + delimiter_len = scm_i_string_length (delimiter); } - /* Validate the grammar symbol and remember the grammar. */ + /* Validate the grammar symbol. */ if (SCM_UNBNDP (grammar)) - gram = GRAM_INFIX; - else if (scm_is_eq (grammar, scm_sym_infix)) - gram = GRAM_INFIX; - else if (scm_is_eq (grammar, scm_sym_strict_infix)) - gram = GRAM_STRICT_INFIX; - else if (scm_is_eq (grammar, scm_sym_suffix)) - gram = GRAM_SUFFIX; - else if (scm_is_eq (grammar, scm_sym_prefix)) - gram = GRAM_PREFIX; - else + grammar = scm_sym_infix; + else if (!(scm_is_eq (grammar, scm_sym_infix) + || scm_is_eq (grammar, scm_sym_strict_infix) + || scm_is_eq (grammar, scm_sym_suffix) + || scm_is_eq (grammar, scm_sym_prefix))) SCM_WRONG_TYPE_ARG (3, grammar); - /* Check grammar constraints. */ - if (strings == 0 && gram == GRAM_STRICT_INFIX) - SCM_MISC_ERROR ("strict-infix grammar requires non-empty list", - SCM_EOL); - - result = scm_i_make_string (0, NULL, 0); - - tmp = ls; - switch (gram) + if (list_len == 0) { - case GRAM_INFIX: - case GRAM_STRICT_INFIX: - while (scm_is_pair (tmp)) - { - result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp))); - if (!scm_is_null (SCM_CDR (tmp)) && del_len > 0) - result = scm_string_append (scm_list_2 (result, delimiter)); - tmp = SCM_CDR (tmp); - } - break; - case GRAM_SUFFIX: - while (scm_is_pair (tmp)) - { - result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp))); - if (del_len > 0) - result = scm_string_append (scm_list_2 (result, delimiter)); - tmp = SCM_CDR (tmp); - } - break; - case GRAM_PREFIX: - while (scm_is_pair (tmp)) - { - if (del_len > 0) - result = scm_string_append (scm_list_2 (result, delimiter)); - result = scm_string_append (scm_list_2 (result, SCM_CAR (tmp))); - tmp = SCM_CDR (tmp); - } - break; + if (scm_is_eq (grammar, scm_sym_strict_infix)) + SCM_MISC_ERROR ("strict-infix grammar requires non-empty list", + SCM_EOL); + else + /* Handle empty lists specially */ + append_list = SCM_EOL; + } + else if (delimiter_len == 0) + /* Handle empty delimiters specially */ + append_list = ls; + else + { + SCM *last_cdr_p = &append_list; + +#define ADD_TO_APPEND_LIST(x) \ + (last_cdr_p = SCM_CDRLOC (*last_cdr_p = scm_list_1 (x))) + + /* Build a list of strings to pass to 'string-append'. + Here we assume that 'ls' has at least one element. */ + + /* If using the 'prefix' grammar, start with the delimiter. */ + if (scm_is_eq (grammar, scm_sym_prefix)) + ADD_TO_APPEND_LIST (delimiter); + + /* Handle the first element of 'ls' specially, so that in the loop + that follows we can unconditionally insert the delimiter before + every remaining element. */ + ADD_TO_APPEND_LIST (SCM_CAR (ls)); + ls = SCM_CDR (ls); + + /* Insert the delimiter before every remaining element. */ + while (scm_is_pair (ls)) + { + ADD_TO_APPEND_LIST (delimiter); + ADD_TO_APPEND_LIST (SCM_CAR (ls)); + ls = SCM_CDR (ls); + } + + /* If using the 'suffix' grammar, add the delimiter to the end. */ + if (scm_is_eq (grammar, scm_sym_suffix)) + ADD_TO_APPEND_LIST (delimiter); + +#undef ADD_TO_APPEND_LIST } - return result; -#undef GRAM_INFIX -#undef GRAM_STRICT_INFIX -#undef GRAM_SUFFIX -#undef GRAM_PREFIX + /* Construct the final result. */ + return scm_string_append (append_list); } #undef FUNC_NAME From de2bc673bba931a70e3b96336cab6512a47541fe Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Mon, 1 Apr 2013 05:31:08 -0400 Subject: [PATCH 5/5] scm_string_join: properly sequence operations in ADD_TO_APPEND_LIST * libguile/srfi-13.c (scm_string_join): Properly sequence operations in ADD_TO_APPEND_LIST macro. --- libguile/srfi-13.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libguile/srfi-13.c b/libguile/srfi-13.c index e4268879e..4e5d5725f 100644 --- a/libguile/srfi-13.c +++ b/libguile/srfi-13.c @@ -439,8 +439,9 @@ SCM_DEFINE (scm_string_join, "string-join", 1, 2, 0, { SCM *last_cdr_p = &append_list; -#define ADD_TO_APPEND_LIST(x) \ - (last_cdr_p = SCM_CDRLOC (*last_cdr_p = scm_list_1 (x))) +#define ADD_TO_APPEND_LIST(x) \ + ((*last_cdr_p = scm_list_1 (x)), \ + (last_cdr_p = SCM_CDRLOC (*last_cdr_p))) /* Build a list of strings to pass to 'string-append'. Here we assume that 'ls' has at least one element. */