mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Optimize `scm_read_string'.
According to the new benchmarks, this leads to a 5% speed improvement when reading small strings, and a 27% improvement when reading large strings. * libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update comment to mention codepoints. (scm_read_string): Make `str' a list of strings, instead of a string. Store characters read in buffer `c_str'. Cons to STR when C_STR is full, and concatenate/reverse at the end. * benchmark-suite/benchmarks/read.bm (small, large): New variables. Set %DEFAULT-PORT-ENCODING to "UTF-8". ("read")["small strings", "large strings"]: New benchmarks.
This commit is contained in:
parent
5bbd632fc3
commit
ff4d367275
2 changed files with 38 additions and 19 deletions
|
@ -1,6 +1,6 @@
|
||||||
;;; read.bm --- Exercise the reader. -*- Scheme -*-
|
;;; read.bm --- Exercise the reader. -*- Scheme -*-
|
||||||
;;;
|
;;;
|
||||||
;;; Copyright (C) 2008, 2010 Free Software Foundation, Inc.
|
;;; Copyright (C) 2008, 2010, 2012 Free Software Foundation, Inc.
|
||||||
;;;
|
;;;
|
||||||
;;; This program is free software; you can redistribute it and/or
|
;;; This program is free software; you can redistribute it and/or
|
||||||
;;; modify it under the terms of the GNU Lesser General Public License
|
;;; modify it under the terms of the GNU Lesser General Public License
|
||||||
|
@ -43,6 +43,11 @@
|
||||||
(load-file-with-reader file read buffering))
|
(load-file-with-reader file read buffering))
|
||||||
%files-to-load))
|
%files-to-load))
|
||||||
|
|
||||||
|
(define small "\"hello, world!\"")
|
||||||
|
(define large (string-append "\"" (make-string 1234 #\A) "\""))
|
||||||
|
|
||||||
|
(fluid-set! %default-port-encoding "UTF-8") ; for string ports
|
||||||
|
|
||||||
|
|
||||||
(with-benchmark-prefix "read"
|
(with-benchmark-prefix "read"
|
||||||
|
|
||||||
|
@ -59,4 +64,10 @@
|
||||||
(exercise-read (list _IOFBF 8192)))
|
(exercise-read (list _IOFBF 8192)))
|
||||||
|
|
||||||
(benchmark "_IOFBF 16384" 10
|
(benchmark "_IOFBF 16384" 10
|
||||||
(exercise-read (list _IOFBF 16384))))
|
(exercise-read (list _IOFBF 16384)))
|
||||||
|
|
||||||
|
(benchmark "small strings" 100000
|
||||||
|
(call-with-input-string small read))
|
||||||
|
|
||||||
|
(benchmark "large strings" 100000
|
||||||
|
(call-with-input-string large read)))
|
||||||
|
|
|
@ -161,8 +161,8 @@ scm_i_read_hash_procedures_set_x (SCM value)
|
||||||
/* Size of the C buffer used to read symbols and numbers. */
|
/* Size of the C buffer used to read symbols and numbers. */
|
||||||
#define READER_BUFFER_SIZE 128
|
#define READER_BUFFER_SIZE 128
|
||||||
|
|
||||||
/* Size of the C buffer used to read strings. */
|
/* Number of 32-bit codepoints in the buffer used to read strings. */
|
||||||
#define READER_STRING_BUFFER_SIZE 512
|
#define READER_STRING_BUFFER_SIZE 128
|
||||||
|
|
||||||
/* The maximum size of Scheme character names. */
|
/* The maximum size of Scheme character names. */
|
||||||
#define READER_CHAR_NAME_MAX_SIZE 50
|
#define READER_CHAR_NAME_MAX_SIZE 50
|
||||||
|
@ -493,15 +493,14 @@ scm_read_string (int chr, SCM port)
|
||||||
/* For strings smaller than C_STR, this function creates only one Scheme
|
/* For strings smaller than C_STR, this function creates only one Scheme
|
||||||
object (the string returned). */
|
object (the string returned). */
|
||||||
|
|
||||||
SCM str = SCM_BOOL_F;
|
SCM str = SCM_EOL;
|
||||||
unsigned c_str_len = 0;
|
size_t c_str_len = 0;
|
||||||
scm_t_wchar c;
|
scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
|
||||||
|
|
||||||
/* Need to capture line and column numbers here. */
|
/* Need to capture line and column numbers here. */
|
||||||
long line = SCM_LINUM (port);
|
long line = SCM_LINUM (port);
|
||||||
int column = SCM_COL (port) - 1;
|
int column = SCM_COL (port) - 1;
|
||||||
|
|
||||||
str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
|
|
||||||
while ('"' != (c = scm_getc (port)))
|
while ('"' != (c = scm_getc (port)))
|
||||||
{
|
{
|
||||||
if (c == EOF)
|
if (c == EOF)
|
||||||
|
@ -511,12 +510,11 @@ scm_read_string (int chr, SCM port)
|
||||||
"end of file in string constant", SCM_EOL);
|
"end of file in string constant", SCM_EOL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (c_str_len + 1 >= scm_i_string_length (str))
|
if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
|
||||||
{
|
{
|
||||||
SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0);
|
str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
|
||||||
|
c_str_len = 0;
|
||||||
str = scm_string_append (scm_list_2 (str, addy));
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (c == '\\')
|
if (c == '\\')
|
||||||
{
|
{
|
||||||
|
@ -580,12 +578,22 @@ scm_read_string (int chr, SCM port)
|
||||||
scm_list_1 (SCM_MAKE_CHAR (c)));
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
str = scm_i_string_start_writing (str);
|
|
||||||
scm_i_string_set_x (str, c_str_len++, c);
|
c_str[c_str_len++] = c;
|
||||||
scm_i_string_stop_writing ();
|
|
||||||
}
|
}
|
||||||
return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len),
|
|
||||||
port, line, column);
|
if (scm_is_null (str))
|
||||||
|
/* Fast path: we got a string that fits in C_STR. */
|
||||||
|
str = scm_from_utf32_stringn (c_str, c_str_len);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (c_str_len > 0)
|
||||||
|
str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
|
||||||
|
|
||||||
|
str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
|
||||||
|
}
|
||||||
|
|
||||||
|
return maybe_annotate_source (str, port, line, column);
|
||||||
}
|
}
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue