mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Optimize `peek-char'.
This makes `peek-char' 40x faster on a UTF-8 port containing multi-byte codepoints. The `xml->sxml' procedure is 4x faster on a 2.7 MiB XML file. * libguile/ports.c (get_codepoint): New procedure, moved here from `scm_getc', with the additional BUF and LEN parameters. (scm_getc): Use it. (scm_peek_char): Use it instead of the `scm_getc'/`scm_ungetc' sequence. * test-suite/tests/ports.test ("string ports")["peek-char [latin-1]", "peek-char [utf-8]"]: New tests. * benchmark-suite/Makefile.am (SCM_BENCHMARKS): Add `benchmarks/ports.bm'. * benchmark-suite/benchmarks/ports.bm: New file.
This commit is contained in:
parent
e9c3018cec
commit
fd5eec2b6e
4 changed files with 132 additions and 14 deletions
|
@ -4,6 +4,7 @@ SCM_BENCHMARKS = benchmarks/0-reference.bm \
|
|||
benchmarks/continuations.bm \
|
||||
benchmarks/if.bm \
|
||||
benchmarks/logand.bm \
|
||||
benchmarks/ports.bm \
|
||||
benchmarks/read.bm \
|
||||
benchmarks/srfi-1.bm \
|
||||
benchmarks/srfi-13.bm \
|
||||
|
|
67
benchmark-suite/benchmarks/ports.bm
Normal file
67
benchmark-suite/benchmarks/ports.bm
Normal file
|
@ -0,0 +1,67 @@
|
|||
;;; ports.bm --- Port I/O. -*- mode: scheme; coding: utf-8; -*-
|
||||
;;;
|
||||
;;; Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
;;;
|
||||
;;; This program is free software; you can redistribute it and/or
|
||||
;;; modify it under the terms of the GNU Lesser General Public License
|
||||
;;; as published by the Free Software Foundation; either version 3, or
|
||||
;;; (at your option) any later version.
|
||||
;;;
|
||||
;;; This program is distributed in the hope that it will be useful,
|
||||
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;; GNU Lesser General Public License for more details.
|
||||
;;;
|
||||
;;; You should have received a copy of the GNU Lesser General Public
|
||||
;;; License along with this software; see the file COPYING.LESSER. If
|
||||
;;; not, write to the Free Software Foundation, Inc., 51 Franklin
|
||||
;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (benchmarks ports)
|
||||
#:use-module (benchmark-suite lib))
|
||||
|
||||
(define %latin1-port
|
||||
(with-fluids ((%default-port-encoding #f))
|
||||
(open-input-string "hello, world")))
|
||||
|
||||
(define %utf8/ascii-port
|
||||
(with-fluids ((%default-port-encoding "UTF-8"))
|
||||
(open-input-string "hello, world")))
|
||||
|
||||
(define %utf8/wide-port
|
||||
(with-fluids ((%default-port-encoding "UTF-8"))
|
||||
(open-input-string "안녕하세요")))
|
||||
|
||||
|
||||
(with-benchmark-prefix "peek-char"
|
||||
|
||||
(benchmark "latin-1 port" 700000
|
||||
(peek-char %latin1-port))
|
||||
|
||||
(benchmark "utf-8 port, ascii character" 700000
|
||||
(peek-char %utf8/ascii-port))
|
||||
|
||||
(benchmark "utf-8 port, Korean character" 700000
|
||||
(peek-char %utf8/wide-port)))
|
||||
|
||||
(with-benchmark-prefix "read-char"
|
||||
|
||||
(benchmark "latin-1 port" 10000000
|
||||
(read-char %latin1-port))
|
||||
|
||||
(benchmark "utf-8 port, ascii character" 10000000
|
||||
(read-char %utf8/ascii-port))
|
||||
|
||||
(benchmark "utf-8 port, Korean character" 10000000
|
||||
(read-char %utf8/wide-port)))
|
||||
|
||||
(with-benchmark-prefix "char-ready?"
|
||||
|
||||
(benchmark "latin-1 port" 10000000
|
||||
(char-ready? %latin1-port))
|
||||
|
||||
(benchmark "utf-8 port, ascii character" 10000000
|
||||
(char-ready? %utf8/ascii-port))
|
||||
|
||||
(benchmark "utf-8 port, Korean character" 10000000
|
||||
(char-ready? %utf8/wide-port)))
|
|
@ -1023,13 +1023,15 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
|
|||
|
||||
#define SCM_MBCHAR_BUF_SIZE (4)
|
||||
|
||||
/* Get one codepoint from a file, using the port's encoding. */
|
||||
scm_t_wchar
|
||||
scm_getc (SCM port)
|
||||
/* Read a codepoint from PORT and return it. Fill BUF with the byte
|
||||
representation of the codepoint in PORT's encoding, and set *LEN to
|
||||
the length in bytes of that representation. Raise an error on
|
||||
failure. */
|
||||
static scm_t_wchar
|
||||
get_codepoint (SCM port, char buf[SCM_MBCHAR_BUF_SIZE], size_t *len)
|
||||
{
|
||||
int c;
|
||||
unsigned int bufcount = 0;
|
||||
char buf[SCM_MBCHAR_BUF_SIZE];
|
||||
size_t bufcount = 0;
|
||||
scm_t_uint32 result_buf;
|
||||
scm_t_wchar codepoint = 0;
|
||||
scm_t_uint32 *u32;
|
||||
|
@ -1133,6 +1135,8 @@ scm_getc (SCM port)
|
|||
break;
|
||||
}
|
||||
|
||||
*len = bufcount;
|
||||
|
||||
return codepoint;
|
||||
|
||||
failure:
|
||||
|
@ -1155,6 +1159,15 @@ scm_getc (SCM port)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Read a codepoint from PORT and return it. */
|
||||
scm_t_wchar
|
||||
scm_getc (SCM port)
|
||||
{
|
||||
size_t len;
|
||||
char buf[SCM_MBCHAR_BUF_SIZE];
|
||||
|
||||
return get_codepoint (port, buf, &len);
|
||||
}
|
||||
|
||||
/* this should only be called when the read buffer is empty. it
|
||||
tries to refill the read buffer. it returns the first char from
|
||||
|
@ -1635,18 +1648,37 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
|
|||
"to @code{read-char} would have hung.")
|
||||
#define FUNC_NAME s_scm_peek_char
|
||||
{
|
||||
scm_t_wchar c, column;
|
||||
SCM result;
|
||||
scm_t_wchar c;
|
||||
char bytes[SCM_MBCHAR_BUF_SIZE];
|
||||
long column, line;
|
||||
size_t len;
|
||||
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
else
|
||||
SCM_VALIDATE_OPINPORT (1, port);
|
||||
|
||||
column = SCM_COL (port);
|
||||
c = scm_getc (port);
|
||||
if (EOF == c)
|
||||
return SCM_EOF_VAL;
|
||||
scm_ungetc (c, port);
|
||||
line = SCM_LINUM (port);
|
||||
|
||||
c = get_codepoint (port, bytes, &len);
|
||||
if (c == EOF)
|
||||
result = SCM_EOF_VAL;
|
||||
else
|
||||
{
|
||||
long i;
|
||||
|
||||
result = SCM_MAKE_CHAR (c);
|
||||
|
||||
for (i = len - 1; i >= 0; i--)
|
||||
scm_unget_byte (bytes[i], port);
|
||||
|
||||
SCM_COL (port) = column;
|
||||
return SCM_MAKE_CHAR (c);
|
||||
SCM_LINUM (port) = line;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
|
|
@ -422,7 +422,25 @@
|
|||
(and (eq? faulty-str str)
|
||||
(string=? from "UTF-32")
|
||||
(string=? to "ISO-8859-1")
|
||||
(string? (strerror errno))))))))
|
||||
(string? (strerror errno)))))))
|
||||
|
||||
(pass-if "peek-char [latin-1]"
|
||||
(let ((p (with-fluids ((%default-port-encoding #f))
|
||||
(open-input-string "hello, world"))))
|
||||
(and (char=? (peek-char p) #\h)
|
||||
(char=? (peek-char p) #\h)
|
||||
(char=? (peek-char p) #\h)
|
||||
(= (port-line p) 0)
|
||||
(= (port-column p) 0))))
|
||||
|
||||
(pass-if "peek-char [utf-8]"
|
||||
(let ((p (with-fluids ((%default-port-encoding "UTF-8"))
|
||||
(open-input-string "안녕하세요"))))
|
||||
(and (char=? (peek-char p) #\안)
|
||||
(char=? (peek-char p) #\안)
|
||||
(char=? (peek-char p) #\안)
|
||||
(= (port-line p) 0)
|
||||
(= (port-column p) 0)))))
|
||||
|
||||
(with-test-prefix "call-with-output-string"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue