1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 11:50:28 +02:00
guile/benchmark-suite/benchmarks/strings.bm
Michael Gran d78de77f43 Benchmarks for string comparisons
* benchmark-suite/benchmarks/strings.bm: new file
* benchmark-suite/Makefile.am: add strings.bm
2011-03-19 15:25:28 -07:00

537 lines
23 KiB
Scheme
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; -*- Mode: scheme; coding: utf-8; -*-
;;; strings.bm
;;;
;;; Copyright (C) 2011 Free Software Foundation, Inc.
;;;
;;;
;;; This program is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 3, or
;;; (at your option) any later version.
;;;
;;; This program is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this software; see the file COPYING.LESSER. If
;;; not, write to the Free Software Foundation, Inc., 51 Franklin
;;; Street, Fifth Floor, Boston, MA 02110-1301 USA
(define-module (benchmarks strings)
#:use-module (benchmark-suite lib)
#:use-module (ice-9 i18n))
(use-modules (ice-9 i18n))
(seed->random-state 1)
;; Start from a known locale state
(setlocale LC_ALL "C")
(define char-set:cased (char-set-union char-set:lower-case
char-set:upper-case
char-set:title-case))
(define *latin1*
(char-set->list (char-set-xor
(char-set-intersection (ucs-range->char-set 0 255)
char-set:cased)
(->char-set #\µ)))) ; Can't do a case-insensitive comparison of a string
; with mu in fr_FR.iso88591 since it case-folds into a
; non-Latin-1 character.
(define *cased*
(char-set->list char-set:cased))
(define (random-string c-list n)
(let ((len (length c-list)))
(apply string
(map
(lambda (x)
(list-ref c-list (random len)))
(iota n)))))
(define (diff-at-start str)
(string-append "!" (substring str 1)))
(define (diff-in-middle str)
(let ((x (floor (/ (string-length str) 2))))
(string-append (substring str 0 x)
"!"
(substring str (1+ x)))))
(define (diff-at-end str)
(string-append (substring str 0 (1- (string-length str)))
"!"))
(define short-latin1-string (random-string *latin1* 10))
(define medium-latin1-string (random-string *latin1* 100))
(define long-latin1-string (random-string *latin1* 1000))
(define short-latin1-string-diff-at-start (diff-at-start short-latin1-string))
(define medium-latin1-string-diff-at-start (diff-at-start medium-latin1-string))
(define long-latin1-string-diff-at-start (diff-at-start long-latin1-string))
(define short-latin1-string-diff-in-middle (diff-in-middle short-latin1-string))
(define medium-latin1-string-diff-in-middle (diff-in-middle medium-latin1-string))
(define long-latin1-string-diff-in-middle (diff-in-middle long-latin1-string))
(define short-latin1-string-diff-at-end (diff-at-end short-latin1-string))
(define medium-latin1-string-diff-at-end (diff-at-end medium-latin1-string))
(define long-latin1-string-diff-at-end (diff-at-end long-latin1-string))
(define short-cased-string (random-string *cased* 10))
(define medium-cased-string (random-string *cased* 100))
(define long-cased-string (random-string *cased* 1000))
(define short-cased-string-diff-at-start (diff-at-start short-cased-string))
(define medium-cased-string-diff-at-start (diff-at-start medium-cased-string))
(define long-cased-string-diff-at-start (diff-at-start long-cased-string))
(define short-cased-string-diff-in-middle (diff-in-middle short-cased-string))
(define medium-cased-string-diff-in-middle (diff-in-middle medium-cased-string))
(define long-cased-string-diff-in-middle (diff-in-middle long-cased-string))
(define short-cased-string-diff-at-end (diff-at-end short-cased-string))
(define medium-cased-string-diff-at-end (diff-at-end medium-cased-string))
(define long-cased-string-diff-at-end (diff-at-end long-cased-string))
(define %french-locale-name "fr_FR.ISO-8859-1")
(define %french-utf8-locale-name "fr_FR.UTF-8")
(define %french-locale
(false-if-exception
(make-locale (list LC_CTYPE LC_COLLATE LC_NUMERIC LC_TIME)
%french-locale-name)))
(define %french-utf8-locale
(false-if-exception
(make-locale (list LC_CTYPE LC_COLLATE LC_NUMERIC LC_TIME)
%french-utf8-locale-name)))
(define (under-locale-or-unresolved locale thunk)
;; On non-GNU systems, an exception may be raised only when the locale is
;; actually used rather than at `make-locale'-time. Thus, we must guard
;; against both.
(if locale
(if (string-contains %host-type "-gnu")
(thunk)
(catch 'system-error thunk
(lambda (key . args)
(throw 'unresolved))))
(throw 'unresolved)))
(define (under-french-locale-or-unresolved thunk)
(under-locale-or-unresolved %french-locale thunk))
(define (under-french-utf8-locale-or-unresolved thunk)
(under-locale-or-unresolved %french-utf8-locale thunk))
(define (string-op str1 str2)
(string<? str1 str2)
(string>? str1 str2))
(define (string-ci-op str1 str2)
(string-ci<? str1 str2)
(string-ci>? str1 str2))
(define (string-fr-op str1 str2)
(under-french-locale-or-unresolved
(lambda ()
(string-locale<? str1 str2 %french-locale)
(string-locale>? str1 str2 %french-locale))))
(define (string-fr-utf8-op str1 str2)
(under-french-utf8-locale-or-unresolved
(lambda ()
(string-locale<? str1 str2 %french-utf8-locale)
(string-locale>? str1 str2 %french-utf8-locale))))
(define (string-fr-ci-op str1 str2)
(under-french-locale-or-unresolved
(lambda ()
(string-locale-ci<? str1 str2 %french-locale)
(string-locale-ci>? str1 str2 %french-locale))))
(define (string-fr-utf8-ci-op str1 str2)
(under-french-utf8-locale-or-unresolved
(lambda ()
(string-locale-ci<? str1 str2 %french-utf8-locale)
(string-locale-ci>? str1 str2 %french-utf8-locale))))
(with-benchmark-prefix "string ops"
(with-benchmark-prefix "short Latin1"
(benchmark "compare initially differing strings" 100000
(string-op short-latin1-string short-latin1-string-diff-at-start))
(benchmark "compare medially differing strings" 100000
(string-op short-latin1-string short-latin1-string-diff-in-middle))
(benchmark "compare terminally differing strings" 100000
(string-op short-latin1-string short-latin1-string-diff-at-end))
(benchmark "compare identical strings" 100000
(string-op short-latin1-string short-latin1-string))
(benchmark "case compare initially differing strings" 100000
(string-ci-op short-latin1-string short-latin1-string-diff-at-start))
(benchmark "case compare medially differing strings" 100000
(string-ci-op short-latin1-string short-latin1-string-diff-in-middle))
(benchmark "case compare terminally differing strings" 100000
(string-ci-op short-latin1-string short-latin1-string-diff-at-end))
(benchmark "case compare identical strings" 100000
(string-ci-op short-latin1-string short-latin1-string))
(benchmark "French Latin-1 locale compare initially differing strings" 100000
(string-fr-op short-latin1-string short-latin1-string-diff-at-start))
(benchmark "French Latin-1 locale compare medially differing strings" 100000
(string-fr-op short-latin1-string short-latin1-string-diff-in-middle))
(benchmark "French Latin-1 locale compare terminally differing strings" 100000
(string-fr-op short-latin1-string short-latin1-string-diff-at-end))
(benchmark "French Latin-1 locale compare identical strings" 100000
(string-fr-op short-latin1-string short-latin1-string))
(benchmark "French Latin-1 locale case compare initially differing strings" 100000
(string-fr-ci-op short-latin1-string short-latin1-string-diff-at-start))
(benchmark "French Latin-1 locale case compare medially differing strings" 100000
(string-fr-ci-op short-latin1-string short-latin1-string-diff-in-middle))
(benchmark "French Latin-1 locale case compare terminally differing strings" 100000
(string-fr-ci-op short-latin1-string short-latin1-string-diff-at-end))
(benchmark "French Latin-1 locale case compare identical strings" 100000
(string-fr-ci-op short-latin1-string short-latin1-string))
(benchmark "French UTF-8 locale compare initially differing strings" 100000
(string-fr-utf8-op short-latin1-string short-latin1-string-diff-at-start))
(benchmark "French UTF-8 locale compare medially differing strings" 100000
(string-fr-utf8-op short-latin1-string short-latin1-string-diff-in-middle))
(benchmark "French UTF-8 locale compare terminally differing strings" 100000
(string-fr-utf8-op short-latin1-string short-latin1-string-diff-at-end))
(benchmark "French UTF-8 locale compare identical strings" 100000
(string-fr-utf8-op short-latin1-string short-latin1-string))
(benchmark "French UTF-8 locale case compare initially differing strings" 100000
(string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-at-start))
(benchmark "French UTF-8 locale case compare medially differing strings" 100000
(string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-in-middle))
(benchmark "French UTF-8 locale case compare terminally differing strings" 100000
(string-fr-utf8-ci-op short-latin1-string short-latin1-string-diff-at-end))
(benchmark "French UTF-8 locale case compare identical strings" 100000
(string-fr-utf8-ci-op short-latin1-string short-latin1-string)))
(with-benchmark-prefix "medium Latin1"
(benchmark "compare initially differing strings" 10000
(string-op medium-latin1-string medium-latin1-string-diff-at-start))
(benchmark "compare medially differing strings" 10000
(string-op medium-latin1-string medium-latin1-string-diff-in-middle))
(benchmark "compare terminally differing strings" 10000
(string-op medium-latin1-string medium-latin1-string-diff-at-end))
(benchmark "compare identical strings" 10000
(string-op medium-latin1-string medium-latin1-string))
(benchmark "case compare initially differing strings" 10000
(string-ci-op medium-latin1-string medium-latin1-string-diff-at-start))
(benchmark "case compare medially differing strings" 10000
(string-ci-op medium-latin1-string medium-latin1-string-diff-in-middle))
(benchmark "case compare terminally differing strings" 10000
(string-ci-op medium-latin1-string medium-latin1-string-diff-at-end))
(benchmark "case compare identical strings" 10000
(string-ci-op medium-latin1-string medium-latin1-string))
(benchmark "French Latin-1 locale compare initially differing strings" 10000
(string-fr-op medium-latin1-string medium-latin1-string-diff-at-start))
(benchmark "French Latin-1 locale compare medially differing strings" 10000
(string-fr-op medium-latin1-string medium-latin1-string-diff-in-middle))
(benchmark "French Latin-1 locale compare terminally differing strings" 10000
(string-fr-op medium-latin1-string medium-latin1-string-diff-at-end))
(benchmark "French Latin-1 locale compare identical strings" 10000
(string-fr-op medium-latin1-string medium-latin1-string))
(benchmark "French Latin-1 locale case compare initially differing strings" 10000
(string-fr-ci-op medium-latin1-string medium-latin1-string-diff-at-start))
(benchmark "French Latin-1 locale case compare medially differing strings" 10000
(string-fr-ci-op medium-latin1-string medium-latin1-string-diff-in-middle))
(benchmark "French Latin-1 locale case compare terminally differing strings" 10000
(string-fr-ci-op medium-latin1-string medium-latin1-string-diff-at-end))
(benchmark "French Latin-1 locale case compare identical strings" 10000
(string-fr-ci-op medium-latin1-string medium-latin1-string))
(benchmark "French UTF-8 locale compare initially differing strings" 10000
(string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-at-start))
(benchmark "French UTF-8 locale compare medially differing strings" 10000
(string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-in-middle))
(benchmark "French UTF-8 locale compare terminally differing strings" 10000
(string-fr-utf8-op medium-latin1-string medium-latin1-string-diff-at-end))
(benchmark "French UTF-8 locale compare identical strings" 10000
(string-fr-utf8-op medium-latin1-string medium-latin1-string))
(benchmark "French UTF-8 locale case compare initially differing strings" 10000
(string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-at-start))
(benchmark "French UTF-8 locale case compare medially differing strings" 10000
(string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-in-middle))
(benchmark "French UTF-8 locale case compare terminally differing strings" 10000
(string-fr-utf8-ci-op medium-latin1-string medium-latin1-string-diff-at-end))
(benchmark "French UTF-8 locale case compare identical strings" 10000
(string-fr-utf8-ci-op medium-latin1-string medium-latin1-string)))
(with-benchmark-prefix "long Latin1"
(benchmark "compare initially differing strings" 1000
(string-op long-latin1-string long-latin1-string-diff-at-start))
(benchmark "compare medially differing strings" 1000
(string-op long-latin1-string long-latin1-string-diff-in-middle))
(benchmark "compare terminally differing strings" 1000
(string-op long-latin1-string long-latin1-string-diff-at-end))
(benchmark "compare identical strings" 1000
(string-op long-latin1-string long-latin1-string))
(benchmark "case compare initially differing strings" 1000
(string-ci-op long-latin1-string long-latin1-string-diff-at-start))
(benchmark "case compare medially differing strings" 1000
(string-ci-op long-latin1-string long-latin1-string-diff-in-middle))
(benchmark "case compare terminally differing strings" 1000
(string-ci-op long-latin1-string long-latin1-string-diff-at-end))
(benchmark "case compare identical strings" 1000
(string-ci-op long-latin1-string long-latin1-string))
(benchmark "French Latin-1 locale compare initially differing strings" 1000
(string-fr-op long-latin1-string long-latin1-string-diff-at-start))
(benchmark "French Latin-1 locale compare medially differing strings" 1000
(string-fr-op long-latin1-string long-latin1-string-diff-in-middle))
(benchmark "French Latin-1 locale compare terminally differing strings" 1000
(string-fr-op long-latin1-string long-latin1-string-diff-at-end))
(benchmark "French Latin-1 locale compare identical strings" 1000
(string-fr-op long-latin1-string long-latin1-string))
(benchmark "French Latin-1 locale case compare initially differing strings" 1000
(string-fr-ci-op long-latin1-string long-latin1-string-diff-at-start))
(benchmark "French Latin-1 locale case compare medially differing strings" 1000
(string-fr-ci-op long-latin1-string long-latin1-string-diff-in-middle))
(benchmark "French Latin-1 locale case compare terminally differing strings" 1000
(string-fr-ci-op long-latin1-string long-latin1-string-diff-at-end))
(benchmark "French Latin-1 locale case compare identical strings" 1000
(string-fr-ci-op long-latin1-string long-latin1-string))
(benchmark "French UTF-8 locale compare initially differing strings" 1000
(string-fr-utf8-op long-latin1-string long-latin1-string-diff-at-start))
(benchmark "French UTF-8 locale compare medially differing strings" 1000
(string-fr-utf8-op long-latin1-string long-latin1-string-diff-in-middle))
(benchmark "French UTF-8 locale compare terminally differing strings" 1000
(string-fr-utf8-op long-latin1-string long-latin1-string-diff-at-end))
(benchmark "French UTF-8 locale compare identical strings" 1000
(string-fr-utf8-op long-latin1-string long-latin1-string))
(benchmark "French UTF-8 locale case compare initially differing strings" 1000
(string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-at-start))
(benchmark "French UTF-8 locale case compare medially differing strings" 1000
(string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-in-middle))
(benchmark "French UTF-8 locale case compare terminally differing strings" 1000
(string-fr-utf8-ci-op long-latin1-string long-latin1-string-diff-at-end))
(benchmark "French UTF-8 locale case compare identical strings" 1000
(string-fr-utf8-ci-op long-latin1-string long-latin1-string)))
(with-benchmark-prefix "short Unicode"
(benchmark "compare initially differing strings" 100000
(string-op short-cased-string short-cased-string-diff-at-start))
(benchmark "compare medially differing strings" 100000
(string-op short-cased-string short-cased-string-diff-in-middle))
(benchmark "compare terminally differing strings" 100000
(string-op short-cased-string short-cased-string-diff-at-end))
(benchmark "compare identical strings" 100000
(string-op short-cased-string short-cased-string))
(benchmark "case compare initially differing strings" 100000
(string-ci-op short-cased-string short-cased-string-diff-at-start))
(benchmark "case compare medially differing strings" 100000
(string-ci-op short-cased-string short-cased-string-diff-in-middle))
(benchmark "case compare terminally differing strings" 100000
(string-ci-op short-cased-string short-cased-string-diff-at-end))
(benchmark "case compare identical strings" 100000
(string-ci-op short-cased-string short-cased-string))
(benchmark "French UTF-8 locale compare initially differing strings" 100000
(string-fr-utf8-op short-cased-string short-cased-string-diff-at-start))
(benchmark "French UTF-8 locale compare medially differing strings" 100000
(string-fr-utf8-op short-cased-string short-cased-string-diff-in-middle))
(benchmark "French UTF-8 locale compare terminally differing strings" 100000
(string-fr-utf8-op short-cased-string short-cased-string-diff-at-end))
(benchmark "French UTF-8 locale compare identical strings" 100000
(string-fr-utf8-op short-cased-string short-cased-string))
(benchmark "French UTF-8 locale case compare initially differing strings" 100000
(string-fr-utf8-ci-op short-cased-string short-cased-string-diff-at-start))
(benchmark "French UTF-8 locale case compare medially differing strings" 100000
(string-fr-utf8-ci-op short-cased-string short-cased-string-diff-in-middle))
(benchmark "French UTF-8 locale case compare terminally differing strings" 100000
(string-fr-utf8-ci-op short-cased-string short-cased-string-diff-at-end))
(benchmark "French UTF-8 locale case compare identical strings" 100000
(string-fr-utf8-ci-op short-cased-string short-cased-string)))
(with-benchmark-prefix "medium Unicode"
(benchmark "compare initially differing strings" 10000
(string-op medium-cased-string medium-cased-string-diff-at-start))
(benchmark "compare medially differing strings" 10000
(string-op medium-cased-string medium-cased-string-diff-in-middle))
(benchmark "compare terminally differing strings" 10000
(string-op medium-cased-string medium-cased-string-diff-at-end))
(benchmark "compare identical strings" 10000
(string-op medium-cased-string medium-cased-string))
(benchmark "case compare initially differing strings" 10000
(string-ci-op medium-cased-string medium-cased-string-diff-at-start))
(benchmark "case compare medially differing strings" 10000
(string-ci-op medium-cased-string medium-cased-string-diff-in-middle))
(benchmark "case compare terminally differing strings" 10000
(string-ci-op medium-cased-string medium-cased-string-diff-at-end))
(benchmark "case compare identical strings" 10000
(string-ci-op medium-cased-string medium-cased-string))
(benchmark "French UTF-8 locale compare initially differing strings" 10000
(string-fr-utf8-op medium-cased-string medium-cased-string-diff-at-start))
(benchmark "French UTF-8 locale compare medially differing strings" 10000
(string-fr-utf8-op medium-cased-string medium-cased-string-diff-in-middle))
(benchmark "French UTF-8 locale compare terminally differing strings" 10000
(string-fr-utf8-op medium-cased-string medium-cased-string-diff-at-end))
(benchmark "French UTF-8 locale compare identical strings" 10000
(string-fr-utf8-op medium-cased-string medium-cased-string))
(benchmark "French UTF-8 locale case compare initially differing strings" 10000
(string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-at-start))
(benchmark "French UTF-8 locale case compare medially differing strings" 10000
(string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-in-middle))
(benchmark "French UTF-8 locale case compare terminally differing strings" 10000
(string-fr-utf8-ci-op medium-cased-string medium-cased-string-diff-at-end))
(benchmark "French UTF-8 locale case compare identical strings" 10000
(string-fr-utf8-ci-op medium-cased-string medium-cased-string)))
(with-benchmark-prefix "long Unicode"
(benchmark "compare initially differing strings" 1000
(string-op long-cased-string long-cased-string-diff-at-start))
(benchmark "compare medially differing strings" 1000
(string-op long-cased-string long-cased-string-diff-in-middle))
(benchmark "compare terminally differing strings" 1000
(string-op long-cased-string long-cased-string-diff-at-end))
(benchmark "compare identical strings" 1000
(string-op long-cased-string long-cased-string))
(benchmark "case compare initially differing strings" 1000
(string-ci-op long-cased-string long-cased-string-diff-at-start))
(benchmark "case compare medially differing strings" 1000
(string-ci-op long-cased-string long-cased-string-diff-in-middle))
(benchmark "case compare terminally differing strings" 1000
(string-ci-op long-cased-string long-cased-string-diff-at-end))
(benchmark "case compare identical strings" 1000
(string-ci-op long-cased-string long-cased-string))
(benchmark "French UTF-8 locale compare initially differing strings" 1000
(string-fr-utf8-op long-cased-string long-cased-string-diff-at-start))
(benchmark "French UTF-8 locale compare medially differing strings" 1000
(string-fr-utf8-op long-cased-string long-cased-string-diff-in-middle))
(benchmark "French UTF-8 locale compare terminally differing strings" 1000
(string-fr-utf8-op long-cased-string long-cased-string-diff-at-end))
(benchmark "French UTF-8 locale compare identical strings" 1000
(string-fr-utf8-op long-cased-string long-cased-string))
(benchmark "French UTF-8 locale case compare initially differing strings" 1000
(string-fr-utf8-ci-op long-cased-string long-cased-string-diff-at-start))
(benchmark "French UTF-8 locale case compare medially differing strings" 1000
(string-fr-utf8-ci-op long-cased-string long-cased-string-diff-in-middle))
(benchmark "French UTF-8 locale case compare terminally differing strings" 1000
(string-fr-utf8-ci-op long-cased-string long-cased-string-diff-at-end))
(benchmark "French UTF-8 locale case compare identical strings" 1000
(string-fr-utf8-ci-op long-cased-string long-cased-string))))