mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-29 19:30:36 +02:00
Use Gnulib's `regex' module.
This should help with regex portability, as reported in <http://bugs.gnu.org/10684> for Darwin 8.11. * m4/gnulib-cache.m4 (gl_MODULES): Add `regex'. * configure.ac: Remove header checks for regex.h, rxposix.h, and rx/rxposix.h. Remove check for the `regcomp' function. Remove definition of `HAVE_REGCOMP'. Define `ENABLE_REGEX'. * libguile/init.c: Check for `ENABLE_REGEX' instead of `HAVE_REGCOMP'. * libguile/regex-posix.c: Always include <regex.h>. Remove #ifdefs for rxposix.h and co.
This commit is contained in:
parent
1ba05158eb
commit
eb4a14ed47
40 changed files with 16040 additions and 43 deletions
184
lib/Makefile.am
184
lib/Makefile.am
|
@ -21,7 +21,7 @@
|
|||
# the same distribution terms as the rest of that program.
|
||||
#
|
||||
# Generated by gnulib-tool.
|
||||
# Reproduce by: gnulib-tool --import --dir=. --local-dir=gnulib-local --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl=3 --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alignof alloca-opt announce-gen autobuild bind byteswap canonicalize-lgpl ceil close connect dirfd duplocale environ extensions flock floor fpieee frexp full-read full-write func gendocs getaddrinfo getpeername getsockname getsockopt git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload havelib iconv_open-utf inet_ntop inet_pton isinf isnan ldexp lib-symbol-versions lib-symbol-visibility libunistring listen localcharset locale log1p maintainer-makefile malloc-gnu malloca nproc open pipe2 putenv recv recvfrom rename send sendto setenv setsockopt shutdown socket stat-time stdlib strftime striconveh string sys_stat trunc verify vsnprintf warnings wchar
|
||||
# Reproduce by: gnulib-tool --import --dir=. --local-dir=gnulib-local --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl=3 --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alignof alloca-opt announce-gen autobuild bind byteswap canonicalize-lgpl ceil close connect dirfd duplocale environ extensions flock floor fpieee frexp full-read full-write func gendocs getaddrinfo getpeername getsockname getsockopt git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload havelib iconv_open-utf inet_ntop inet_pton isinf isnan ldexp lib-symbol-versions lib-symbol-visibility libunistring listen localcharset locale log1p maintainer-makefile malloc-gnu malloca nproc open pipe2 putenv recv recvfrom regex rename send sendto setenv setsockopt shutdown socket stat-time stdlib strftime striconveh string sys_stat trunc verify vsnprintf warnings wchar
|
||||
|
||||
AUTOMAKE_OPTIONS = 1.5 gnits subdir-objects
|
||||
|
||||
|
@ -165,6 +165,15 @@ EXTRA_libgnu_la_SOURCES += bind.c
|
|||
|
||||
## end gnulib module bind
|
||||
|
||||
## begin gnulib module btowc
|
||||
|
||||
|
||||
EXTRA_DIST += btowc.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += btowc.c
|
||||
|
||||
## end gnulib module btowc
|
||||
|
||||
## begin gnulib module byteswap
|
||||
|
||||
BUILT_SOURCES += $(BYTESWAP_H)
|
||||
|
@ -755,6 +764,39 @@ EXTRA_libgnu_la_SOURCES += isnan.c isnanl.c
|
|||
|
||||
## end gnulib module isnanl
|
||||
|
||||
## begin gnulib module langinfo
|
||||
|
||||
BUILT_SOURCES += langinfo.h
|
||||
|
||||
# Generate langinfo.h from the langinfo.in.h template.  We need this in
# order to create an empty placeholder for <langinfo.h> when the system
# doesn't have one.
# The header is assembled in $@-t and renamed to $@ only after every step
# has succeeded, so a failed run never leaves a partial langinfo.h behind.
# Every recipe line must start with a TAB.
langinfo.h: langinfo.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H)
	$(AM_V_GEN)rm -f $@-t $@ && \
	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
	  sed -e 's|@''GUARD_PREFIX''@|GL|g' \
	      -e 's|@''HAVE_LANGINFO_H''@|$(HAVE_LANGINFO_H)|g' \
	      -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
	      -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
	      -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
	      -e 's|@''NEXT_LANGINFO_H''@|$(NEXT_LANGINFO_H)|g' \
	      -e 's/@''GNULIB_NL_LANGINFO''@/$(GNULIB_NL_LANGINFO)/g' \
	      -e 's|@''HAVE_LANGINFO_CODESET''@|$(HAVE_LANGINFO_CODESET)|g' \
	      -e 's|@''HAVE_LANGINFO_T_FMT_AMPM''@|$(HAVE_LANGINFO_T_FMT_AMPM)|g' \
	      -e 's|@''HAVE_LANGINFO_ERA''@|$(HAVE_LANGINFO_ERA)|g' \
	      -e 's|@''HAVE_LANGINFO_YESEXPR''@|$(HAVE_LANGINFO_YESEXPR)|g' \
	      -e 's|@''HAVE_NL_LANGINFO''@|$(HAVE_NL_LANGINFO)|g' \
	      -e 's|@''REPLACE_NL_LANGINFO''@|$(REPLACE_NL_LANGINFO)|g' \
	      -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
	      -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \
	      < $(srcdir)/langinfo.in.h; \
	} > $@-t && \
	mv $@-t $@
|
||||
MOSTLYCLEANFILES += langinfo.h langinfo.h-t
|
||||
|
||||
EXTRA_DIST += langinfo.in.h
|
||||
|
||||
## end gnulib module langinfo
|
||||
|
||||
## begin gnulib module lib-symbol-visibility
|
||||
|
||||
# The value of $(CFLAG_VISIBILITY) needs to be added to the CFLAGS for the
|
||||
|
@ -1106,6 +1148,33 @@ EXTRA_DIST += math.in.h
|
|||
|
||||
## end gnulib module math
|
||||
|
||||
## begin gnulib module mbrtowc
|
||||
|
||||
|
||||
EXTRA_DIST += mbrtowc.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += mbrtowc.c
|
||||
|
||||
## end gnulib module mbrtowc
|
||||
|
||||
## begin gnulib module mbsinit
|
||||
|
||||
|
||||
EXTRA_DIST += mbsinit.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += mbsinit.c
|
||||
|
||||
## end gnulib module mbsinit
|
||||
|
||||
## begin gnulib module mbtowc
|
||||
|
||||
|
||||
EXTRA_DIST += mbtowc-impl.h mbtowc.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += mbtowc.c
|
||||
|
||||
## end gnulib module mbtowc
|
||||
|
||||
## begin gnulib module memchr
|
||||
|
||||
|
||||
|
@ -1198,6 +1267,15 @@ EXTRA_DIST += netinet_in.in.h
|
|||
|
||||
## end gnulib module netinet_in
|
||||
|
||||
## begin gnulib module nl_langinfo
|
||||
|
||||
|
||||
EXTRA_DIST += nl_langinfo.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += nl_langinfo.c
|
||||
|
||||
## end gnulib module nl_langinfo
|
||||
|
||||
## begin gnulib module nproc
|
||||
|
||||
libgnu_la_SOURCES += nproc.c
|
||||
|
@ -1282,6 +1360,15 @@ EXTRA_libgnu_la_SOURCES += recvfrom.c
|
|||
|
||||
## end gnulib module recvfrom
|
||||
|
||||
## begin gnulib module regex
|
||||
|
||||
|
||||
EXTRA_DIST += regcomp.c regex.c regex.h regex_internal.c regex_internal.h regexec.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += regcomp.c regex.c regex_internal.c regexec.c
|
||||
|
||||
## end gnulib module regex
|
||||
|
||||
## begin gnulib module rename
|
||||
|
||||
|
||||
|
@ -1921,6 +2008,22 @@ EXTRA_DIST += stdlib.in.h
|
|||
|
||||
## end gnulib module stdlib
|
||||
|
||||
## begin gnulib module strcase
|
||||
|
||||
|
||||
EXTRA_DIST += strcasecmp.c strncasecmp.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += strcasecmp.c strncasecmp.c
|
||||
|
||||
## end gnulib module strcase
|
||||
|
||||
## begin gnulib module streq
|
||||
|
||||
|
||||
EXTRA_DIST += streq.h
|
||||
|
||||
## end gnulib module streq
|
||||
|
||||
## begin gnulib module strftime
|
||||
|
||||
libgnu_la_SOURCES += strftime.c
|
||||
|
@ -2040,6 +2143,37 @@ EXTRA_DIST += string.in.h
|
|||
|
||||
## end gnulib module string
|
||||
|
||||
## begin gnulib module strings
|
||||
|
||||
BUILT_SOURCES += strings.h
|
||||
|
||||
# Generate strings.h from the strings.in.h template.  We need this in order
# to create <strings.h> when the system doesn't have one that works with the
# given compiler.
# The header is assembled in $@-t and renamed to $@ only after every step
# has succeeded, so a failed run never leaves a partial strings.h behind.
# GNULIB_FFS is substituted with '/' as the sed delimiter, matching how the
# langinfo.h and wctype.h rules substitute their GNULIB_* variables.
# NOTE(review): presumably GNULIB_* values can be C preprocessor expressions
# containing '|' -- confirm against m4/gnulib-common.m4.
# Every recipe line must start with a TAB.
strings.h: strings.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) $(ARG_NONNULL_H)
	$(AM_V_GEN)rm -f $@-t $@ && \
	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
	  sed -e 's|@''GUARD_PREFIX''@|GL|g' \
	      -e 's|@''HAVE_STRINGS_H''@|$(HAVE_STRINGS_H)|g' \
	      -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
	      -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
	      -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
	      -e 's|@''NEXT_STRINGS_H''@|$(NEXT_STRINGS_H)|g' \
	      -e 's/@''GNULIB_FFS''@/$(GNULIB_FFS)/g' \
	      -e 's|@''HAVE_FFS''@|$(HAVE_FFS)|g' \
	      -e 's|@''HAVE_STRCASECMP''@|$(HAVE_STRCASECMP)|g' \
	      -e 's|@''HAVE_DECL_STRNCASECMP''@|$(HAVE_DECL_STRNCASECMP)|g' \
	      -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
	      -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \
	      -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \
	      < $(srcdir)/strings.in.h; \
	} > $@-t && \
	mv $@-t $@
|
||||
MOSTLYCLEANFILES += strings.h strings.h-t
|
||||
|
||||
EXTRA_DIST += strings.in.h
|
||||
|
||||
## end gnulib module strings
|
||||
|
||||
## begin gnulib module sys_file
|
||||
|
||||
BUILT_SOURCES += sys/file.h
|
||||
|
@ -2704,6 +2838,54 @@ EXTRA_DIST += wchar.in.h
|
|||
|
||||
## end gnulib module wchar
|
||||
|
||||
## begin gnulib module wcrtomb
|
||||
|
||||
|
||||
EXTRA_DIST += wcrtomb.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += wcrtomb.c
|
||||
|
||||
## end gnulib module wcrtomb
|
||||
|
||||
## begin gnulib module wctype-h
|
||||
|
||||
BUILT_SOURCES += wctype.h
|
||||
|
||||
# Generate wctype.h from the wctype.in.h template.  We need this in order to
# create <wctype.h> when the system doesn't have one that works with the
# given compiler.
# The header is assembled in $@-t and renamed to $@ only after every step
# has succeeded, so a failed run never leaves a partial wctype.h behind.
# Every recipe line must start with a TAB.
wctype.h: wctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H)
	$(AM_V_GEN)rm -f $@-t $@ && \
	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
	  sed -e 's|@''GUARD_PREFIX''@|GL|g' \
	      -e 's/@''HAVE_WCTYPE_H''@/$(HAVE_WCTYPE_H)/g' \
	      -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
	      -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
	      -e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
	      -e 's|@''NEXT_WCTYPE_H''@|$(NEXT_WCTYPE_H)|g' \
	      -e 's/@''GNULIB_ISWBLANK''@/$(GNULIB_ISWBLANK)/g' \
	      -e 's/@''GNULIB_WCTYPE''@/$(GNULIB_WCTYPE)/g' \
	      -e 's/@''GNULIB_ISWCTYPE''@/$(GNULIB_ISWCTYPE)/g' \
	      -e 's/@''GNULIB_WCTRANS''@/$(GNULIB_WCTRANS)/g' \
	      -e 's/@''GNULIB_TOWCTRANS''@/$(GNULIB_TOWCTRANS)/g' \
	      -e 's/@''HAVE_ISWBLANK''@/$(HAVE_ISWBLANK)/g' \
	      -e 's/@''HAVE_ISWCNTRL''@/$(HAVE_ISWCNTRL)/g' \
	      -e 's/@''HAVE_WCTYPE_T''@/$(HAVE_WCTYPE_T)/g' \
	      -e 's/@''HAVE_WCTRANS_T''@/$(HAVE_WCTRANS_T)/g' \
	      -e 's/@''HAVE_WINT_T''@/$(HAVE_WINT_T)/g' \
	      -e 's/@''REPLACE_ISWBLANK''@/$(REPLACE_ISWBLANK)/g' \
	      -e 's/@''REPLACE_ISWCNTRL''@/$(REPLACE_ISWCNTRL)/g' \
	      -e 's/@''REPLACE_TOWLOWER''@/$(REPLACE_TOWLOWER)/g' \
	      -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
	      -e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \
	      < $(srcdir)/wctype.in.h; \
	} > $@-t && \
	mv $@-t $@
|
||||
MOSTLYCLEANFILES += wctype.h wctype.h-t
|
||||
|
||||
EXTRA_DIST += wctype.in.h
|
||||
|
||||
## end gnulib module wctype-h
|
||||
|
||||
## begin gnulib module write
|
||||
|
||||
|
||||
|
|
39
lib/btowc.c
Normal file
39
lib/btowc.c
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* Convert unibyte character to wide character.
|
||||
Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2008.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <wchar.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Convert the single byte C to a wide character.
   Return WEOF when C is EOF or when mbtowc() rejects the byte;
   otherwise return the wide character mbtowc() produced.  */
wint_t
btowc (int c)
{
  char byte[1];
  wchar_t wide;

  /* EOF never maps to a wide character.  */
  if (c == EOF)
    return WEOF;

  byte[0] = c;
  if (mbtowc (&wide, byte, 1) < 0)
    return WEOF;

  return wide;
}
|
177
lib/langinfo.in.h
Normal file
177
lib/langinfo.in.h
Normal file
|
@ -0,0 +1,177 @@
|
|||
/* Substitute for and wrapper around <langinfo.h>.
|
||||
Copyright (C) 2009-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
/*
|
||||
* POSIX <langinfo.h> for platforms that lack it or have an incomplete one.
|
||||
* <http://www.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>
|
||||
*/
|
||||
|
||||
#ifndef _@GUARD_PREFIX@_LANGINFO_H
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
@PRAGMA_SYSTEM_HEADER@
|
||||
#endif
|
||||
@PRAGMA_COLUMNS@
|
||||
|
||||
/* The include_next requires a split double-inclusion guard. */
|
||||
#if @HAVE_LANGINFO_H@
|
||||
# @INCLUDE_NEXT@ @NEXT_LANGINFO_H@
|
||||
#endif
|
||||
|
||||
#ifndef _@GUARD_PREFIX@_LANGINFO_H
|
||||
#define _@GUARD_PREFIX@_LANGINFO_H
|
||||
|
||||
|
||||
#if !@HAVE_LANGINFO_H@
|
||||
|
||||
/* A platform that lacks <langinfo.h>. */
|
||||
|
||||
/* Assume that it also lacks <nl_types.h> and the nl_item type. */
|
||||
# if !GNULIB_defined_nl_item
|
||||
typedef int nl_item;
|
||||
# define GNULIB_defined_nl_item 1
|
||||
# endif
|
||||
|
||||
/* nl_langinfo items of the LC_CTYPE category */
|
||||
# define CODESET 10000
|
||||
/* nl_langinfo items of the LC_NUMERIC category */
|
||||
# define RADIXCHAR 10001
|
||||
# define THOUSEP 10002
|
||||
/* nl_langinfo items of the LC_TIME category */
|
||||
# define D_T_FMT 10003
|
||||
# define D_FMT 10004
|
||||
# define T_FMT 10005
|
||||
# define T_FMT_AMPM 10006
|
||||
# define AM_STR 10007
|
||||
# define PM_STR 10008
|
||||
# define DAY_1 10009
|
||||
# define DAY_2 (DAY_1 + 1)
|
||||
# define DAY_3 (DAY_1 + 2)
|
||||
# define DAY_4 (DAY_1 + 3)
|
||||
# define DAY_5 (DAY_1 + 4)
|
||||
# define DAY_6 (DAY_1 + 5)
|
||||
# define DAY_7 (DAY_1 + 6)
|
||||
# define ABDAY_1 10016
|
||||
# define ABDAY_2 (ABDAY_1 + 1)
|
||||
# define ABDAY_3 (ABDAY_1 + 2)
|
||||
# define ABDAY_4 (ABDAY_1 + 3)
|
||||
# define ABDAY_5 (ABDAY_1 + 4)
|
||||
# define ABDAY_6 (ABDAY_1 + 5)
|
||||
# define ABDAY_7 (ABDAY_1 + 6)
|
||||
# define MON_1 10023
|
||||
# define MON_2 (MON_1 + 1)
|
||||
# define MON_3 (MON_1 + 2)
|
||||
# define MON_4 (MON_1 + 3)
|
||||
# define MON_5 (MON_1 + 4)
|
||||
# define MON_6 (MON_1 + 5)
|
||||
# define MON_7 (MON_1 + 6)
|
||||
# define MON_8 (MON_1 + 7)
|
||||
# define MON_9 (MON_1 + 8)
|
||||
# define MON_10 (MON_1 + 9)
|
||||
# define MON_11 (MON_1 + 10)
|
||||
# define MON_12 (MON_1 + 11)
|
||||
# define ABMON_1 10035
|
||||
# define ABMON_2 (ABMON_1 + 1)
|
||||
# define ABMON_3 (ABMON_1 + 2)
|
||||
# define ABMON_4 (ABMON_1 + 3)
|
||||
# define ABMON_5 (ABMON_1 + 4)
|
||||
# define ABMON_6 (ABMON_1 + 5)
|
||||
# define ABMON_7 (ABMON_1 + 6)
|
||||
# define ABMON_8 (ABMON_1 + 7)
|
||||
# define ABMON_9 (ABMON_1 + 8)
|
||||
# define ABMON_10 (ABMON_1 + 9)
|
||||
# define ABMON_11 (ABMON_1 + 10)
|
||||
# define ABMON_12 (ABMON_1 + 11)
|
||||
# define ERA 10047
|
||||
# define ERA_D_FMT 10048
|
||||
# define ERA_D_T_FMT 10049
|
||||
# define ERA_T_FMT 10050
|
||||
# define ALT_DIGITS 10051
|
||||
/* nl_langinfo items of the LC_MONETARY category */
|
||||
# define CRNCYSTR 10052
|
||||
/* nl_langinfo items of the LC_MESSAGES category */
|
||||
# define YESEXPR 10053
|
||||
# define NOEXPR 10054
|
||||
|
||||
#else
|
||||
|
||||
/* A platform that has <langinfo.h>. */
|
||||
|
||||
# if !@HAVE_LANGINFO_CODESET@
|
||||
# define CODESET 10000
|
||||
# define GNULIB_defined_CODESET 1
|
||||
# endif
|
||||
|
||||
# if !@HAVE_LANGINFO_T_FMT_AMPM@
|
||||
# define T_FMT_AMPM 10006
|
||||
# define GNULIB_defined_T_FMT_AMPM 1
|
||||
# endif
|
||||
|
||||
# if !@HAVE_LANGINFO_ERA@
|
||||
# define ERA 10047
|
||||
# define ERA_D_FMT 10048
|
||||
# define ERA_D_T_FMT 10049
|
||||
# define ERA_T_FMT 10050
|
||||
# define ALT_DIGITS 10051
|
||||
# define GNULIB_defined_ERA 1
|
||||
# endif
|
||||
|
||||
# if !@HAVE_LANGINFO_YESEXPR@
|
||||
# define YESEXPR 10053
|
||||
# define NOEXPR 10054
|
||||
# define GNULIB_defined_YESEXPR 1
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */
|
||||
|
||||
/* The definition of _GL_WARN_ON_USE is copied here. */
|
||||
|
||||
/* Declare overridden functions. */
|
||||
|
||||
|
||||
/* Return a piece of locale dependent information.
|
||||
Note: The difference between nl_langinfo (CODESET) and locale_charset ()
|
||||
is that the latter normalizes the encoding names to GNU conventions. */
|
||||
|
||||
#if @GNULIB_NL_LANGINFO@
|
||||
# if @REPLACE_NL_LANGINFO@
|
||||
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
|
||||
# undef nl_langinfo
|
||||
# define nl_langinfo rpl_nl_langinfo
|
||||
# endif
|
||||
_GL_FUNCDECL_RPL (nl_langinfo, char *, (nl_item item));
|
||||
_GL_CXXALIAS_RPL (nl_langinfo, char *, (nl_item item));
|
||||
# else
|
||||
# if !@HAVE_NL_LANGINFO@
|
||||
_GL_FUNCDECL_SYS (nl_langinfo, char *, (nl_item item));
|
||||
# endif
|
||||
_GL_CXXALIAS_SYS (nl_langinfo, char *, (nl_item item));
|
||||
# endif
|
||||
_GL_CXXALIASWARN (nl_langinfo);
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef nl_langinfo
|
||||
# if HAVE_RAW_DECL_NL_LANGINFO
|
||||
_GL_WARN_ON_USE (nl_langinfo, "nl_langinfo is not portable - "
|
||||
"use gnulib module nl_langinfo for portability");
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _@GUARD_PREFIX@_LANGINFO_H */
|
||||
#endif /* _@GUARD_PREFIX@_LANGINFO_H */
|
396
lib/mbrtowc.c
Normal file
396
lib/mbrtowc.c
Normal file
|
@ -0,0 +1,396 @@
|
|||
/* Convert multibyte character to wide character.
|
||||
Copyright (C) 1999-2002, 2005-2012 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2008.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <wchar.h>
|
||||
|
||||
#if GNULIB_defined_mbstate_t
|
||||
/* Implement mbrtowc() on top of mbtowc(). */
|
||||
|
||||
# include <errno.h>
|
||||
# include <stdlib.h>
|
||||
|
||||
# include "localcharset.h"
|
||||
# include "streq.h"
|
||||
# include "verify.h"
|
||||
|
||||
|
||||
verify (sizeof (mbstate_t) >= 4);
|
||||
|
||||
static char internal_state[4];
|
||||
|
||||
size_t
|
||||
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
|
||||
{
|
||||
char *pstate = (char *)ps;
|
||||
|
||||
if (s == NULL)
|
||||
{
|
||||
pwc = NULL;
|
||||
s = "";
|
||||
n = 1;
|
||||
}
|
||||
|
||||
if (n == 0)
|
||||
return (size_t)(-2);
|
||||
|
||||
/* Here n > 0. */
|
||||
|
||||
if (pstate == NULL)
|
||||
pstate = internal_state;
|
||||
|
||||
{
|
||||
size_t nstate = pstate[0];
|
||||
char buf[4];
|
||||
const char *p;
|
||||
size_t m;
|
||||
|
||||
switch (nstate)
|
||||
{
|
||||
case 0:
|
||||
p = s;
|
||||
m = n;
|
||||
break;
|
||||
case 3:
|
||||
buf[2] = pstate[3];
|
||||
/*FALLTHROUGH*/
|
||||
case 2:
|
||||
buf[1] = pstate[2];
|
||||
/*FALLTHROUGH*/
|
||||
case 1:
|
||||
buf[0] = pstate[1];
|
||||
p = buf;
|
||||
m = nstate;
|
||||
buf[m++] = s[0];
|
||||
if (n >= 2 && m < 4)
|
||||
{
|
||||
buf[m++] = s[1];
|
||||
if (n >= 3 && m < 4)
|
||||
buf[m++] = s[2];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
errno = EINVAL;
|
||||
return (size_t)(-1);
|
||||
}
|
||||
|
||||
/* Here m > 0. */
|
||||
|
||||
# if __GLIBC__ || defined __UCLIBC__
|
||||
/* Work around bug <http://sourceware.org/bugzilla/show_bug.cgi?id=9674> */
|
||||
mbtowc (NULL, NULL, 0);
|
||||
# endif
|
||||
{
|
||||
int res = mbtowc (pwc, p, m);
|
||||
|
||||
if (res >= 0)
|
||||
{
|
||||
if (pwc != NULL && ((*pwc == 0) != (res == 0)))
|
||||
abort ();
|
||||
if (nstate >= (res > 0 ? res : 1))
|
||||
abort ();
|
||||
res -= nstate;
|
||||
pstate[0] = 0;
|
||||
return res;
|
||||
}
|
||||
|
||||
/* mbtowc does not distinguish between invalid and incomplete multibyte
|
||||
sequences. But mbrtowc needs to make this distinction.
|
||||
There are two possible approaches:
|
||||
- Use iconv() and its return value.
|
||||
- Use built-in knowledge about the possible encodings.
|
||||
Given the low quality of implementation of iconv() on the systems that
|
||||
lack mbrtowc(), we use the second approach.
|
||||
The possible encodings are:
|
||||
- 8-bit encodings,
|
||||
- EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
|
||||
- UTF-8.
|
||||
Use specialized code for each. */
|
||||
if (m >= 4 || m >= MB_CUR_MAX)
|
||||
goto invalid;
|
||||
/* Here MB_CUR_MAX > 1 and 0 < m < 4. */
|
||||
{
|
||||
const char *encoding = locale_charset ();
|
||||
|
||||
if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
|
||||
{
|
||||
/* Cf. unistr/u8-mblen.c. */
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if (c >= 0xc2)
|
||||
{
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (m == 1)
|
||||
goto incomplete;
|
||||
}
|
||||
else if (c < 0xf0)
|
||||
{
|
||||
if (m == 1)
|
||||
goto incomplete;
|
||||
if (m == 2)
|
||||
{
|
||||
unsigned char c2 = (unsigned char) p[1];
|
||||
|
||||
if ((c2 ^ 0x80) < 0x40
|
||||
&& (c >= 0xe1 || c2 >= 0xa0)
|
||||
&& (c != 0xed || c2 < 0xa0))
|
||||
goto incomplete;
|
||||
}
|
||||
}
|
||||
else if (c <= 0xf4)
|
||||
{
|
||||
if (m == 1)
|
||||
goto incomplete;
|
||||
else /* m == 2 || m == 3 */
|
||||
{
|
||||
unsigned char c2 = (unsigned char) p[1];
|
||||
|
||||
if ((c2 ^ 0x80) < 0x40
|
||||
&& (c >= 0xf1 || c2 >= 0x90)
|
||||
&& (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))
|
||||
{
|
||||
if (m == 2)
|
||||
goto incomplete;
|
||||
else /* m == 3 */
|
||||
{
|
||||
unsigned char c3 = (unsigned char) p[2];
|
||||
|
||||
if ((c3 ^ 0x80) < 0x40)
|
||||
goto incomplete;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
goto invalid;
|
||||
}
|
||||
|
||||
/* As a reference for this code, you can use the GNU libiconv
|
||||
implementation. Look for uses of the RET_TOOFEW macro. */
|
||||
|
||||
if (STREQ (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
|
||||
{
|
||||
if (m == 1)
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
|
||||
goto incomplete;
|
||||
}
|
||||
if (m == 2)
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if (c == 0x8f)
|
||||
{
|
||||
unsigned char c2 = (unsigned char) p[1];
|
||||
|
||||
if (c2 >= 0xa1 && c2 < 0xff)
|
||||
goto incomplete;
|
||||
}
|
||||
}
|
||||
goto invalid;
|
||||
}
|
||||
if (STREQ (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
|
||||
|| STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
|
||||
|| STREQ (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
|
||||
{
|
||||
if (m == 1)
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if (c >= 0xa1 && c < 0xff)
|
||||
goto incomplete;
|
||||
}
|
||||
goto invalid;
|
||||
}
|
||||
if (STREQ (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
|
||||
{
|
||||
if (m == 1)
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
|
||||
goto incomplete;
|
||||
}
|
||||
else /* m == 2 || m == 3 */
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if (c == 0x8e)
|
||||
goto incomplete;
|
||||
}
|
||||
goto invalid;
|
||||
}
|
||||
if (STREQ (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
|
||||
{
|
||||
if (m == 1)
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
|
||||
goto incomplete;
|
||||
}
|
||||
else /* m == 2 || m == 3 */
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if (c >= 0x90 && c <= 0xe3)
|
||||
{
|
||||
unsigned char c2 = (unsigned char) p[1];
|
||||
|
||||
if (c2 >= 0x30 && c2 <= 0x39)
|
||||
{
|
||||
if (m == 2)
|
||||
goto incomplete;
|
||||
else /* m == 3 */
|
||||
{
|
||||
unsigned char c3 = (unsigned char) p[2];
|
||||
|
||||
if (c3 >= 0x81 && c3 <= 0xfe)
|
||||
goto incomplete;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
goto invalid;
|
||||
}
|
||||
if (STREQ (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
|
||||
{
|
||||
if (m == 1)
|
||||
{
|
||||
unsigned char c = (unsigned char) p[0];
|
||||
|
||||
if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
|
||||
|| (c >= 0xf0 && c <= 0xf9))
|
||||
goto incomplete;
|
||||
}
|
||||
goto invalid;
|
||||
}
|
||||
|
||||
/* An unknown multibyte encoding. */
|
||||
goto incomplete;
|
||||
}
|
||||
|
||||
incomplete:
|
||||
{
|
||||
size_t k = nstate;
|
||||
/* Here 0 <= k < m < 4. */
|
||||
pstate[++k] = s[0];
|
||||
if (k < m)
|
||||
{
|
||||
pstate[++k] = s[1];
|
||||
if (k < m)
|
||||
pstate[++k] = s[2];
|
||||
}
|
||||
if (k != m)
|
||||
abort ();
|
||||
}
|
||||
pstate[0] = m;
|
||||
return (size_t)(-2);
|
||||
|
||||
invalid:
|
||||
errno = EILSEQ;
|
||||
/* The conversion state is undefined, says POSIX. */
|
||||
return (size_t)(-1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
/* Override the system's mbrtowc() function. */
|
||||
|
||||
# undef mbrtowc
|
||||
|
||||
/* Wrapper around the system's mbrtowc() that compensates for the defects
   selected by the MBRTOWC_*_BUG macros at compile time.  With none of the
   macros set it simply forwards to mbrtowc().  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG
  /* Handle a null S here: behave as for an empty string and store no
     wide character.  */
  if (s == NULL)
    {
      pwc = NULL;
      s = "";
      n = 1;
    }
# endif

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t internal_state;

    /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
       hidden internal state, but we can call it on our variable.  */
    if (ps == NULL)
      ps = &internal_state;

    if (!mbsinit (ps))
      {
        /* Parse the rest of the multibyte character byte for byte.  */
        size_t fed = 0;

        while (n > 0)
          {
            wchar_t wide;
            size_t status = mbrtowc (&wide, s, 1, ps);

            if (status == (size_t)(-1))
              return (size_t)(-1);
            fed++;
            if (status != (size_t)(-2))
              {
                /* The multibyte character has been completed.  */
                if (pwc != NULL)
                  *pwc = wide;
                if (wide == 0)
                  return 0;
                return fed;
              }
            s++;
            n--;
          }
        return (size_t)(-2);
      }
  }
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  {
    wchar_t wide;
    size_t status = mbrtowc (&wide, s, n, ps);

    /* On success, force the result to 0 when the character is null.  */
    if (!(status == (size_t)(-1) || status == (size_t)(-2)))
      {
        if (pwc != NULL)
          *pwc = wide;
        if (wide == 0)
          status = 0;
      }
    return status;
  }
# else
  {
#  if MBRTOWC_NULL_ARG1_BUG
    /* Give mbrtowc() somewhere to store the result when PWC is null.  */
    wchar_t scratch;

    if (pwc == NULL)
      pwc = &scratch;
#  endif

    return mbrtowc (pwc, s, n, ps);
  }
# endif
}
|
||||
|
||||
#endif
|
61
lib/mbsinit.c
Normal file
61
lib/mbsinit.c
Normal file
|
@ -0,0 +1,61 @@
|
|||
/* Test for initial conversion state.
|
||||
Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2008.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <wchar.h>
|
||||
|
||||
#include "verify.h"
|
||||
|
||||
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
|
||||
|
||||
/* On native Windows, 'mbstate_t' is defined as 'int'. */
|
||||
|
||||
int
|
||||
mbsinit (const mbstate_t *ps)
|
||||
{
|
||||
return ps == NULL || *ps == 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Platforms that lack mbsinit() also lack mbrlen(), mbrtowc(), mbsrtowcs()
|
||||
and wcrtomb(), wcsrtombs().
|
||||
We assume that
|
||||
- sizeof (mbstate_t) >= 4,
|
||||
- only stateless encodings are supported (such as UTF-8 and EUC-JP, but
|
||||
not ISO-2022 variants),
|
||||
- for each encoding, the number of bytes for a wide character is <= 4.
|
||||
(This maximum is attained for UTF-8, GB18030, EUC-TW.)
|
||||
We define the meaning of mbstate_t as follows:
|
||||
- In mb -> wc direction, mbstate_t's first byte contains the number of
|
||||
buffered bytes (in the range 0..3), followed by up to 3 buffered bytes.
|
||||
- In wc -> mb direction, mbstate_t contains no information. In other
|
||||
words, it is always in the initial state. */
|
||||
|
||||
verify (sizeof (mbstate_t) >= 4);
|
||||
|
||||
/* Return nonzero if PS is null or the first byte of the state it points
   to is zero, and zero otherwise.  */
int
mbsinit (const mbstate_t *ps)
{
  if (ps == NULL)
    return 1;
  return *(const char *) ps == 0;
}
|
||||
|
||||
#endif
|
44
lib/mbtowc-impl.h
Normal file
44
lib/mbtowc-impl.h
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* Convert multibyte character to wide character.
|
||||
Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2011.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* We don't need a static internal state, because the encoding is not state
|
||||
dependent, and when mbrtowc returns (size_t)(-2), we throw the result
|
||||
away. */
|
||||
|
||||
/* mbtowc() implemented on top of mbrtowc() with a fresh, zeroed state.
   Returns 0 when S is null or the converted character is the null wide
   character, -1 with errno set to EILSEQ when mbrtowc() reports an invalid
   or incomplete sequence, and otherwise mbrtowc()'s result.  The wide
   character is stored through PWC only when PWC is non-null.  */
int
mbtowc (wchar_t *pwc, const char *s, size_t n)
{
  mbstate_t fresh;
  wchar_t decoded;
  size_t length;

  if (s == NULL)
    return 0;

  memset (&fresh, 0, sizeof (mbstate_t));
  length = mbrtowc (&decoded, s, n, &fresh);

  /* Incomplete input is reported the same way as invalid input.  */
  if (length == (size_t)-1 || length == (size_t)-2)
    {
      errno = EILSEQ;
      return -1;
    }

  if (pwc != NULL)
    *pwc = decoded;

  if (decoded == 0)
    return 0;
  return length;
}
|
26
lib/mbtowc.c
Normal file
26
lib/mbtowc.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* Convert multibyte character to wide character.
|
||||
Copyright (C) 2011-2012 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2011.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <wchar.h>
|
||||
|
||||
#include "mbtowc-impl.h"
|
271
lib/nl_langinfo.c
Normal file
271
lib/nl_langinfo.c
Normal file
|
@ -0,0 +1,271 @@
|
|||
/* nl_langinfo() replacement: query locale dependent information.
|
||||
|
||||
Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <langinfo.h>
|
||||
|
||||
#if REPLACE_NL_LANGINFO
|
||||
|
||||
/* Override nl_langinfo with support for added nl_item values. */
|
||||
|
||||
# include <locale.h>
|
||||
# include <string.h>
|
||||
|
||||
# undef nl_langinfo
|
||||
|
||||
/* Replacement for nl_langinfo that answers the nl_item values the
   build marks (through the GNULIB_defined_* and
   FUNC_NL_LANGINFO_YESEXPR_WORKS macros) as needing local handling,
   and forwards every other ITEM to nl_langinfo.

   For CODESET the answer is derived from the LC_CTYPE locale name:
   the text after the first '.', with any '@modifier' trailer removed.
   When a trailer has to be removed the result lives in a static
   buffer that the next such call overwrites; when the name carries no
   encoding, or the encoding does not fit the buffer, "" is returned.  */
char *
rpl_nl_langinfo (nl_item item)
{
  switch (item)
    {
# if GNULIB_defined_CODESET
    case CODESET:
      {
        static char buf[2 + 10 + 1];
        const char *locale = setlocale (LC_CTYPE, NULL);
        const char *dot;
        const char *modifier;

        if (locale == NULL || locale[0] == '\0')
          return "";

        /* If the locale name contains an encoding after the dot,
           return it.  */
        dot = strchr (locale, '.');
        if (dot == NULL)
          return "";
        dot++;

        /* Look for the possible @... trailer and remove it, if any.  */
        modifier = strchr (dot, '@');
        if (modifier == NULL)
          return (char *) dot;

        /* modifier never precedes dot, so the difference is
           non-negative and can be compared as a size.  */
        if ((size_t) (modifier - dot) < sizeof (buf))
          {
            memcpy (buf, dot, modifier - dot);
            buf[modifier - dot] = '\0';
            return buf;
          }
        return "";
      }
# endif
# if GNULIB_defined_T_FMT_AMPM
    case T_FMT_AMPM:
      return "%I:%M:%S %p";
# endif
# if GNULIB_defined_ERA
    case ERA:
      /* The format is not standardized.  In glibc it is a sequence of
         strings of the form
         "direction:offset:start_date:end_date:era_name:era_format"
         with an empty string at the end.  */
      return "";
    case ERA_D_FMT:
      /* The %Ex conversion in strftime behaves like %x if the locale
         does not have an alternative time format.  */
      item = D_FMT;
      break;
    case ERA_D_T_FMT:
      /* The %Ec conversion in strftime behaves like %c if the locale
         does not have an alternative time format.  */
      item = D_T_FMT;
      break;
    case ERA_T_FMT:
      /* The %EX conversion in strftime behaves like %X if the locale
         does not have an alternative time format.  */
      item = T_FMT;
      break;
    case ALT_DIGITS:
      /* The format is not standardized.  In glibc it is a sequence of
         10 strings, appended in memory.  */
      return "\0\0\0\0\0\0\0\0\0\0";
# endif
# if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
    case YESEXPR:
      return "^[yY]";
    case NOEXPR:
      return "^[nN]";
# endif
    default:
      break;
    }

  /* Everything not answered above, including the ERA_* items remapped
     to their plain counterparts, is looked up via nl_langinfo.  */
  return nl_langinfo (item);
}
|
||||
|
||||
#else
|
||||
|
||||
/* Provide nl_langinfo from scratch. */
|
||||
|
||||
# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
|
||||
|
||||
/* Native Windows platforms. */
|
||||
|
||||
# define WIN32_LEAN_AND_MEAN /* avoid including junk */
|
||||
# include <windows.h>
|
||||
|
||||
# include <stdio.h>
|
||||
|
||||
# else
|
||||
|
||||
/* An old Unix platform without locales, such as Linux libc5 or BeOS. */
|
||||
|
||||
# endif
|
||||
|
||||
# include <locale.h>
|
||||
|
||||
/* From-scratch nl_langinfo.

   CODESET is "CP<n>" (n being the value of GetACP ()) on native
   Windows, "UTF-8" when __BEOS__ is defined, and "ISO-8859-1"
   otherwise.  RADIXCHAR and THOUSEP are read from localeconv ().
   Every other recognized ITEM yields a fixed English/C-style string;
   an unrecognized ITEM yields "".

   On native Windows the CODESET answer is written into a static
   buffer that each CODESET call rewrites.  */
char *
nl_langinfo (nl_item item)
{
  switch (item)
    {
    /* nl_langinfo items of the LC_CTYPE category */
    case CODESET:
# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
      {
        /* Room for "CP", up to ten decimal digits, and the NUL.  */
        static char buf[2 + 10 + 1];

        /* The Windows API has a function returning the locale's
           codepage as a number.  */
        sprintf (buf, "CP%u", GetACP ());
        return buf;
      }
# elif defined __BEOS__
      return "UTF-8";
# else
      return "ISO-8859-1";
# endif

    /* nl_langinfo items of the LC_NUMERIC category */
    case RADIXCHAR: return localeconv () ->decimal_point;
    case THOUSEP:   return localeconv () ->thousands_sep;

    /* nl_langinfo items of the LC_TIME category.
       TODO: Really use the locale.  */
    case D_T_FMT:
    case ERA_D_T_FMT:
      return "%a %b %e %H:%M:%S %Y";
    case D_FMT:
    case ERA_D_FMT:
      return "%m/%d/%y";
    case T_FMT:
    case ERA_T_FMT:
      return "%H:%M:%S";
    case T_FMT_AMPM:
      return "%I:%M:%S %p";
    case AM_STR: return "AM";
    case PM_STR: return "PM";

    /* Full weekday names.  */
    case DAY_1: return "Sunday";
    case DAY_2: return "Monday";
    case DAY_3: return "Tuesday";
    case DAY_4: return "Wednesday";
    case DAY_5: return "Thursday";
    case DAY_6: return "Friday";
    case DAY_7: return "Saturday";

    /* Abbreviated weekday names.  */
    case ABDAY_1: return "Sun";
    case ABDAY_2: return "Mon";
    case ABDAY_3: return "Tue";
    case ABDAY_4: return "Wed";
    case ABDAY_5: return "Thu";
    case ABDAY_6: return "Fri";
    case ABDAY_7: return "Sat";

    /* Full month names.  */
    case MON_1:  return "January";
    case MON_2:  return "February";
    case MON_3:  return "March";
    case MON_4:  return "April";
    case MON_5:  return "May";
    case MON_6:  return "June";
    case MON_7:  return "July";
    case MON_8:  return "August";
    case MON_9:  return "September";
    case MON_10: return "October";
    case MON_11: return "November";
    case MON_12: return "December";

    /* Abbreviated month names.  */
    case ABMON_1:  return "Jan";
    case ABMON_2:  return "Feb";
    case ABMON_3:  return "Mar";
    case ABMON_4:  return "Apr";
    case ABMON_5:  return "May";
    case ABMON_6:  return "Jun";
    case ABMON_7:  return "Jul";
    case ABMON_8:  return "Aug";
    case ABMON_9:  return "Sep";
    case ABMON_10: return "Oct";
    case ABMON_11: return "Nov";
    case ABMON_12: return "Dec";

    case ERA:
      return "";
    case ALT_DIGITS:
      return "\0\0\0\0\0\0\0\0\0\0";

    /* nl_langinfo items of the LC_MONETARY category
       TODO: Really use the locale.  */
    case CRNCYSTR:
      return "-";

    /* nl_langinfo items of the LC_MESSAGES category
       TODO: Really use the locale.  */
    case YESEXPR: return "^[yY]";
    case NOEXPR:  return "^[nN]";

    default:
      return "";
    }
}
|
||||
|
||||
#endif
|
3876
lib/regcomp.c
Normal file
3876
lib/regcomp.c
Normal file
File diff suppressed because it is too large
Load diff
72
lib/regex.c
Normal file
72
lib/regex.c
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* Extended regular expression matching and search library.
|
||||
Copyright (C) 2002-2003, 2005-2006, 2009-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Make sure no one compiles this code with a C++ compiler. */
|
||||
#if defined __cplusplus && defined _LIBC
|
||||
# error "This is C code, use a C compiler"
|
||||
#endif
|
||||
|
||||
#ifdef _LIBC
|
||||
/* We have to keep the namespace clean. */
|
||||
# define regfree(preg) __regfree (preg)
|
||||
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
|
||||
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
|
||||
# define regerror(errcode, preg, errbuf, errbuf_size) \
|
||||
__regerror(errcode, preg, errbuf, errbuf_size)
|
||||
# define re_set_registers(bu, re, nu, st, en) \
|
||||
__re_set_registers (bu, re, nu, st, en)
|
||||
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
|
||||
__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
|
||||
# define re_match(bufp, string, size, pos, regs) \
|
||||
__re_match (bufp, string, size, pos, regs)
|
||||
# define re_search(bufp, string, size, startpos, range, regs) \
|
||||
__re_search (bufp, string, size, startpos, range, regs)
|
||||
# define re_compile_pattern(pattern, length, bufp) \
|
||||
__re_compile_pattern (pattern, length, bufp)
|
||||
# define re_set_syntax(syntax) __re_set_syntax (syntax)
|
||||
# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
|
||||
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
|
||||
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
|
||||
|
||||
# include "../locale/localeinfo.h"
|
||||
#endif
|
||||
|
||||
/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
|
||||
GNU regex allows. Include it before <regex.h>, which correctly
|
||||
#undefs RE_DUP_MAX and sets it to the right value. */
|
||||
#include <limits.h>
|
||||
#include <strings.h>
|
||||
|
||||
#include <regex.h>
|
||||
#include "regex_internal.h"
|
||||
|
||||
#include "regex_internal.c"
|
||||
#include "regcomp.c"
|
||||
#include "regexec.c"
|
||||
|
||||
/* Binary backward compatibility. */
|
||||
#if _LIBC
|
||||
# include <shlib-compat.h>
|
||||
# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
|
||||
link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
|
||||
int re_max_failures = 2000;
|
||||
# endif
|
||||
#endif
|
675
lib/regex.h
Normal file
675
lib/regex.h
Normal file
|
@ -0,0 +1,675 @@
|
|||
/* Definitions for data structures and routines for the regular
|
||||
expression library.
|
||||
Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2006, 2009-2012
|
||||
Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef _REGEX_H
|
||||
#define _REGEX_H 1
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
/* Allow the use in C++ code. */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Define __USE_GNU_REGEX to declare GNU extensions that violate the
|
||||
POSIX name space rules. */
|
||||
#undef __USE_GNU_REGEX
|
||||
#if (defined _GNU_SOURCE \
|
||||
|| (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \
|
||||
&& !defined _XOPEN_SOURCE))
|
||||
# define __USE_GNU_REGEX 1
|
||||
#endif
|
||||
|
||||
#ifdef _REGEX_LARGE_OFFSETS
|
||||
|
||||
/* Use types and values that are wide enough to represent signed and
|
||||
unsigned byte offsets in memory. This currently works only when
|
||||
the regex code is used outside of the GNU C library; it is not yet
|
||||
supported within glibc itself, and glibc users should not define
|
||||
_REGEX_LARGE_OFFSETS. */
|
||||
|
||||
/* The type of the offset of a byte within a string.
|
||||
For historical reasons POSIX 1003.1-2004 requires that regoff_t be
|
||||
at least as wide as off_t. However, many common POSIX platforms set
|
||||
regoff_t to the more-sensible ssize_t and the Open Group has
|
||||
signalled its intention to change the requirement to be that
|
||||
regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN
|
||||
60 (2005-08-25). We don't know of any hosts where
|
||||
ptrdiff_t is wider than ssize_t, so ssize_t is safe. */
|
||||
typedef ssize_t regoff_t;
|
||||
|
||||
/* The type of nonnegative object indexes. Traditionally, GNU regex
|
||||
uses 'int' for these. Code that uses __re_idx_t should work
|
||||
regardless of whether the type is signed. */
|
||||
typedef size_t __re_idx_t;
|
||||
|
||||
/* The type of object sizes. */
|
||||
typedef size_t __re_size_t;
|
||||
|
||||
/* The type of object sizes, in places where the traditional code
|
||||
uses unsigned long int. */
|
||||
typedef size_t __re_long_size_t;
|
||||
|
||||
#else
|
||||
|
||||
/* Use types that are binary-compatible with the traditional GNU regex
|
||||
implementation, which mishandles strings longer than INT_MAX. */
|
||||
|
||||
typedef int regoff_t;
|
||||
typedef int __re_idx_t;
|
||||
typedef unsigned int __re_size_t;
|
||||
typedef unsigned long int __re_long_size_t;
|
||||
|
||||
#endif
|
||||
|
||||
/* The following two types have to be signed and unsigned integer type
|
||||
wide enough to hold a value of a pointer. For most ANSI compilers
|
||||
ptrdiff_t and size_t are likely OK. Still, the size of these two
|
||||
types is 2 for Microsoft C. Ugh... */
|
||||
typedef long int s_reg_t;
|
||||
typedef unsigned long int active_reg_t;
|
||||
|
||||
/* The following bits are used to determine the regexp syntax we
|
||||
recognize. The set/not-set meanings are chosen so that Emacs syntax
|
||||
remains the value 0. The bits are given in alphabetical order, and
|
||||
the definitions shifted by one from the previous bit; thus, when we
|
||||
add or remove a bit, only one other definition need change. */
|
||||
typedef unsigned long int reg_syntax_t;
|
||||
|
||||
#ifdef __USE_GNU_REGEX
|
||||
|
||||
/* If this bit is not set, then \ inside a bracket expression is literal.
|
||||
If set, then such a \ quotes the following character. */
|
||||
# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
|
||||
|
||||
/* If this bit is not set, then + and ? are operators, and \+ and \? are
|
||||
literals.
|
||||
If set, then \+ and \? are operators and + and ? are literals. */
|
||||
# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
|
||||
|
||||
/* If this bit is set, then character classes are supported. They are:
|
||||
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
|
||||
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
|
||||
If not set, then character classes are not supported. */
|
||||
# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
|
||||
|
||||
/* If this bit is set, then ^ and $ are always anchors (outside bracket
|
||||
expressions, of course).
|
||||
If this bit is not set, then it depends:
|
||||
^ is an anchor if it is at the beginning of a regular
|
||||
expression or after an open-group or an alternation operator;
|
||||
$ is an anchor if it is at the end of a regular expression, or
|
||||
before a close-group or an alternation operator.
|
||||
|
||||
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
|
||||
POSIX draft 11.2 says that * etc. in leading positions is undefined.
|
||||
We already implemented a previous draft which made those constructs
|
||||
invalid, though, so we haven't changed the code back. */
|
||||
# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
|
||||
|
||||
/* If this bit is set, then special characters are always special
|
||||
regardless of where they are in the pattern.
|
||||
If this bit is not set, then special characters are special only in
|
||||
some contexts; otherwise they are ordinary. Specifically,
|
||||
* + ? and intervals are only special when not after the beginning,
|
||||
open-group, or alternation operator. */
|
||||
# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
|
||||
|
||||
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
|
||||
immediately after an alternation or begin-group operator. */
|
||||
# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
|
||||
|
||||
/* If this bit is set, then . matches newline.
|
||||
If not set, then it doesn't. */
|
||||
# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
|
||||
|
||||
/* If this bit is set, then . doesn't match NUL.
|
||||
If not set, then it does. */
|
||||
# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
|
||||
|
||||
/* If this bit is set, nonmatching lists [^...] do not match newline.
|
||||
If not set, they do. */
|
||||
# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
|
||||
|
||||
/* If this bit is set, either \{...\} or {...} defines an
|
||||
interval, depending on RE_NO_BK_BRACES.
|
||||
If not set, \{, \}, {, and } are literals. */
|
||||
# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
|
||||
|
||||
/* If this bit is set, +, ? and | aren't recognized as operators.
|
||||
If not set, they are. */
|
||||
# define RE_LIMITED_OPS (RE_INTERVALS << 1)
|
||||
|
||||
/* If this bit is set, newline is an alternation operator.
|
||||
If not set, newline is literal. */
|
||||
# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
|
||||
|
||||
/* If this bit is set, then '{...}' defines an interval, and \{ and \}
|
||||
are literals.
|
||||
If not set, then '\{...\}' defines an interval. */
|
||||
# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
|
||||
|
||||
/* If this bit is set, (...) defines a group, and \( and \) are literals.
|
||||
If not set, \(...\) defines a group, and ( and ) are literals. */
|
||||
# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
|
||||
|
||||
/* If this bit is set, then \<digit> matches <digit>.
|
||||
If not set, then \<digit> is a back-reference. */
|
||||
# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
|
||||
|
||||
/* If this bit is set, then | is an alternation operator, and \| is literal.
|
||||
If not set, then \| is an alternation operator, and | is literal. */
|
||||
# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
|
||||
|
||||
/* If this bit is set, then an ending range point collating lower
|
||||
than the starting range point, as in [z-a], is invalid.
|
||||
If not set, then when ending range point collates lower than the
|
||||
starting range point, the range is ignored. */
|
||||
# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
|
||||
|
||||
/* If this bit is set, then an unmatched ) is ordinary.
|
||||
If not set, then an unmatched ) is invalid. */
|
||||
# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
|
||||
|
||||
/* If this bit is set, succeed as soon as we match the whole pattern,
|
||||
without further backtracking. */
|
||||
# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
|
||||
|
||||
/* If this bit is set, do not process the GNU regex operators.
|
||||
If not set, then the GNU regex operators are recognized. */
|
||||
# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
|
||||
|
||||
/* If this bit is set, turn on internal regex debugging.
|
||||
If not set, and debugging was on, turn it off.
|
||||
This only works if regex.c is compiled -DDEBUG.
|
||||
We define this bit always, so that all that's needed to turn on
|
||||
debugging is to recompile regex.c; the calling code can always have
|
||||
this bit set, and it won't affect anything in the normal case. */
|
||||
# define RE_DEBUG (RE_NO_GNU_OPS << 1)
|
||||
|
||||
/* If this bit is set, a syntactically invalid interval is treated as
|
||||
a string of ordinary characters. For example, the ERE 'a{1' is
|
||||
treated as 'a\{1'. */
|
||||
# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
|
||||
|
||||
/* If this bit is set, then ignore case when matching.
|
||||
If not set, then case is significant. */
|
||||
# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
|
||||
|
||||
/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
|
||||
for ^, because it is difficult to scan the regex backwards to find
|
||||
whether ^ should be special. */
|
||||
# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
|
||||
|
||||
/* If this bit is set, then \{ cannot be first in a regex or
|
||||
immediately after an alternation, open-group or \} operator. */
|
||||
# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
|
||||
|
||||
/* If this bit is set, then no_sub will be set to 1 during
|
||||
re_compile_pattern. */
|
||||
# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
|
||||
|
||||
#endif /* defined __USE_GNU_REGEX */
|
||||
|
||||
/* This global variable defines the particular regexp syntax to use (for
|
||||
some interfaces). When a regexp is compiled, the syntax used is
|
||||
stored in the pattern buffer, so changing this does not affect
|
||||
already-compiled regexps. */
|
||||
extern reg_syntax_t re_syntax_options;
|
||||
|
||||
#ifdef __USE_GNU_REGEX
|
||||
/* Define combinations of the above bits for the standard possibilities.
|
||||
(The [[[ comments delimit what gets put into the Texinfo file, so
|
||||
don't delete them!) */
|
||||
/* [[[begin syntaxes]]] */
|
||||
# define RE_SYNTAX_EMACS 0
|
||||
|
||||
# define RE_SYNTAX_AWK \
|
||||
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
|
||||
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
|
||||
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
|
||||
| RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
|
||||
|
||||
# define RE_SYNTAX_GNU_AWK \
|
||||
((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
|
||||
& ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
|
||||
| RE_CONTEXT_INVALID_OPS ))
|
||||
|
||||
# define RE_SYNTAX_POSIX_AWK \
|
||||
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
|
||||
| RE_INTERVALS | RE_NO_GNU_OPS)
|
||||
|
||||
# define RE_SYNTAX_GREP \
|
||||
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
|
||||
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
|
||||
| RE_NEWLINE_ALT)
|
||||
|
||||
# define RE_SYNTAX_EGREP \
|
||||
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
|
||||
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
|
||||
| RE_NO_BK_VBAR)
|
||||
|
||||
# define RE_SYNTAX_POSIX_EGREP \
|
||||
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
|
||||
| RE_INVALID_INTERVAL_ORD)
|
||||
|
||||
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
|
||||
# define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
|
||||
|
||||
# define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
|
||||
|
||||
/* Syntax bits common to both basic and extended POSIX regex syntax. */
|
||||
# define _RE_SYNTAX_POSIX_COMMON \
|
||||
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
|
||||
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
|
||||
|
||||
# define RE_SYNTAX_POSIX_BASIC \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
|
||||
|
||||
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
|
||||
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
|
||||
isn't minimal, since other operators, such as \`, aren't disabled. */
|
||||
# define RE_SYNTAX_POSIX_MINIMAL_BASIC \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
|
||||
|
||||
# define RE_SYNTAX_POSIX_EXTENDED \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
|
||||
| RE_NO_BK_PARENS | RE_NO_BK_VBAR \
|
||||
| RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
|
||||
|
||||
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
|
||||
removed and RE_NO_BK_REFS is added. */
|
||||
# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
|
||||
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
|
||||
| RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
|
||||
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
|
||||
| RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
|
||||
/* [[[end syntaxes]]] */
|
||||
|
||||
#endif /* defined __USE_GNU_REGEX */
|
||||
|
||||
#ifdef __USE_GNU_REGEX
|
||||
|
||||
/* Maximum number of duplicates an interval can allow. POSIX-conforming
|
||||
systems might define this in <limits.h>, but we want our
|
||||
value, so remove any previous define. */
|
||||
# ifdef RE_DUP_MAX
|
||||
# undef RE_DUP_MAX
|
||||
# endif
|
||||
|
||||
/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
|
||||
the counter as a 2-byte signed integer. This is no longer true, so
|
||||
RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
|
||||
((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined.
|
||||
However, there would be a huge performance problem if someone
|
||||
actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
|
||||
its historical value. */
|
||||
# define RE_DUP_MAX (0x7fff)
|
||||
|
||||
#endif /* defined __USE_GNU_REGEX */
|
||||
|
||||
|
||||
/* POSIX 'cflags' bits (i.e., information for 'regcomp'). */
|
||||
|
||||
/* If this bit is set, then use extended regular expression syntax.
|
||||
If not set, then use basic regular expression syntax. */
|
||||
#define REG_EXTENDED 1
|
||||
|
||||
/* If this bit is set, then ignore case when matching.
|
||||
If not set, then case is significant. */
|
||||
#define REG_ICASE (1 << 1)
|
||||
|
||||
/* If this bit is set, then anchors do not match at newline
|
||||
characters in the string.
|
||||
If not set, then anchors do match at newlines. */
|
||||
#define REG_NEWLINE (1 << 2)
|
||||
|
||||
/* If this bit is set, then report only success or fail in regexec.
|
||||
If not set, then returns differ between not matching and errors. */
|
||||
#define REG_NOSUB (1 << 3)
|
||||
|
||||
|
||||
/* POSIX 'eflags' bits (i.e., information for regexec). */
|
||||
|
||||
/* If this bit is set, then the beginning-of-line operator doesn't match
|
||||
the beginning of the string (presumably because it's not the
|
||||
beginning of a line).
|
||||
If not set, then the beginning-of-line operator does match the
|
||||
beginning of the string. */
|
||||
#define REG_NOTBOL 1
|
||||
|
||||
/* Like REG_NOTBOL, except for the end-of-line. */
|
||||
#define REG_NOTEOL (1 << 1)
|
||||
|
||||
/* Use PMATCH[0] to delimit the start and end of the search in the
|
||||
buffer. */
|
||||
#define REG_STARTEND (1 << 2)
|
||||
|
||||
|
||||
/* If any error codes are removed, changed, or added, update the
|
||||
'__re_error_msgid' table in regcomp.c. */
|
||||
|
||||
/* Error codes of the regex routines.  The enumerators carry a leading
   underscore; the numeric values follow from the declaration order
   (_REG_ENOSYS is -1, _REG_NOERROR is 0, and each later enumerator is
   one more than the previous).  */
typedef enum
{
  _REG_ENOSYS = -1,     /* This will never happen for this implementation.  */
  _REG_NOERROR = 0,     /* Success.  */
  _REG_NOMATCH,         /* Didn't find a match (for regexec).  */

  /* POSIX regcomp return error codes.  (In the order listed in the
     standard.)  */
  _REG_BADPAT,          /* Invalid pattern.  */
  _REG_ECOLLATE,        /* Invalid collating element.  */
  _REG_ECTYPE,          /* Invalid character class name.  */
  _REG_EESCAPE,         /* Trailing backslash.  */
  _REG_ESUBREG,         /* Invalid back reference.  */
  _REG_EBRACK,          /* Unmatched left bracket.  */
  _REG_EPAREN,          /* Parenthesis imbalance.  */
  _REG_EBRACE,          /* Unmatched \{.  */
  _REG_BADBR,           /* Invalid contents of \{\}.  */
  _REG_ERANGE,          /* Invalid range end.  */
  _REG_ESPACE,          /* Ran out of memory.  */
  _REG_BADRPT,          /* No preceding re for repetition op.  */

  /* Error codes we've added.  */
  _REG_EEND,            /* Premature end.  */
  _REG_ESIZE,           /* Compiled pattern bigger than 2^16 bytes.  */
  _REG_ERPAREN          /* Unmatched ) or \); not returned from regcomp.  */
} reg_errcode_t;
|
||||
|
||||
#ifdef _XOPEN_SOURCE
|
||||
# define REG_ENOSYS _REG_ENOSYS
|
||||
#endif
|
||||
#define REG_NOERROR _REG_NOERROR
|
||||
#define REG_NOMATCH _REG_NOMATCH
|
||||
#define REG_BADPAT _REG_BADPAT
|
||||
#define REG_ECOLLATE _REG_ECOLLATE
|
||||
#define REG_ECTYPE _REG_ECTYPE
|
||||
#define REG_EESCAPE _REG_EESCAPE
|
||||
#define REG_ESUBREG _REG_ESUBREG
|
||||
#define REG_EBRACK _REG_EBRACK
|
||||
#define REG_EPAREN _REG_EPAREN
|
||||
#define REG_EBRACE _REG_EBRACE
|
||||
#define REG_BADBR _REG_BADBR
|
||||
#define REG_ERANGE _REG_ERANGE
|
||||
#define REG_ESPACE _REG_ESPACE
|
||||
#define REG_BADRPT _REG_BADRPT
|
||||
#define REG_EEND _REG_EEND
|
||||
#define REG_ESIZE _REG_ESIZE
|
||||
#define REG_ERPAREN _REG_ERPAREN
|
||||
|
||||
/* struct re_pattern_buffer normally uses member names like 'buffer'
|
||||
that POSIX does not allow. In POSIX mode these members have names
|
||||
with leading 're_' (e.g., 're_buffer'). */
|
||||
#ifdef __USE_GNU_REGEX
|
||||
# define _REG_RE_NAME(id) id
|
||||
# define _REG_RM_NAME(id) id
|
||||
#else
|
||||
# define _REG_RE_NAME(id) re_##id
|
||||
# define _REG_RM_NAME(id) rm_##id
|
||||
#endif
|
||||
|
||||
/* The user can specify the type of the re_translate member by
|
||||
defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned
|
||||
char *. This pollutes the POSIX name space, so in POSIX mode just
|
||||
use unsigned char *. */
|
||||
#ifdef __USE_GNU_REGEX
|
||||
# ifndef RE_TRANSLATE_TYPE
|
||||
# define RE_TRANSLATE_TYPE unsigned char *
|
||||
# endif
|
||||
# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE
|
||||
#else
|
||||
# define REG_TRANSLATE_TYPE unsigned char *
|
||||
#endif
|
||||
|
||||
/* This data structure represents a compiled pattern. Before calling
|
||||
the pattern compiler, the fields 'buffer', 'allocated', 'fastmap',
|
||||
'translate', and 'no_sub' can be set. After the pattern has been
|
||||
compiled, the 're_nsub' field is available. All other fields are
|
||||
private to the regex routines. */
|
||||
|
||||
struct re_pattern_buffer
|
||||
{
|
||||
/* Space that holds the compiled pattern. It is declared as
|
||||
'unsigned char *' because its elements are sometimes used as
|
||||
array indexes. */
|
||||
unsigned char *_REG_RE_NAME (buffer);
|
||||
|
||||
/* Number of bytes to which 'buffer' points. */
|
||||
__re_long_size_t _REG_RE_NAME (allocated);
|
||||
|
||||
/* Number of bytes actually used in 'buffer'. */
|
||||
__re_long_size_t _REG_RE_NAME (used);
|
||||
|
||||
/* Syntax setting with which the pattern was compiled. */
|
||||
reg_syntax_t _REG_RE_NAME (syntax);
|
||||
|
||||
/* Pointer to a fastmap, if any, otherwise zero. re_search uses the
|
||||
fastmap, if there is one, to skip over impossible starting points
|
||||
for matches. */
|
||||
char *_REG_RE_NAME (fastmap);
|
||||
|
||||
/* Either a translate table to apply to all characters before
|
||||
comparing them, or zero for no translation. The translation is
|
||||
applied to a pattern when it is compiled and to a string when it
|
||||
is matched. */
|
||||
REG_TRANSLATE_TYPE _REG_RE_NAME (translate);
|
||||
|
||||
/* Number of subexpressions found by the compiler. */
|
||||
size_t re_nsub;
|
||||
|
||||
/* Zero if this pattern cannot match the empty string, one else.
|
||||
Well, in truth it's used only in 're_search_2', to see whether or
|
||||
not we should use the fastmap, so we don't set this absolutely
|
||||
perfectly; see 're_compile_fastmap' (the "duplicate" case). */
|
||||
unsigned int _REG_RE_NAME (can_be_null) : 1;
|
||||
|
||||
/* If REGS_UNALLOCATED, allocate space in the 'regs' structure
|
||||
for 'max (RE_NREGS, re_nsub + 1)' groups.
|
||||
If REGS_REALLOCATE, reallocate space if necessary.
|
||||
If REGS_FIXED, use what's there. */
|
||||
#ifdef __USE_GNU_REGEX
|
||||
# define REGS_UNALLOCATED 0
|
||||
# define REGS_REALLOCATE 1
|
||||
# define REGS_FIXED 2
|
||||
#endif
|
||||
unsigned int _REG_RE_NAME (regs_allocated) : 2;
|
||||
|
||||
/* Set to zero when 're_compile_pattern' compiles a pattern; set to
|
||||
one by 're_compile_fastmap' if it updates the fastmap. */
|
||||
unsigned int _REG_RE_NAME (fastmap_accurate) : 1;
|
||||
|
||||
/* If set, 're_match_2' does not return information about
|
||||
subexpressions. */
|
||||
unsigned int _REG_RE_NAME (no_sub) : 1;
|
||||
|
||||
/* If set, a beginning-of-line anchor doesn't match at the beginning
|
||||
of the string. */
|
||||
unsigned int _REG_RE_NAME (not_bol) : 1;
|
||||
|
||||
/* Similarly for an end-of-line anchor. */
|
||||
unsigned int _REG_RE_NAME (not_eol) : 1;
|
||||
|
||||
/* If true, an anchor at a newline matches. */
|
||||
unsigned int _REG_RE_NAME (newline_anchor) : 1;
|
||||
|
||||
/* [[[end pattern_buffer]]] */
|
||||
};
|
||||
|
||||
typedef struct re_pattern_buffer regex_t;
|
||||
|
||||
/* This is the structure we store register match data in. See
|
||||
regex.texinfo for a full description of what registers match. */
|
||||
struct re_registers
|
||||
{
|
||||
__re_size_t _REG_RM_NAME (num_regs);
|
||||
regoff_t *_REG_RM_NAME (start);
|
||||
regoff_t *_REG_RM_NAME (end);
|
||||
};
|
||||
|
||||
|
||||
/* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
|
||||
're_match_2' returns information about at least this many registers
|
||||
the first time a 'regs' structure is passed. */
|
||||
#if !defined RE_NREGS && defined __USE_GNU_REGEX
|
||||
# define RE_NREGS 30
|
||||
#endif
|
||||
|
||||
|
||||
/* POSIX specification for registers. Aside from the different names than
|
||||
're_registers', POSIX uses an array of structures, instead of a
|
||||
structure of arrays. */
|
||||
typedef struct
|
||||
{
|
||||
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
|
||||
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
|
||||
} regmatch_t;
|
||||
|
||||
/* Declarations for routines. */
|
||||
|
||||
/* Sets the current default syntax to SYNTAX, and return the old syntax.
|
||||
You can also simply assign to the 're_syntax_options' variable. */
|
||||
extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
|
||||
|
||||
/* Compile the regular expression PATTERN, with length LENGTH
|
||||
and syntax given by the global 're_syntax_options', into the buffer
|
||||
BUFFER. Return NULL if successful, and an error string if not. */
|
||||
extern const char *re_compile_pattern (const char *__pattern, size_t __length,
|
||||
struct re_pattern_buffer *__buffer);
|
||||
|
||||
|
||||
/* Compile a fastmap for the compiled pattern in BUFFER; used to
|
||||
accelerate searches. Return 0 if successful and -2 if there was an
|
||||
internal error. */
|
||||
extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
|
||||
|
||||
|
||||
/* Search in the string STRING (with length LENGTH) for the pattern
|
||||
compiled into BUFFER. Start searching at position START, for RANGE
|
||||
characters. Return the starting position of the match, -1 for no
|
||||
match, or -2 for an internal error. Also return register
|
||||
information in REGS (if REGS is nonnull and BUFFER->no_sub is zero). */
|
||||
extern regoff_t re_search (struct re_pattern_buffer *__buffer,
|
||||
const char *__string, __re_idx_t __length,
|
||||
__re_idx_t __start, regoff_t __range,
|
||||
struct re_registers *__regs);
|
||||
|
||||
|
||||
/* Like 're_search', but search in the concatenation of STRING1 and
|
||||
STRING2. Also, stop searching at index START + STOP. */
|
||||
extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
|
||||
const char *__string1, __re_idx_t __length1,
|
||||
const char *__string2, __re_idx_t __length2,
|
||||
__re_idx_t __start, regoff_t __range,
|
||||
struct re_registers *__regs,
|
||||
__re_idx_t __stop);
|
||||
|
||||
|
||||
/* Like 're_search', but return how many characters in STRING the regexp
|
||||
in BUFFER matched, starting at position START. */
|
||||
extern regoff_t re_match (struct re_pattern_buffer *__buffer,
|
||||
const char *__string, __re_idx_t __length,
|
||||
__re_idx_t __start, struct re_registers *__regs);
|
||||
|
||||
|
||||
/* Relates to 're_match' as 're_search_2' relates to 're_search'. */
|
||||
extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
|
||||
const char *__string1, __re_idx_t __length1,
|
||||
const char *__string2, __re_idx_t __length2,
|
||||
__re_idx_t __start, struct re_registers *__regs,
|
||||
__re_idx_t __stop);
|
||||
|
||||
|
||||
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
|
||||
ENDS. Subsequent matches using BUFFER and REGS will use this memory
|
||||
for recording register information. STARTS and ENDS must be
|
||||
allocated with malloc, and must each be at least 'NUM_REGS * sizeof
|
||||
(regoff_t)' bytes long.
|
||||
|
||||
If NUM_REGS == 0, then subsequent matches should allocate their own
|
||||
register data.
|
||||
|
||||
Unless this function is called, the first search or match using
|
||||
BUFFER will allocate its own register data, without freeing the old
|
||||
data. */
|
||||
extern void re_set_registers (struct re_pattern_buffer *__buffer,
|
||||
struct re_registers *__regs,
|
||||
__re_size_t __num_regs,
|
||||
regoff_t *__starts, regoff_t *__ends);
|
||||
|
||||
#if defined _REGEX_RE_COMP || defined _LIBC
|
||||
# ifndef _CRAY
|
||||
/* 4.2 bsd compatibility. */
|
||||
extern char *re_comp (const char *);
|
||||
extern int re_exec (const char *);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* GCC 2.95 and later have "__restrict"; C99 compilers have
|
||||
"restrict", and "configure" may have defined "restrict".
|
||||
Other compilers use __restrict, __restrict__, and _Restrict, and
|
||||
'configure' might #define 'restrict' to those words, so pick a
|
||||
different name. */
|
||||
#ifndef _Restrict_
|
||||
# if 199901L <= __STDC_VERSION__
|
||||
# define _Restrict_ restrict
|
||||
# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)
|
||||
# define _Restrict_ __restrict
|
||||
# else
|
||||
# define _Restrict_
|
||||
# endif
|
||||
#endif
|
||||
/* gcc 3.1 and up support the [restrict] syntax. Don't trust
|
||||
sys/cdefs.h's definition of __restrict_arr, though, as it
|
||||
mishandles gcc -ansi -pedantic. */
|
||||
#ifndef _Restrict_arr_
|
||||
# if ((199901L <= __STDC_VERSION__ \
|
||||
|| ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \
|
||||
&& !defined __STRICT_ANSI__)) \
|
||||
&& !defined __GNUG__)
|
||||
# define _Restrict_arr_ _Restrict_
|
||||
# else
|
||||
# define _Restrict_arr_
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* POSIX compatibility. */
|
||||
extern int regcomp (regex_t *_Restrict_ __preg,
|
||||
const char *_Restrict_ __pattern,
|
||||
int __cflags);
|
||||
|
||||
extern int regexec (const regex_t *_Restrict_ __preg,
|
||||
const char *_Restrict_ __string, size_t __nmatch,
|
||||
regmatch_t __pmatch[_Restrict_arr_],
|
||||
int __eflags);
|
||||
|
||||
extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
|
||||
char *_Restrict_ __errbuf, size_t __errbuf_size);
|
||||
|
||||
extern void regfree (regex_t *__preg);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* C++ */
|
||||
|
||||
#endif /* regex.h */
|
1741
lib/regex_internal.c
Normal file
1741
lib/regex_internal.c
Normal file
File diff suppressed because it is too large
Load diff
866
lib/regex_internal.h
Normal file
866
lib/regex_internal.h
Normal file
|
@ -0,0 +1,866 @@
|
|||
/* Extended regular expression matching and search library.
|
||||
Copyright (C) 2002-2012 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef _REGEX_INTERNAL_H
|
||||
#define _REGEX_INTERNAL_H 1
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <langinfo.h>
|
||||
#ifndef _LIBC
|
||||
# include "localcharset.h"
|
||||
#endif
|
||||
#include <locale.h>
|
||||
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include <stdint.h>
|
||||
#if defined _LIBC
|
||||
# include <bits/libc-lock.h>
|
||||
#else
|
||||
# define __libc_lock_init(NAME) do { } while (0)
|
||||
# define __libc_lock_lock(NAME) do { } while (0)
|
||||
# define __libc_lock_unlock(NAME) do { } while (0)
|
||||
#endif
|
||||
|
||||
/* In case that the system doesn't have isblank(). */
|
||||
#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))
|
||||
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
|
||||
#endif
|
||||
|
||||
#ifdef _LIBC
|
||||
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
|
||||
# define _RE_DEFINE_LOCALE_FUNCTIONS 1
|
||||
# include <locale/localeinfo.h>
|
||||
# include <locale/elem-hash.h>
|
||||
# include <locale/coll-lookup.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* This is for other GNU distributions with internationalized messages. */
|
||||
#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
|
||||
# include <libintl.h>
|
||||
# ifdef _LIBC
|
||||
# undef gettext
|
||||
# define gettext(msgid) \
|
||||
INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
|
||||
# endif
|
||||
#else
|
||||
# define gettext(msgid) (msgid)
|
||||
#endif
|
||||
|
||||
#ifndef gettext_noop
|
||||
/* This define is so xgettext can find the internationalizable
|
||||
strings. */
|
||||
# define gettext_noop(String) String
|
||||
#endif
|
||||
|
||||
/* For systems that lack the definition. */
|
||||
#ifndef SIZE_MAX
|
||||
# define SIZE_MAX ((size_t) -1)
|
||||
#endif
|
||||
|
||||
#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL) || _LIBC
|
||||
# define RE_ENABLE_I18N
|
||||
#endif
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
# define BE(expr, val) __builtin_expect (expr, val)
|
||||
#else
|
||||
# define BE(expr, val) (expr)
|
||||
# ifdef _LIBC
|
||||
# define inline
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Number of ASCII characters. */
|
||||
#define ASCII_CHARS 0x80
|
||||
|
||||
/* Number of single byte characters. */
|
||||
#define SBC_MAX (UCHAR_MAX + 1)
|
||||
|
||||
#define COLL_ELEM_LEN_MAX 8
|
||||
|
||||
/* The character which represents newline. */
|
||||
#define NEWLINE_CHAR '\n'
|
||||
#define WIDE_NEWLINE_CHAR L'\n'
|
||||
|
||||
/* Map glibc-internal names to the standard API when building outside glibc. */
|
||||
#ifndef _LIBC
|
||||
# define __wctype wctype
|
||||
# define __iswctype iswctype
|
||||
# define __btowc btowc
|
||||
# define __wcrtomb wcrtomb
|
||||
# define __mbrtowc mbrtowc
|
||||
# define __regfree regfree
|
||||
# define attribute_hidden
|
||||
#endif /* not _LIBC */
|
||||
|
||||
#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
|
||||
# define __attribute(arg) __attribute__ (arg)
|
||||
#else
|
||||
# define __attribute(arg)
|
||||
#endif
|
||||
|
||||
typedef __re_idx_t Idx;
|
||||
|
||||
/* Special return value for failure to match. */
|
||||
#define REG_MISSING ((Idx) -1)
|
||||
|
||||
/* Special return value for internal error. */
|
||||
#define REG_ERROR ((Idx) -2)
|
||||
|
||||
/* Test whether N is a valid index, and is not one of the above. */
|
||||
#ifdef _REGEX_LARGE_OFFSETS
|
||||
# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)
|
||||
#else
|
||||
# define REG_VALID_INDEX(n) (0 <= (n))
|
||||
#endif
|
||||
|
||||
/* Test whether N is a valid nonzero index. */
|
||||
#ifdef _REGEX_LARGE_OFFSETS
|
||||
# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))
|
||||
#else
|
||||
# define REG_VALID_NONZERO_INDEX(n) (0 < (n))
|
||||
#endif
|
||||
|
||||
/* A hash value, suitable for computing hash tables. */
|
||||
typedef __re_size_t re_hashval_t;
|
||||
|
||||
/* An integer used to represent a set of bits. It must be unsigned,
|
||||
and must be at least as wide as unsigned int. */
|
||||
typedef unsigned long int bitset_word_t;
|
||||
/* All bits set in a bitset_word_t. */
|
||||
#define BITSET_WORD_MAX ULONG_MAX
|
||||
|
||||
/* Number of bits in a bitset_word_t. For portability to hosts with
|
||||
padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
|
||||
instead, deduce it directly from BITSET_WORD_MAX. Avoid
|
||||
greater-than-32-bit integers and unconditional shifts by more than
|
||||
31 bits, as they're not portable. */
|
||||
#if BITSET_WORD_MAX == 0xffffffffUL
|
||||
# define BITSET_WORD_BITS 32
|
||||
#elif BITSET_WORD_MAX >> 31 >> 4 == 1
|
||||
# define BITSET_WORD_BITS 36
|
||||
#elif BITSET_WORD_MAX >> 31 >> 16 == 1
|
||||
# define BITSET_WORD_BITS 48
|
||||
#elif BITSET_WORD_MAX >> 31 >> 28 == 1
|
||||
# define BITSET_WORD_BITS 60
|
||||
#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
|
||||
# define BITSET_WORD_BITS 64
|
||||
#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
|
||||
# define BITSET_WORD_BITS 72
|
||||
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
|
||||
# define BITSET_WORD_BITS 128
|
||||
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
|
||||
# define BITSET_WORD_BITS 256
|
||||
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
|
||||
# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
|
||||
# if BITSET_WORD_BITS <= SBC_MAX
|
||||
# error "Invalid SBC_MAX"
|
||||
# endif
|
||||
#else
|
||||
# error "Add case for new bitset_word_t size"
|
||||
#endif
|
||||
|
||||
/* Number of bitset_word_t values in a bitset_t. */
|
||||
#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
|
||||
|
||||
typedef bitset_word_t bitset_t[BITSET_WORDS];
|
||||
typedef bitset_word_t *re_bitset_ptr_t;
|
||||
typedef const bitset_word_t *re_const_bitset_ptr_t;
|
||||
|
||||
#define PREV_WORD_CONSTRAINT 0x0001
|
||||
#define PREV_NOTWORD_CONSTRAINT 0x0002
|
||||
#define NEXT_WORD_CONSTRAINT 0x0004
|
||||
#define NEXT_NOTWORD_CONSTRAINT 0x0008
|
||||
#define PREV_NEWLINE_CONSTRAINT 0x0010
|
||||
#define NEXT_NEWLINE_CONSTRAINT 0x0020
|
||||
#define PREV_BEGBUF_CONSTRAINT 0x0040
|
||||
#define NEXT_ENDBUF_CONSTRAINT 0x0080
|
||||
#define WORD_DELIM_CONSTRAINT 0x0100
|
||||
#define NOT_WORD_DELIM_CONSTRAINT 0x0200
|
||||
|
||||
typedef enum
|
||||
{
|
||||
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
|
||||
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
|
||||
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
|
||||
INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
|
||||
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
|
||||
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
|
||||
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
|
||||
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
|
||||
WORD_DELIM = WORD_DELIM_CONSTRAINT,
|
||||
NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
|
||||
} re_context_type;
|
||||
|
||||
/* A set of node indices (Idx values) stored in the array 'elems'.
   Within this header, re_node_set_empty resets 'nelem' to 0 and
   re_node_set_free releases 'elems'. */
typedef struct
|
||||
{
|
||||
Idx alloc; /* presumably the allocated capacity of 'elems' -- TODO confirm */
|
||||
Idx nelem; /* Number of elements currently in the set. */
|
||||
Idx *elems; /* Element storage; freed via re_node_set_free. */
|
||||
} re_node_set;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NON_TYPE = 0,
|
||||
|
||||
/* Node types; these are used by token, node, and tree. */
|
||||
CHARACTER = 1,
|
||||
END_OF_RE = 2,
|
||||
SIMPLE_BRACKET = 3,
|
||||
OP_BACK_REF = 4,
|
||||
OP_PERIOD = 5,
|
||||
#ifdef RE_ENABLE_I18N
|
||||
COMPLEX_BRACKET = 6,
|
||||
OP_UTF8_PERIOD = 7,
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
|
||||
/* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
|
||||
when the debugger shows values of this enum type. */
|
||||
#define EPSILON_BIT 8
|
||||
OP_OPEN_SUBEXP = EPSILON_BIT | 0,
|
||||
OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
|
||||
OP_ALT = EPSILON_BIT | 2,
|
||||
OP_DUP_ASTERISK = EPSILON_BIT | 3,
|
||||
ANCHOR = EPSILON_BIT | 4,
|
||||
|
||||
/* Tree type, these are used only by tree. */
|
||||
CONCAT = 16,
|
||||
SUBEXP = 17,
|
||||
|
||||
/* Token type, these are used only by token. */
|
||||
OP_DUP_PLUS = 18,
|
||||
OP_DUP_QUESTION,
|
||||
OP_OPEN_BRACKET,
|
||||
OP_CLOSE_BRACKET,
|
||||
OP_CHARSET_RANGE,
|
||||
OP_OPEN_DUP_NUM,
|
||||
OP_CLOSE_DUP_NUM,
|
||||
OP_NON_MATCH_LIST,
|
||||
OP_OPEN_COLL_ELEM,
|
||||
OP_CLOSE_COLL_ELEM,
|
||||
OP_OPEN_EQUIV_CLASS,
|
||||
OP_CLOSE_EQUIV_CLASS,
|
||||
OP_OPEN_CHAR_CLASS,
|
||||
OP_CLOSE_CHAR_CLASS,
|
||||
OP_WORD,
|
||||
OP_NOTWORD,
|
||||
OP_SPACE,
|
||||
OP_NOTSPACE,
|
||||
BACK_SLASH
|
||||
|
||||
} re_token_type_t;
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
typedef struct
|
||||
{
|
||||
/* Multibyte characters. */
|
||||
wchar_t *mbchars;
|
||||
|
||||
/* Collating symbols. */
|
||||
# ifdef _LIBC
|
||||
int32_t *coll_syms;
|
||||
# endif
|
||||
|
||||
/* Equivalence classes. */
|
||||
# ifdef _LIBC
|
||||
int32_t *equiv_classes;
|
||||
# endif
|
||||
|
||||
/* Range expressions. */
|
||||
# ifdef _LIBC
|
||||
uint32_t *range_starts;
|
||||
uint32_t *range_ends;
|
||||
# else /* not _LIBC */
|
||||
wchar_t *range_starts;
|
||||
wchar_t *range_ends;
|
||||
# endif /* not _LIBC */
|
||||
|
||||
/* Character classes. */
|
||||
wctype_t *char_classes;
|
||||
|
||||
/* If this character set is the non-matching list. */
|
||||
unsigned int non_match : 1;
|
||||
|
||||
/* # of multibyte characters. */
|
||||
Idx nmbchars;
|
||||
|
||||
/* # of collating symbols. */
|
||||
Idx ncoll_syms;
|
||||
|
||||
/* # of equivalence classes. */
|
||||
Idx nequiv_classes;
|
||||
|
||||
/* # of range expressions. */
|
||||
Idx nranges;
|
||||
|
||||
/* # of character classes. */
|
||||
Idx nchar_classes;
|
||||
} re_charset_t;
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
union
|
||||
{
|
||||
unsigned char c; /* for CHARACTER */
|
||||
re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
|
||||
#ifdef RE_ENABLE_I18N
|
||||
re_charset_t *mbcset; /* for COMPLEX_BRACKET */
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
Idx idx; /* for BACK_REF */
|
||||
re_context_type ctx_type; /* for ANCHOR */
|
||||
} opr;
|
||||
#if __GNUC__ >= 2 && !defined __STRICT_ANSI__
|
||||
re_token_type_t type : 8;
|
||||
#else
|
||||
re_token_type_t type;
|
||||
#endif
|
||||
unsigned int constraint : 10; /* context constraint */
|
||||
unsigned int duplicated : 1;
|
||||
unsigned int opt_subexp : 1;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
unsigned int accept_mb : 1;
|
||||
/* These 2 bits can be moved into the union if needed (e.g. if running out
|
||||
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
|
||||
unsigned int mb_partial : 1;
|
||||
#endif
|
||||
unsigned int word_char : 1;
|
||||
} re_token_t;
|
||||
|
||||
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
|
||||
|
||||
struct re_string_t
|
||||
{
|
||||
/* Indicate the raw buffer which is the original string passed as an
|
||||
argument of regexec(), re_search(), etc. */
|
||||
const unsigned char *raw_mbs;
|
||||
/* Store the multibyte string. In case of "case insensitive mode" like
|
||||
REG_ICASE, upper cases of the string are stored, otherwise MBS points
|
||||
to the same address as RAW_MBS. */
|
||||
unsigned char *mbs;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Store the wide character string which is corresponding to MBS. */
|
||||
wint_t *wcs;
|
||||
Idx *offsets;
|
||||
mbstate_t cur_state;
|
||||
#endif
|
||||
/* Index in RAW_MBS. Each character mbs[i] corresponds to
|
||||
raw_mbs[raw_mbs_idx + i]. */
|
||||
Idx raw_mbs_idx;
|
||||
/* The length of the valid characters in the buffers. */
|
||||
Idx valid_len;
|
||||
/* The corresponding number of bytes in raw_mbs array. */
|
||||
Idx valid_raw_len;
|
||||
/* The length of the buffers MBS and WCS. */
|
||||
Idx bufs_len;
|
||||
/* The index in MBS, which is updated by re_string_fetch_byte. */
|
||||
Idx cur_idx;
|
||||
/* length of RAW_MBS array. */
|
||||
Idx raw_len;
|
||||
/* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
|
||||
Idx len;
|
||||
/* End of the buffer may be shorter than its length in the cases such
|
||||
as re_match_2, re_search_2. Then, we use STOP for end of the buffer
|
||||
instead of LEN. */
|
||||
Idx raw_stop;
|
||||
/* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
|
||||
Idx stop;
|
||||
|
||||
/* The context of mbs[0]. We store the context independently, since
|
||||
the context of mbs[0] may be different from raw_mbs[0], which is
|
||||
the beginning of the input string. */
|
||||
unsigned int tip_context;
|
||||
/* The translation passed as a part of an argument of re_compile_pattern. */
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
/* Copy of re_dfa_t's word_char. */
|
||||
re_const_bitset_ptr_t word_char;
|
||||
/* true if REG_ICASE. */
|
||||
unsigned char icase;
|
||||
unsigned char is_utf8;
|
||||
unsigned char map_notascii;
|
||||
unsigned char mbs_allocated;
|
||||
unsigned char offsets_needed;
|
||||
unsigned char newline_anchor;
|
||||
unsigned char word_ops_used;
|
||||
int mb_cur_max;
|
||||
};
|
||||
typedef struct re_string_t re_string_t;
|
||||
|
||||
|
||||
struct re_dfa_t;
|
||||
typedef struct re_dfa_t re_dfa_t;
|
||||
|
||||
#ifndef _LIBC
|
||||
# define internal_function
|
||||
#endif
|
||||
|
||||
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
|
||||
Idx new_buf_len)
|
||||
internal_function;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static void build_wcs_buffer (re_string_t *pstr) internal_function;
|
||||
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
|
||||
internal_function;
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
static void build_upper_buffer (re_string_t *pstr) internal_function;
|
||||
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
|
||||
static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
|
||||
int eflags)
|
||||
internal_function __attribute ((pure));
|
||||
/* Return the byte OFFSET positions past the current index of PSTR,
   without advancing the index.  OFFSET is parenthesized so that a
   compound argument (e.g. a conditional expression) is added to
   cur_idx as a whole rather than being split by operator precedence.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
|
||||
#define re_string_fetch_byte(pstr) \
|
||||
((pstr)->mbs[(pstr)->cur_idx++])
|
||||
#define re_string_first_byte(pstr, idx) \
|
||||
((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF)
|
||||
#define re_string_is_single_byte_char(pstr, idx) \
|
||||
((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \
|
||||
|| (pstr)->wcs[(idx) + 1] != WEOF))
|
||||
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
|
||||
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
|
||||
#define re_string_get_buffer(pstr) ((pstr)->mbs)
|
||||
#define re_string_length(pstr) ((pstr)->len)
|
||||
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
|
||||
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
|
||||
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
|
||||
|
||||
#include <alloca.h>
|
||||
|
||||
#ifndef _LIBC
|
||||
# if HAVE_ALLOCA
|
||||
/* The OS usually guarantees only one guard page at the bottom of the stack,
|
||||
and a page size can be as small as 4096 bytes. So we cannot safely
|
||||
allocate anything larger than 4096 bytes. Also care for the possibility
|
||||
of a few compiler-allocated temporary stack slots. */
|
||||
# define __libc_use_alloca(n) ((n) < 4032)
|
||||
# else
|
||||
/* alloca is implemented with malloc, so just use malloc. */
|
||||
# define __libc_use_alloca(n) 0
|
||||
# undef alloca
|
||||
# define alloca(n) malloc (n)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifndef MAX
|
||||
# define MAX(a,b) ((a) < (b) ? (b) : (a))
|
||||
#endif
|
||||
|
||||
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
|
||||
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
|
||||
#define re_free(p) free (p)
|
||||
|
||||
struct bin_tree_t
|
||||
{
|
||||
struct bin_tree_t *parent;
|
||||
struct bin_tree_t *left;
|
||||
struct bin_tree_t *right;
|
||||
struct bin_tree_t *first;
|
||||
struct bin_tree_t *next;
|
||||
|
||||
re_token_t token;
|
||||
|
||||
/* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
|
||||
Otherwise 'type' indicates the type of this node. */
|
||||
Idx node_idx;
|
||||
};
|
||||
typedef struct bin_tree_t bin_tree_t;
|
||||
|
||||
/* Number of bin_tree_t elements held by one storage chunk: as many as
   fit in 1024 bytes after setting aside room for one pointer (the
   chunk's 'next' link), not counting any structure padding. */
#define BIN_TREE_STORAGE_SIZE \
|
||||
((1024 - sizeof (void *)) / sizeof (bin_tree_t))
|
||||
|
||||
/* A fixed-size chunk of bin_tree_t nodes with a link to another chunk.
   NOTE(review): presumably used as a pool for parse-tree nodes (see
   re_dfa_t's str_tree_storage member) -- the allocation code is not
   visible in this header; confirm against the compiler sources. */
struct bin_tree_storage_t
|
||||
{
|
||||
struct bin_tree_storage_t *next; /* Link to another chunk. */
|
||||
bin_tree_t data[BIN_TREE_STORAGE_SIZE]; /* Node storage for this chunk. */
|
||||
};
|
||||
typedef struct bin_tree_storage_t bin_tree_storage_t;
|
||||
|
||||
#define CONTEXT_WORD 1
|
||||
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
|
||||
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
|
||||
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)
|
||||
|
||||
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
|
||||
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
|
||||
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
|
||||
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
|
||||
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)
|
||||
|
||||
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
|
||||
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
|
||||
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
|
||||
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)
|
||||
|
||||
/* Nonzero if CONTEXT (the context of the preceding position) violates
   any PREV_* requirement set in CONSTRAINT.  Every use of CONSTRAINT is
   parenthesized, as in NOT_SATISFY_NEXT_CONSTRAINT, so that a compound
   argument such as 'a | b' is masked as a whole instead of being split
   by operator precedence.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))
|
||||
|
||||
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
|
||||
((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
|
||||
|| (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
|
||||
|| (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
|
||||
|| (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
|
||||
|
||||
struct re_dfastate_t
|
||||
{
|
||||
re_hashval_t hash;
|
||||
re_node_set nodes;
|
||||
re_node_set non_eps_nodes;
|
||||
re_node_set inveclosure;
|
||||
re_node_set *entrance_nodes;
|
||||
struct re_dfastate_t **trtable, **word_trtable;
|
||||
unsigned int context : 4;
|
||||
unsigned int halt : 1;
|
||||
/* If this state can accept "multi byte".
|
||||
Note that we refer to multibyte characters, and multi character
|
||||
collating elements as "multi byte". */
|
||||
unsigned int accept_mb : 1;
|
||||
/* If this state has backreference node(s). */
|
||||
unsigned int has_backref : 1;
|
||||
unsigned int has_constraint : 1;
|
||||
};
|
||||
typedef struct re_dfastate_t re_dfastate_t;
|
||||
|
||||
struct re_state_table_entry
|
||||
{
|
||||
Idx num;
|
||||
Idx alloc;
|
||||
re_dfastate_t **array;
|
||||
};
|
||||
|
||||
/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Idx next_idx;
|
||||
Idx alloc;
|
||||
re_dfastate_t **array;
|
||||
} state_array_t;
|
||||
|
||||
/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Idx node;
|
||||
Idx str_idx; /* The position NODE match at. */
|
||||
state_array_t path;
|
||||
} re_sub_match_last_t;
|
||||
|
||||
/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
|
||||
And information about the node, whose type is OP_CLOSE_SUBEXP,
|
||||
corresponding to NODE is stored in LASTS. */
|
||||
|
||||
typedef struct
|
||||
{
|
||||
Idx str_idx;
|
||||
Idx node;
|
||||
state_array_t *path;
|
||||
Idx alasts; /* Allocation size of LASTS. */
|
||||
Idx nlasts; /* The number of LASTS. */
|
||||
re_sub_match_last_t **lasts;
|
||||
} re_sub_match_top_t;
|
||||
|
||||
struct re_backref_cache_entry
|
||||
{
|
||||
Idx node;
|
||||
Idx str_idx;
|
||||
Idx subexp_from;
|
||||
Idx subexp_to;
|
||||
char more;
|
||||
char unused;
|
||||
unsigned short int eps_reachable_subexps_map;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* The string object corresponding to the input string. */
|
||||
re_string_t input;
|
||||
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
|
||||
const re_dfa_t *const dfa;
|
||||
#else
|
||||
const re_dfa_t *dfa;
|
||||
#endif
|
||||
/* EFLAGS of the argument of regexec. */
|
||||
int eflags;
|
||||
/* Where the matching ends. */
|
||||
Idx match_last;
|
||||
Idx last_node;
|
||||
/* The state log used by the matcher. */
|
||||
re_dfastate_t **state_log;
|
||||
Idx state_log_top;
|
||||
/* Back reference cache. */
|
||||
Idx nbkref_ents;
|
||||
Idx abkref_ents;
|
||||
struct re_backref_cache_entry *bkref_ents;
|
||||
int max_mb_elem_len;
|
||||
Idx nsub_tops;
|
||||
Idx asub_tops;
|
||||
re_sub_match_top_t **sub_tops;
|
||||
} re_match_context_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
re_dfastate_t **sifted_states;
|
||||
re_dfastate_t **limited_states;
|
||||
Idx last_node;
|
||||
Idx last_str_idx;
|
||||
re_node_set limits;
|
||||
} re_sift_context_t;
|
||||
|
||||
struct re_fail_stack_ent_t
|
||||
{
|
||||
Idx idx;
|
||||
Idx node;
|
||||
regmatch_t *regs;
|
||||
re_node_set eps_via_nodes;
|
||||
};
|
||||
|
||||
struct re_fail_stack_t
|
||||
{
|
||||
Idx num;
|
||||
Idx alloc;
|
||||
struct re_fail_stack_ent_t *stack;
|
||||
};
|
||||
|
||||
/* The compiled automaton: the node array with its per-node tables, the
   state hash table, the initial states, the parse-tree storage, and a set
   of one-bit flags describing properties of the pattern.
   NOTE(review): members without a comment are documented by name only;
   their exact roles are defined by code outside this header.  */
struct re_dfa_t
|
||||
{
|
||||
/* Node array, with its allocated size and its length.  */
re_token_t *nodes;
|
||||
size_t nodes_alloc;
|
||||
size_t nodes_len;
|
||||
Idx *nexts;
|
||||
Idx *org_indices;
|
||||
re_node_set *edests;
|
||||
re_node_set *eclosures;
|
||||
re_node_set *inveclosures;
|
||||
struct re_state_table_entry *state_table;
|
||||
/* Initial states; the suffixes presumably name the context each one is
   used in (word, newline, beginning of buffer) -- TODO confirm.  */
re_dfastate_t *init_state;
|
||||
re_dfastate_t *init_state_word;
|
||||
re_dfastate_t *init_state_nl;
|
||||
re_dfastate_t *init_state_begbuf;
|
||||
bin_tree_t *str_tree;
|
||||
bin_tree_storage_t *str_tree_storage;
|
||||
re_bitset_ptr_t sb_char;
|
||||
int str_tree_storage_idx;
|
||||
|
||||
/* number of subexpressions 're_nsub' is in regex_t. */
|
||||
re_hashval_t state_hash_mask;
|
||||
Idx init_node;
|
||||
Idx nbackref; /* The number of back-references in this dfa. */
|
||||
|
||||
/* Bitmap expressing which backreference is used. */
|
||||
bitset_word_t used_bkref_map;
|
||||
bitset_word_t completed_bkref_map;
|
||||
|
||||
unsigned int has_plural_match : 1;
|
||||
/* If this dfa has "multibyte node", which is a backreference or
|
||||
a node which can accept multibyte character or multi character
|
||||
collating element. */
|
||||
unsigned int has_mb_node : 1;
|
||||
unsigned int is_utf8 : 1;
|
||||
unsigned int map_notascii : 1;
|
||||
unsigned int word_ops_used : 1;
|
||||
int mb_cur_max;
|
||||
bitset_t word_char;
|
||||
reg_syntax_t syntax;
|
||||
Idx *subexp_map;
|
||||
/* Present only in DEBUG builds.  */
#ifdef DEBUG
|
||||
char* re_str;
|
||||
#endif
|
||||
/* Present only when built as part of libc: a lock member named LOCK.  */
#ifdef _LIBC
|
||||
__libc_lock_define (, lock)
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Zero-fill the re_node_set that SET points to.  */
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
|
||||
/* Remove ID from SET by looking it up with re_node_set_contains and
   passing that result minus one to re_node_set_remove_at.
   NOTE(review): this implies re_node_set_contains yields a 1-based
   position; its behavior when ID is absent is not visible here.  */
#define re_node_set_remove(set,id) \
|
||||
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
|
||||
/* Reset the element count of P to zero; ELEMS is left untouched.  */
#define re_node_set_empty(p) ((p)->nelem = 0)
|
||||
/* Release the ELEMS array of SET with re_free.  */
#define re_node_set_free(set) re_free ((set)->elems)
|
||||
|
||||
|
||||
/* Kind tag stored in the TYPE member of bracket_elem_t.
   NOTE(review): judging by the names, these are single-byte character,
   multibyte character, equivalence class, collating symbol and character
   class -- confirm against the bracket-expression parser.  */
typedef enum
|
||||
{
|
||||
SB_CHAR,
|
||||
MB_CHAR,
|
||||
EQUIV_CLASS,
|
||||
COLL_SYM,
|
||||
CHAR_CLASS
|
||||
} bracket_elem_type;
|
||||
|
||||
/* A tagged element: TYPE plus a union OPR holding either a byte, a
   pointer to unsigned chars, or a wide character.
   NOTE(review): which union member is valid presumably depends on TYPE --
   confirm against the code that fills these in.  */
typedef struct
|
||||
{
|
||||
bracket_elem_type type;
|
||||
union
|
||||
{
|
||||
unsigned char ch;
|
||||
unsigned char *name;
|
||||
wchar_t wch;
|
||||
} opr;
|
||||
} bracket_elem_t;
|
||||
|
||||
|
||||
/* Inline functions for bitset_t operation. */
|
||||
|
||||
static inline void
|
||||
bitset_set (bitset_t set, Idx i)
|
||||
{
|
||||
set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_clear (bitset_t set, Idx i)
|
||||
{
|
||||
set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
bitset_contain (const bitset_t set, Idx i)
|
||||
{
|
||||
return (set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_empty (bitset_t set)
|
||||
{
|
||||
memset (set, '\0', sizeof (bitset_t));
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_set_all (bitset_t set)
|
||||
{
|
||||
memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS));
|
||||
if (SBC_MAX % BITSET_WORD_BITS != 0)
|
||||
set[BITSET_WORDS - 1] =
|
||||
((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_copy (bitset_t dest, const bitset_t src)
|
||||
{
|
||||
memcpy (dest, src, sizeof (bitset_t));
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_not (bitset_t set)
|
||||
{
|
||||
int bitset_i;
|
||||
for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i)
|
||||
set[bitset_i] = ~set[bitset_i];
|
||||
if (SBC_MAX % BITSET_WORD_BITS != 0)
|
||||
set[BITSET_WORDS - 1] =
|
||||
((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1)
|
||||
& ~set[BITSET_WORDS - 1]);
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_merge (bitset_t dest, const bitset_t src)
|
||||
{
|
||||
int bitset_i;
|
||||
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
|
||||
dest[bitset_i] |= src[bitset_i];
|
||||
}
|
||||
|
||||
static inline void
|
||||
bitset_mask (bitset_t dest, const bitset_t src)
|
||||
{
|
||||
int bitset_i;
|
||||
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
|
||||
dest[bitset_i] &= src[bitset_i];
|
||||
}
|
||||
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Inline functions for re_string. */
|
||||
static inline int
|
||||
internal_function __attribute ((pure))
|
||||
re_string_char_size_at (const re_string_t *pstr, Idx idx)
|
||||
{
|
||||
int byte_idx;
|
||||
if (pstr->mb_cur_max == 1)
|
||||
return 1;
|
||||
for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx)
|
||||
if (pstr->wcs[idx + byte_idx] != WEOF)
|
||||
break;
|
||||
return byte_idx;
|
||||
}
|
||||
|
||||
static inline wint_t
|
||||
internal_function __attribute ((pure))
|
||||
re_string_wchar_at (const re_string_t *pstr, Idx idx)
|
||||
{
|
||||
if (pstr->mb_cur_max == 1)
|
||||
return (wint_t) pstr->mbs[idx];
|
||||
return (wint_t) pstr->wcs[idx];
|
||||
}
|
||||
|
||||
/* Return the size in bytes of the element of PSTR that starts at IDX.
   Outside libc (_LIBC undefined) the whole body reduces to "return 1".
   Inside libc, when the LC_COLLATE locale reports a non-zero number of
   collation rules, findidx advances a pointer from PSTR->mbs + IDX and the
   distance it moved is returned; otherwise the result is again 1.  */
static int
|
||||
internal_function __attribute ((pure))
|
||||
re_string_elem_size_at (const re_string_t *pstr, Idx idx)
|
||||
{
|
||||
# ifdef _LIBC
|
||||
const unsigned char *p, *extra;
|
||||
const int32_t *table, *indirect;
|
||||
int32_t tmp;
|
||||
/* Included inside the function body; NOTE(review): presumably this is
   what supplies findidx and why TABLE, EXTRA and INDIRECT must be locals
   with exactly these names -- confirm against locale/weight.h.  */
# include <locale/weight.h>
|
||||
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
|
||||
|
||||
if (nrules != 0)
|
||||
{
|
||||
table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
|
||||
extra = (const unsigned char *)
|
||||
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
|
||||
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
|
||||
_NL_COLLATE_INDIRECTMB);
|
||||
p = pstr->mbs + idx;
|
||||
/* Only the side effect on P is used; the value stored in TMP is never
   read in this function.  */
tmp = findidx (&p);
|
||||
return p - pstr->mbs - idx;
|
||||
}
|
||||
else
|
||||
# endif /* _LIBC */
|
||||
/* Shared tail: the "else" branch under _LIBC, the entire body otherwise.  */
return 1;
|
||||
}
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
|
||||
/* Provide __GNUC_PREREQ (MAJ, MIN) when it is not already defined.  With
   both __GNUC__ and __GNUC_MINOR__ available it compares the compiler
   version, packed as (major << 16) + minor, against the requested one;
   otherwise it always expands to 0.  */
#ifndef __GNUC_PREREQ
|
||||
# if defined __GNUC__ && defined __GNUC_MINOR__
|
||||
# define __GNUC_PREREQ(maj, min) \
|
||||
((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
|
||||
# else
|
||||
# define __GNUC_PREREQ(maj, min) 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Define __attribute_warn_unused_result__: the warn_unused_result
   attribute when __GNUC_PREREQ (3,4) holds (any earlier definition is
   dropped first), and nothing otherwise.  */
#if __GNUC_PREREQ (3,4)
|
||||
# undef __attribute_warn_unused_result__
|
||||
# define __attribute_warn_unused_result__ \
|
||||
__attribute__ ((__warn_unused_result__))
|
||||
#else
|
||||
# define __attribute_warn_unused_result__ /* empty */
|
||||
#endif
|
||||
|
||||
#endif /* _REGEX_INTERNAL_H */
|
4417
lib/regexec.c
Normal file
4417
lib/regexec.c
Normal file
File diff suppressed because it is too large
Load diff
63
lib/strcasecmp.c
Normal file
63
lib/strcasecmp.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* Case-insensitive string comparison function.
|
||||
Copyright (C) 1998-1999, 2005-2007, 2009-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <string.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
|
||||
|
||||
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
|
||||
greater than zero if S1 is lexicographically less than, equal to or greater
|
||||
than S2.
|
||||
Note: This function does not work with multibyte strings! */
|
||||
|
||||
int
strcasecmp (const char *s1, const char *s2)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* A string always equals itself; no need to scan it.  */
  if (a == b)
    return 0;

  /* Walk both strings in step, folding upper case to lower case, until
     the folded bytes differ or S1 is exhausted.  */
  for (;;)
    {
      ca = isupper (*a) ? tolower (*a) : *a;
      cb = isupper (*b) ? tolower (*b) : *b;

      if (ca == '\0' || ca != cb)
        break;

      ++a;
      ++b;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so only the sign is reported there.  */
  return UCHAR_MAX <= INT_MAX ? ca - cb : (ca > cb) - (ca < cb);
}
|
176
lib/streq.h
Normal file
176
lib/streq.h
Normal file
|
@ -0,0 +1,176 @@
|
|||
/* Optimized string comparison.
|
||||
Copyright (C) 2001-2002, 2007, 2009-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>. */
|
||||
|
||||
#ifndef _GL_STREQ_H
|
||||
#define _GL_STREQ_H
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* STREQ allows to optimize string comparison with a small literal string.
|
||||
STREQ (s, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
|
||||
is semantically equivalent to
|
||||
strcmp (s, "EUC-KR") == 0
|
||||
just faster. */
|
||||
|
||||
/* Help GCC to generate good code for string comparisons with
|
||||
immediate strings. */
|
||||
#if defined (__GNUC__) && defined (__OPTIMIZE__)
|
||||
|
||||
/* The streqN helpers form a chain entered through streq0.  Step N
   compares S1[N] with the literal character passed for position N of S2:
   a mismatch yields 0, a match on NUL yields 1, and any other match hands
   over to step N + 1.  After nine matching non-NUL characters, streq9
   finishes the job with strcmp on the remainders.  */

static inline int
streq9 (const char *s1, const char *s2)
{
  const int rest = strcmp (s1 + 9, s2 + 9);

  return rest == 0;
}

static inline int
streq8 (const char *s1, const char *s2, char s28)
{
  if (s1[8] != s28)
    return 0;
  return s28 == 0 || streq9 (s1, s2);
}

static inline int
streq7 (const char *s1, const char *s2, char s27, char s28)
{
  if (s1[7] != s27)
    return 0;
  return s27 == 0 || streq8 (s1, s2, s28);
}

static inline int
streq6 (const char *s1, const char *s2, char s26, char s27, char s28)
{
  if (s1[6] != s26)
    return 0;
  return s26 == 0 || streq7 (s1, s2, s27, s28);
}

static inline int
streq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28)
{
  if (s1[5] != s25)
    return 0;
  return s25 == 0 || streq6 (s1, s2, s26, s27, s28);
}

static inline int
streq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[4] != s24)
    return 0;
  return s24 == 0 || streq5 (s1, s2, s25, s26, s27, s28);
}

static inline int
streq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[3] != s23)
    return 0;
  return s23 == 0 || streq4 (s1, s2, s24, s25, s26, s27, s28);
}

static inline int
streq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[2] != s22)
    return 0;
  return s22 == 0 || streq3 (s1, s2, s23, s24, s25, s26, s27, s28);
}

static inline int
streq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[1] != s21)
    return 0;
  return s21 == 0 || streq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28);
}

static inline int
streq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[0] != s20)
    return 0;
  return s20 == 0 || streq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28);
}
|
||||
|
||||
#define STREQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
|
||||
streq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28)
|
||||
|
||||
#else
|
||||
|
||||
#define STREQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
|
||||
(strcmp (s1, s2) == 0)
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _GL_STREQ_H */
|
123
lib/strings.in.h
Normal file
123
lib/strings.in.h
Normal file
|
@ -0,0 +1,123 @@
|
|||
/* A substitute <strings.h>.
|
||||
|
||||
Copyright (C) 2007-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef _@GUARD_PREFIX@_STRINGS_H
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
@PRAGMA_SYSTEM_HEADER@
|
||||
#endif
|
||||
@PRAGMA_COLUMNS@
|
||||
|
||||
/* Minix 3.1.8 has a bug: <sys/types.h> must be included before <strings.h>.
|
||||
But avoid namespace pollution on glibc systems. */
|
||||
#if defined __minix && !defined __GLIBC__
|
||||
# include <sys/types.h>
|
||||
#endif
|
||||
|
||||
/* The include_next requires a split double-inclusion guard. */
|
||||
#if @HAVE_STRINGS_H@
|
||||
# @INCLUDE_NEXT@ @NEXT_STRINGS_H@
|
||||
#endif
|
||||
|
||||
#ifndef _@GUARD_PREFIX@_STRINGS_H
|
||||
#define _@GUARD_PREFIX@_STRINGS_H
|
||||
|
||||
#if ! @HAVE_DECL_STRNCASECMP@
|
||||
/* Get size_t. */
|
||||
# include <stddef.h>
|
||||
#endif
|
||||
|
||||
|
||||
/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */
|
||||
|
||||
/* The definition of _GL_ARG_NONNULL is copied here. */
|
||||
|
||||
/* The definition of _GL_WARN_ON_USE is copied here. */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Find the index of the least-significant set bit. */
|
||||
#if @GNULIB_FFS@
|
||||
# if !@HAVE_FFS@
|
||||
_GL_FUNCDECL_SYS (ffs, int, (int i));
|
||||
# endif
|
||||
_GL_CXXALIAS_SYS (ffs, int, (int i));
|
||||
_GL_CXXALIASWARN (ffs);
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef ffs
|
||||
# if HAVE_RAW_DECL_FFS
|
||||
_GL_WARN_ON_USE (ffs, "ffs is not portable - use the ffs module");
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
|
||||
greater than zero if S1 is lexicographically less than, equal to or greater
|
||||
than S2.
|
||||
Note: This function does not work in multibyte locales. */
|
||||
#if ! @HAVE_STRCASECMP@
|
||||
extern int strcasecmp (char const *s1, char const *s2)
|
||||
_GL_ARG_NONNULL ((1, 2));
|
||||
#endif
|
||||
#if defined GNULIB_POSIXCHECK
/* strcasecmp() does not work with multibyte strings:
   POSIX says that it operates on "strings", and "string" in POSIX is defined
   as a sequence of bytes, not of characters.
   Under GNULIB_POSIXCHECK any macro definition of strcasecmp is removed and,
   when a raw declaration was detected, uses of it are flagged with the
   message below.  The message used to read "c_strcasecmp , gnulib module
   c-strcase)"; the parenthesis is now balanced.  */
# undef strcasecmp
# if HAVE_RAW_DECL_STRCASECMP
_GL_WARN_ON_USE (strcasecmp, "strcasecmp cannot work correctly on character "
                 "strings in multibyte locales - "
                 "use mbscasecmp if you care about "
                 "internationalization, or use c_strcasecmp "
                 "(gnulib module c-strcase) if you want a locale "
                 "independent function");
# endif
#endif
|
||||
|
||||
/* Compare no more than N bytes of strings S1 and S2, ignoring case,
|
||||
returning less than, equal to or greater than zero if S1 is
|
||||
lexicographically less than, equal to or greater than S2.
|
||||
Note: This function cannot work correctly in multibyte locales. */
|
||||
#if ! @HAVE_DECL_STRNCASECMP@
|
||||
extern int strncasecmp (char const *s1, char const *s2, size_t n)
|
||||
_GL_ARG_NONNULL ((1, 2));
|
||||
#endif
|
||||
#if defined GNULIB_POSIXCHECK
/* strncasecmp() does not work with multibyte strings:
   POSIX says that it operates on "strings", and "string" in POSIX is defined
   as a sequence of bytes, not of characters.
   Under GNULIB_POSIXCHECK any macro definition of strncasecmp is removed
   and, when a raw declaration was detected, uses of it are flagged with the
   message below.  The message used to read "c_strncasecmp , gnulib module
   c-strcase)"; the parenthesis is now balanced.  */
# undef strncasecmp
# if HAVE_RAW_DECL_STRNCASECMP
_GL_WARN_ON_USE (strncasecmp, "strncasecmp cannot work correctly on character "
                 "strings in multibyte locales - "
                 "use mbsncasecmp or mbspcasecmp if you care about "
                 "internationalization, or use c_strncasecmp "
                 "(gnulib module c-strcase) if you want a locale "
                 "independent function");
# endif
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _@GUARD_PREFIX@_STRINGS_H */
|
||||
#endif /* _@GUARD_PREFIX@_STRINGS_H */
|
63
lib/strncasecmp.c
Normal file
63
lib/strncasecmp.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* strncasecmp.c -- case insensitive string comparator
|
||||
Copyright (C) 1998-1999, 2005-2007, 2009-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <string.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
|
||||
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
|
||||
|
||||
/* Compare no more than N bytes of strings S1 and S2, ignoring case,
|
||||
returning less than, equal to or greater than zero if S1 is
|
||||
lexicographically less than, equal to or greater than S2.
|
||||
Note: This function cannot work correctly in multibyte locales. */
|
||||
|
||||
int
strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* Identical pointers or an empty comparison window compare equal.  */
  if (a == b || n == 0)
    return 0;

  /* Walk both strings in step, folding upper case to lower case, until
     N bytes have been examined, S1 is exhausted, or the folded bytes
     differ.  */
  for (;;)
    {
      ca = isupper (*a) ? tolower (*a) : *a;
      cb = isupper (*b) ? tolower (*b) : *b;

      if (--n == 0 || ca == '\0' || ca != cb)
        break;

      ++a;
      ++b;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so only the sign is reported there.  */
  return UCHAR_MAX <= INT_MAX ? ca - cb : (ca > cb) - (ca < cb);
}
|
53
lib/wcrtomb.c
Normal file
53
lib/wcrtomb.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* Convert wide character to multibyte character.
|
||||
Copyright (C) 2008-2012 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2008.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <wchar.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
size_t
wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
{
  int nbytes;

  /* This implementation of wcrtomb on top of wctomb() supports only
     stateless encodings.  ps must be in the initial state.  */
  if (ps != NULL && !mbsinit (ps))
    {
      errno = EINVAL;
      return (size_t)(-1);
    }

  /* Without an output buffer WC is ignored.  We know the NUL wide
     character corresponds to the NUL character, hence one byte.  */
  if (s == NULL)
    return 1;

  nbytes = wctomb (s, wc);
  if (nbytes < 0)
    {
      /* wctomb could not convert WC.  */
      errno = EILSEQ;
      return (size_t)(-1);
    }

  return nbytes;
}
|
499
lib/wctype.in.h
Normal file
499
lib/wctype.in.h
Normal file
|
@ -0,0 +1,499 @@
|
|||
/* A substitute for ISO C99 <wctype.h>, for platforms that lack it.
|
||||
|
||||
Copyright (C) 2006-2012 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
/* Written by Bruno Haible and Paul Eggert. */
|
||||
|
||||
/*
|
||||
* ISO C 99 <wctype.h> for platforms that lack it.
|
||||
* <http://www.opengroup.org/susv3xbd/wctype.h.html>
|
||||
*
|
||||
* iswctype, towctrans, towlower, towupper, wctrans, wctype,
|
||||
* wctrans_t, and wctype_t are not yet implemented.
|
||||
*/
|
||||
|
||||
#ifndef _@GUARD_PREFIX@_WCTYPE_H
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
@PRAGMA_SYSTEM_HEADER@
|
||||
#endif
|
||||
@PRAGMA_COLUMNS@
|
||||
|
||||
#if @HAVE_WINT_T@
|
||||
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.
|
||||
Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
|
||||
<wchar.h>.
|
||||
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
|
||||
included before <wchar.h>. */
|
||||
# include <stddef.h>
|
||||
# include <stdio.h>
|
||||
# include <time.h>
|
||||
# include <wchar.h>
|
||||
#endif
|
||||
|
||||
/* Include the original <wctype.h> if it exists.
|
||||
BeOS 5 has the functions but no <wctype.h>. */
|
||||
/* The include_next requires a split double-inclusion guard. */
|
||||
#if @HAVE_WCTYPE_H@
|
||||
# @INCLUDE_NEXT@ @NEXT_WCTYPE_H@
|
||||
#endif
|
||||
|
||||
#ifndef _@GUARD_PREFIX@_WCTYPE_H
|
||||
#define _@GUARD_PREFIX@_WCTYPE_H
|
||||
|
||||
/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */
|
||||
|
||||
/* The definition of _GL_WARN_ON_USE is copied here. */
|
||||
|
||||
/* Solaris 2.6 <wctype.h> includes <widec.h> which includes <euc.h> which
|
||||
#defines a number of identifiers in the application namespace. Revert
|
||||
these #defines. */
|
||||
#ifdef __sun
|
||||
# undef multibyte
|
||||
# undef eucw1
|
||||
# undef eucw2
|
||||
# undef eucw3
|
||||
# undef scrw1
|
||||
# undef scrw2
|
||||
# undef scrw3
|
||||
#endif
|
||||
|
||||
/* Define wint_t and WEOF. (Also done in wchar.in.h.) */
|
||||
#if !@HAVE_WINT_T@ && !defined wint_t
|
||||
# define wint_t int
|
||||
# ifndef WEOF
|
||||
# define WEOF -1
|
||||
# endif
|
||||
#else
|
||||
/* MSVC defines wint_t as 'unsigned short' in <crtdefs.h>.
|
||||
This is too small: ISO C 99 section 7.24.1.(2) says that wint_t must be
|
||||
"unchanged by default argument promotions". Override it. */
|
||||
# if defined _MSC_VER
|
||||
# if !GNULIB_defined_wint_t
|
||||
# include <crtdefs.h>
|
||||
typedef unsigned int rpl_wint_t;
|
||||
# undef wint_t
|
||||
# define wint_t rpl_wint_t
|
||||
# define GNULIB_defined_wint_t 1
|
||||
# endif
|
||||
# endif
|
||||
# ifndef WEOF
|
||||
# define WEOF ((wint_t) -1)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#if !GNULIB_defined_wctype_functions
|
||||
|
||||
/* FreeBSD 4.4 to 4.11 has <wctype.h> but lacks the functions.
|
||||
Linux libc5 has <wctype.h> and the functions but they are broken.
|
||||
Assume all 11 functions (all isw* except iswblank) are implemented the
|
||||
same way, or not at all. */
|
||||
# if ! @HAVE_ISWCNTRL@ || @REPLACE_ISWCNTRL@
|
||||
|
||||
/* IRIX 5.3 has macros but no functions, its isw* macros refer to an
|
||||
undefined variable _ctmp_ and to <ctype.h> macros like _P, and they
|
||||
refer to system functions like _iswctype that are not in the
|
||||
standard C library. Rather than try to get ancient buggy
|
||||
implementations like this to work, just disable them. */
|
||||
# undef iswalnum
|
||||
# undef iswalpha
|
||||
# undef iswblank
|
||||
# undef iswcntrl
|
||||
# undef iswdigit
|
||||
# undef iswgraph
|
||||
# undef iswlower
|
||||
# undef iswprint
|
||||
# undef iswpunct
|
||||
# undef iswspace
|
||||
# undef iswupper
|
||||
# undef iswxdigit
|
||||
# undef towlower
|
||||
# undef towupper
|
||||
|
||||
/* Linux libc5 has <wctype.h> and the functions but they are broken. */
|
||||
# if @REPLACE_ISWCNTRL@
|
||||
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
|
||||
# define iswalnum rpl_iswalnum
|
||||
# define iswalpha rpl_iswalpha
|
||||
# define iswblank rpl_iswblank
|
||||
# define iswcntrl rpl_iswcntrl
|
||||
# define iswdigit rpl_iswdigit
|
||||
# define iswgraph rpl_iswgraph
|
||||
# define iswlower rpl_iswlower
|
||||
# define iswprint rpl_iswprint
|
||||
# define iswpunct rpl_iswpunct
|
||||
# define iswspace rpl_iswspace
|
||||
# define iswupper rpl_iswupper
|
||||
# define iswxdigit rpl_iswxdigit
|
||||
# endif
|
||||
# endif
|
||||
# if @REPLACE_TOWLOWER@
|
||||
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
|
||||
# define towlower rpl_towlower
|
||||
# define towupper rpl_towupper
|
||||
# endif
|
||||
# endif
|
||||
|
||||
/* ASCII-only fallback: nonzero for '0'..'9', 'A'..'Z' and 'a'..'z'.
   Clearing bit 0x20 maps the lower-case letters onto the upper-case
   range.  Named rpl_iswalnum when the system function is replaced.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswalnum
#  else
iswalnum
#  endif
         (wint_t wc)
{
  if (wc >= '0' && wc <= '9')
    return 1;
  return (wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for 'A'..'Z' and 'a'..'z'.  Clearing bit
   0x20 maps the lower-case letters onto the upper-case range.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswalpha
#  else
iswalpha
#  endif
         (wint_t wc)
{
  if ((wc & ~0x20) < 'A')
    return 0;
  return (wc & ~0x20) <= 'Z';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for space and horizontal tab only.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswblank
#  else
iswblank
#  endif
         (wint_t wc)
{
  if (wc == ' ')
    return 1;
  return wc == '\t';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for 0x7f and for any value that has no
   bit set outside the low five bits (0x00..0x1f).  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswcntrl
#  else
iswcntrl
#  endif
         (wint_t wc)
{
  if ((wc & ~0x1f) == 0)
    return 1;
  return wc == 0x7f;
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for '0'..'9'.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswdigit
#  else
iswdigit
#  endif
         (wint_t wc)
{
  if (wc < '0')
    return 0;
  return wc <= '9';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for '!'..'~'.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswgraph
#  else
iswgraph
#  endif
         (wint_t wc)
{
  if (wc < '!')
    return 0;
  return wc <= '~';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for 'a'..'z'.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswlower
#  else
iswlower
#  endif
         (wint_t wc)
{
  if (wc < 'a')
    return 0;
  return wc <= 'z';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for ' '..'~'.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswprint
#  else
iswprint
#  endif
         (wint_t wc)
{
  if (wc < ' ')
    return 0;
  return wc <= '~';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for the characters in '!'..'~' that are
   neither digits nor letters.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswpunct
#  else
iswpunct
#  endif
         (wint_t wc)
{
  /* Outside the graphic range: never punctuation.  */
  if (!(wc >= '!' && wc <= '~'))
    return 0;
  /* Digits are excluded.  */
  if (wc >= '0' && wc <= '9')
    return 0;
  /* So are letters; clearing bit 0x20 folds lower case onto upper case.  */
  return !((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z');
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for space, '\t', '\n', '\v', '\f' and
   '\r'.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswspace
#  else
iswspace
#  endif
         (wint_t wc)
{
  if (wc == ' ' || wc == '\t')
    return 1;
  if (wc == '\n' || wc == '\v')
    return 1;
  return wc == '\f' || wc == '\r';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for 'A'..'Z'.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswupper
#  else
iswupper
#  endif
         (wint_t wc)
{
  if (wc < 'A')
    return 0;
  return wc <= 'Z';
}
|
||||
|
||||
/* ASCII-only fallback: nonzero for '0'..'9', 'A'..'F' and 'a'..'f'.
   Clearing bit 0x20 maps the lower-case letters onto the upper-case
   range.  */
static inline int
#  if @REPLACE_ISWCNTRL@
rpl_iswxdigit
#  else
iswxdigit
#  endif
          (wint_t wc)
{
  if (wc >= '0' && wc <= '9')
    return 1;
  return (wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'F';
}
|
||||
|
||||
/* ASCII-only fallback: map 'A'..'Z' to 'a'..'z' and return every other
   value unchanged.  Named rpl_towlower when the system function is
   replaced.  */
static inline wint_t
#  if @REPLACE_TOWLOWER@
rpl_towlower
#  else
towlower
#  endif
         (wint_t wc)
{
  if (wc >= 'A' && wc <= 'Z')
    return wc - 'A' + 'a';
  return wc;
}
|
||||
|
||||
/* ASCII-only fallback: map 'a'..'z' to 'A'..'Z' and return every other
   value unchanged.  Named rpl_towupper when the system function is
   replaced.  */
static inline wint_t
#  if @REPLACE_TOWLOWER@
rpl_towupper
#  else
towupper
#  endif
         (wint_t wc)
{
  if (wc >= 'a' && wc <= 'z')
    return wc - 'a' + 'A';
  return wc;
}
|
||||
|
||||
# elif @GNULIB_ISWBLANK@ && (! @HAVE_ISWBLANK@ || @REPLACE_ISWBLANK@)
|
||||
/* Only the iswblank function is missing. */
|
||||
|
||||
# if @REPLACE_ISWBLANK@
|
||||
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
|
||||
# define iswblank rpl_iswblank
|
||||
# endif
|
||||
_GL_FUNCDECL_RPL (iswblank, int, (wint_t wc));
|
||||
# else
|
||||
_GL_FUNCDECL_SYS (iswblank, int, (wint_t wc));
|
||||
# endif
|
||||
|
||||
# endif
|
||||
|
||||
# if defined __MINGW32__
|
||||
|
||||
/* On native Windows, wchar_t is uint16_t, and wint_t is uint32_t.
|
||||
The functions towlower and towupper are implemented in the MSVCRT library
|
||||
to take a wchar_t argument and return a wchar_t result. mingw declares
|
||||
these functions to take a wint_t argument and return a wint_t result.
|
||||
This means that:
|
||||
1. When the user passes an argument outside the range 0x0000..0xFFFF, the
|
||||
function will look only at the lower 16 bits. This is allowed according
|
||||
to POSIX.
|
||||
2. The return value is returned in the lower 16 bits of the result register.
|
||||
The upper 16 bits are random: whatever happened to be in that part of the
|
||||
result register. We need to fix this by adding a zero-extend from
|
||||
wchar_t to wint_t after the call. */
|
||||
|
||||
/* mingw wrapper around the library towlower.  The result is first narrowed
   to wchar_t and then widened again, so that whatever the library left in
   the part of the return value above a wchar_t is discarded.
   NOTE(review): an ASCII-only rpl_towlower is also defined earlier in this
   header when REPLACE_TOWLOWER is true, in a separate conditional; confirm
   that the two definitions can never both be active on mingw.  */
static inline wint_t
rpl_towlower (wint_t wc)
{
  wchar_t narrowed = (wchar_t) towlower (wc);

  return (wint_t) narrowed;
}
/* From here on, plain uses of towlower go through the wrapper, except in
   C++ translation units that define GNULIB_NAMESPACE.  */
# if !defined __cplusplus || !defined GNULIB_NAMESPACE
#  define towlower rpl_towlower
# endif
|
||||
|
||||
/* mingw wrapper around the library towupper.  The result is first narrowed
   to wchar_t and then widened again, so that whatever the library left in
   the part of the return value above a wchar_t is discarded.  */
static inline wint_t
rpl_towupper (wint_t wc)
{
  wchar_t narrowed = (wchar_t) towupper (wc);

  return (wint_t) narrowed;
}
/* From here on, plain uses of towupper go through the wrapper, except in
   C++ translation units that define GNULIB_NAMESPACE.  */
# if !defined __cplusplus || !defined GNULIB_NAMESPACE
#  define towupper rpl_towupper
# endif
|
||||
|
||||
# endif /* __MINGW32__ */
|
||||
|
||||
# define GNULIB_defined_wctype_functions 1
|
||||
#endif
|
||||
|
||||
/* C++ alias bookkeeping for the eleven basic classification functions
   (iswalnum ... iswxdigit).  All eleven are switched together by
   REPLACE_ISWCNTRL: the _RPL form is used when it is true, the _SYS form
   otherwise.  The _GL_CXXALIAS_* and _GL_CXXALIASWARN macros are defined
   outside this part of the file; presumably they only matter for C++
   builds using GNULIB_NAMESPACE -- confirm against their definitions.  */
#if @REPLACE_ISWCNTRL@
|
||||
_GL_CXXALIAS_RPL (iswalnum, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswalpha, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswcntrl, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswdigit, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswgraph, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswlower, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswprint, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswpunct, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswspace, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswupper, int, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (iswxdigit, int, (wint_t wc));
|
||||
#else
|
||||
_GL_CXXALIAS_SYS (iswalnum, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswalpha, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswcntrl, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswdigit, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswgraph, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswlower, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswprint, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswpunct, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswspace, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswupper, int, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (iswxdigit, int, (wint_t wc));
|
||||
#endif
|
||||
/* The warning hook is attached unconditionally, once per function, after
   whichever alias form was selected above.  */
_GL_CXXALIASWARN (iswalnum);
|
||||
_GL_CXXALIASWARN (iswalpha);
|
||||
_GL_CXXALIASWARN (iswcntrl);
|
||||
_GL_CXXALIASWARN (iswdigit);
|
||||
_GL_CXXALIASWARN (iswgraph);
|
||||
_GL_CXXALIASWARN (iswlower);
|
||||
_GL_CXXALIASWARN (iswprint);
|
||||
_GL_CXXALIASWARN (iswpunct);
|
||||
_GL_CXXALIASWARN (iswspace);
|
||||
_GL_CXXALIASWARN (iswupper);
|
||||
_GL_CXXALIASWARN (iswxdigit);
|
||||
|
||||
/* C++ alias bookkeeping for iswblank, emitted only when GNULIB_ISWBLANK is
   true.  */
#if @GNULIB_ISWBLANK@
|
||||
/* The replacement alias is chosen when either the whole classification
   family is replaced (REPLACE_ISWCNTRL) or iswblank alone is
   (REPLACE_ISWBLANK).  */
# if @REPLACE_ISWCNTRL@ || @REPLACE_ISWBLANK@
|
||||
_GL_CXXALIAS_RPL (iswblank, int, (wint_t wc));
|
||||
# else
|
||||
_GL_CXXALIAS_SYS (iswblank, int, (wint_t wc));
|
||||
# endif
|
||||
_GL_CXXALIASWARN (iswblank);
|
||||
#endif
|
||||
|
||||
/* Supply a stand-in wctype_t, an opaque 'void *', when HAVE_WCTYPE_T is
   false.  GNULIB_defined_wctype_t records that the typedef has been made so
   that it is not emitted a second time.  */
#if !@HAVE_WCTYPE_T@
|
||||
# if !GNULIB_defined_wctype_t
|
||||
typedef void * wctype_t;
|
||||
# define GNULIB_defined_wctype_t 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Get a descriptor for a wide character property. */
|
||||
#if @GNULIB_WCTYPE@
|
||||
/* A prototype is emitted here only when HAVE_WCTYPE_T is false.  */
# if !@HAVE_WCTYPE_T@
|
||||
_GL_FUNCDECL_SYS (wctype, wctype_t, (const char *name));
|
||||
# endif
|
||||
_GL_CXXALIAS_SYS (wctype, wctype_t, (const char *name));
|
||||
_GL_CXXALIASWARN (wctype);
|
||||
/* Otherwise, under GNULIB_POSIXCHECK: drop any macro named wctype and, if
   HAVE_RAW_DECL_WCTYPE is true, attach a warning to uses of it.  */
#elif defined GNULIB_POSIXCHECK
|
||||
# undef wctype
|
||||
# if HAVE_RAW_DECL_WCTYPE
|
||||
_GL_WARN_ON_USE (wctype, "wctype is unportable - "
|
||||
"use gnulib module wctype for portability");
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Test whether a wide character has a given property.
|
||||
The argument WC must be either a wchar_t value or WEOF.
|
||||
The argument DESC must have been returned by the wctype() function. */
|
||||
#if @GNULIB_ISWCTYPE@
|
||||
/* A prototype is emitted here only when HAVE_WCTYPE_T is false.  */
# if !@HAVE_WCTYPE_T@
|
||||
_GL_FUNCDECL_SYS (iswctype, int, (wint_t wc, wctype_t desc));
|
||||
# endif
|
||||
_GL_CXXALIAS_SYS (iswctype, int, (wint_t wc, wctype_t desc));
|
||||
_GL_CXXALIASWARN (iswctype);
|
||||
/* Otherwise, under GNULIB_POSIXCHECK: drop any macro named iswctype and, if
   HAVE_RAW_DECL_ISWCTYPE is true, attach a warning to uses of it.  */
#elif defined GNULIB_POSIXCHECK
|
||||
# undef iswctype
|
||||
# if HAVE_RAW_DECL_ISWCTYPE
|
||||
_GL_WARN_ON_USE (iswctype, "iswctype is unportable - "
|
||||
"use gnulib module iswctype for portability");
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* C++ alias bookkeeping for towlower and towupper.  The replacement form is
   used when REPLACE_TOWLOWER is true, and also whenever __MINGW32__ is
   defined, because this header defines rpl_towlower and rpl_towupper
   wrappers for mingw.  */
#if @REPLACE_TOWLOWER@ || defined __MINGW32__
|
||||
_GL_CXXALIAS_RPL (towlower, wint_t, (wint_t wc));
|
||||
_GL_CXXALIAS_RPL (towupper, wint_t, (wint_t wc));
|
||||
#else
|
||||
_GL_CXXALIAS_SYS (towlower, wint_t, (wint_t wc));
|
||||
_GL_CXXALIAS_SYS (towupper, wint_t, (wint_t wc));
|
||||
#endif
|
||||
_GL_CXXALIASWARN (towlower);
|
||||
_GL_CXXALIASWARN (towupper);
|
||||
|
||||
/* Supply a stand-in wctrans_t, an opaque 'void *', when HAVE_WCTRANS_T is
   false.  GNULIB_defined_wctrans_t records that the typedef has been made
   so that it is not emitted a second time.  */
#if !@HAVE_WCTRANS_T@
|
||||
# if !GNULIB_defined_wctrans_t
|
||||
typedef void * wctrans_t;
|
||||
# define GNULIB_defined_wctrans_t 1
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Get a descriptor for a wide character case conversion. */
|
||||
#if @GNULIB_WCTRANS@
|
||||
/* A prototype is emitted here only when HAVE_WCTRANS_T is false.  */
# if !@HAVE_WCTRANS_T@
|
||||
_GL_FUNCDECL_SYS (wctrans, wctrans_t, (const char *name));
|
||||
# endif
|
||||
_GL_CXXALIAS_SYS (wctrans, wctrans_t, (const char *name));
|
||||
_GL_CXXALIASWARN (wctrans);
|
||||
/* Otherwise, under GNULIB_POSIXCHECK: drop any macro named wctrans and, if
   HAVE_RAW_DECL_WCTRANS is true, attach a warning to uses of it.  */
#elif defined GNULIB_POSIXCHECK
|
||||
# undef wctrans
|
||||
# if HAVE_RAW_DECL_WCTRANS
|
||||
_GL_WARN_ON_USE (wctrans, "wctrans is unportable - "
|
||||
"use gnulib module wctrans for portability");
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Perform a given case conversion on a wide character.
|
||||
The argument WC must be either a wchar_t value or WEOF.
|
||||
The argument DESC must have been returned by the wctrans() function. */
|
||||
#if @GNULIB_TOWCTRANS@
|
||||
/* A prototype is emitted here only when HAVE_WCTRANS_T is false.  */
# if !@HAVE_WCTRANS_T@
|
||||
_GL_FUNCDECL_SYS (towctrans, wint_t, (wint_t wc, wctrans_t desc));
|
||||
# endif
|
||||
_GL_CXXALIAS_SYS (towctrans, wint_t, (wint_t wc, wctrans_t desc));
|
||||
_GL_CXXALIASWARN (towctrans);
|
||||
/* Otherwise, under GNULIB_POSIXCHECK: drop any macro named towctrans and,
   if HAVE_RAW_DECL_TOWCTRANS is true, attach a warning to uses of it.  */
#elif defined GNULIB_POSIXCHECK
|
||||
# undef towctrans
|
||||
# if HAVE_RAW_DECL_TOWCTRANS
|
||||
_GL_WARN_ON_USE (towctrans, "towctrans is unportable - "
|
||||
"use gnulib module towctrans for portability");
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _@GUARD_PREFIX@_WCTYPE_H */
|
||||
#endif /* _@GUARD_PREFIX@_WCTYPE_H */
|
Loading…
Add table
Add a link
Reference in a new issue