1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00

Use Gnulib's `regex' module.

This should help with regex portability, as reported in
<http://bugs.gnu.org/10684> for Darwin 8.11.

* m4/gnulib-cache.m4 (gl_MODULES): Add `regex'.

* configure.ac: Remove header checks for regex.h, rxposix.h, and
  rx/rxposix.h.  Remove check for the `regcomp' function.  Remove
  definition of `HAVE_REGCOMP'.  Define `ENABLE_REGEX'.

* libguile/init.c: Check for `ENABLE_REGEX' instead of `HAVE_REGCOMP'.

* libguile/regex-posix.c: Always include <regex.h>.  Remove #ifdefs for
  rxposix.h and co.
This commit is contained in:
Ludovic Courtès 2012-02-03 10:51:46 +01:00
parent 1ba05158eb
commit eb4a14ed47
40 changed files with 16040 additions and 43 deletions

View file

@ -5,7 +5,7 @@ dnl
define(GUILE_CONFIGURE_COPYRIGHT,[[
Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of GUILE
@ -656,7 +656,7 @@ AC_SUBST([SCM_I_GSC_HAVE_STRUCT_DIRENT64])
# sched.h - missing on MinGW
#
AC_CHECK_HEADERS([complex.h fenv.h io.h libc.h limits.h memory.h process.h string.h \
regex.h rxposix.h rx/rxposix.h sys/dir.h sys/ioctl.h sys/select.h \
sys/dir.h sys/ioctl.h sys/select.h \
sys/time.h sys/timeb.h sys/times.h sys/stdtypes.h sys/types.h \
sys/utime.h time.h unistd.h utime.h pwd.h grp.h sys/utsname.h \
direct.h langinfo.h nl_types.h machine/fpu.h poll.h sched.h])
@ -1112,24 +1112,8 @@ if test $guile_cv_localtime_cache = yes; then
fi
if test "$enable_regex" = yes; then
if test "$ac_cv_header_regex_h" = yes ||
test "$ac_cv_header_rxposix_h" = yes ||
test "$ac_cv_header_rx_rxposix_h" = yes; then
GUILE_NAMED_CHECK_FUNC(regcomp, norx, [AC_LIBOBJ([regex-posix])],
[AC_CHECK_LIB(rx, main)
GUILE_NAMED_CHECK_FUNC(regcomp, rx, [AC_LIBOBJ([regex-posix])],
[AC_CHECK_LIB(regex, main)
GUILE_NAMED_CHECK_FUNC(regcomp, regex, [AC_LIBOBJ([regex-posix])])])]
)
dnl The following should not be necessary, but for some reason
dnl autoheader misses it if we don't include it!
if test "$ac_cv_func_regcomp_norx" = yes ||
test "$ac_cv_func_regcomp_regex" = yes ||
test "$ac_cv_func_regcomp_rx" = yes; then
AC_DEFINE([HAVE_REGCOMP], 1,
[This is included as part of a workaround for a autoheader bug.])
fi
fi
AC_LIBOBJ([regex-posix])
AC_DEFINE([ENABLE_REGEX], 1, [Define when regex support is enabled.])
fi
AC_REPLACE_FUNCS([strerror memmove mkstemp])

View file

@ -21,7 +21,7 @@
# the same distribution terms as the rest of that program.
#
# Generated by gnulib-tool.
# Reproduce by: gnulib-tool --import --dir=. --local-dir=gnulib-local --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl=3 --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alignof alloca-opt announce-gen autobuild bind byteswap canonicalize-lgpl ceil close connect dirfd duplocale environ extensions flock floor fpieee frexp full-read full-write func gendocs getaddrinfo getpeername getsockname getsockopt git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload havelib iconv_open-utf inet_ntop inet_pton isinf isnan ldexp lib-symbol-versions lib-symbol-visibility libunistring listen localcharset locale log1p maintainer-makefile malloc-gnu malloca nproc open pipe2 putenv recv recvfrom rename send sendto setenv setsockopt shutdown socket stat-time stdlib strftime striconveh string sys_stat trunc verify vsnprintf warnings wchar
# Reproduce by: gnulib-tool --import --dir=. --local-dir=gnulib-local --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl=3 --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alignof alloca-opt announce-gen autobuild bind byteswap canonicalize-lgpl ceil close connect dirfd duplocale environ extensions flock floor fpieee frexp full-read full-write func gendocs getaddrinfo getpeername getsockname getsockopt git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload havelib iconv_open-utf inet_ntop inet_pton isinf isnan ldexp lib-symbol-versions lib-symbol-visibility libunistring listen localcharset locale log1p maintainer-makefile malloc-gnu malloca nproc open pipe2 putenv recv recvfrom regex rename send sendto setenv setsockopt shutdown socket stat-time stdlib strftime striconveh string sys_stat trunc verify vsnprintf warnings wchar
AUTOMAKE_OPTIONS = 1.5 gnits subdir-objects
@ -165,6 +165,15 @@ EXTRA_libgnu_la_SOURCES += bind.c
## end gnulib module bind
## begin gnulib module btowc
EXTRA_DIST += btowc.c
EXTRA_libgnu_la_SOURCES += btowc.c
## end gnulib module btowc
## begin gnulib module byteswap
BUILT_SOURCES += $(BYTESWAP_H)
@ -755,6 +764,39 @@ EXTRA_libgnu_la_SOURCES += isnan.c isnanl.c
## end gnulib module isnanl
## begin gnulib module langinfo
BUILT_SOURCES += langinfo.h
# We need the following in order to create an empty placeholder for
# <langinfo.h> when the system doesn't have one.
langinfo.h: langinfo.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H)
$(AM_V_GEN)rm -f $@-t $@ && \
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \
sed -e 's|@''GUARD_PREFIX''@|GL|g' \
-e 's|@''HAVE_LANGINFO_H''@|$(HAVE_LANGINFO_H)|g' \
-e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
-e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
-e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
-e 's|@''NEXT_LANGINFO_H''@|$(NEXT_LANGINFO_H)|g' \
-e 's/@''GNULIB_NL_LANGINFO''@/$(GNULIB_NL_LANGINFO)/g' \
-e 's|@''HAVE_LANGINFO_CODESET''@|$(HAVE_LANGINFO_CODESET)|g' \
-e 's|@''HAVE_LANGINFO_T_FMT_AMPM''@|$(HAVE_LANGINFO_T_FMT_AMPM)|g' \
-e 's|@''HAVE_LANGINFO_ERA''@|$(HAVE_LANGINFO_ERA)|g' \
-e 's|@''HAVE_LANGINFO_YESEXPR''@|$(HAVE_LANGINFO_YESEXPR)|g' \
-e 's|@''HAVE_NL_LANGINFO''@|$(HAVE_NL_LANGINFO)|g' \
-e 's|@''REPLACE_NL_LANGINFO''@|$(REPLACE_NL_LANGINFO)|g' \
-e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
-e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \
< $(srcdir)/langinfo.in.h; \
} > $@-t && \
mv $@-t $@
MOSTLYCLEANFILES += langinfo.h langinfo.h-t
EXTRA_DIST += langinfo.in.h
## end gnulib module langinfo
## begin gnulib module lib-symbol-visibility
# The value of $(CFLAG_VISIBILITY) needs to be added to the CFLAGS for the
@ -1106,6 +1148,33 @@ EXTRA_DIST += math.in.h
## end gnulib module math
## begin gnulib module mbrtowc
EXTRA_DIST += mbrtowc.c
EXTRA_libgnu_la_SOURCES += mbrtowc.c
## end gnulib module mbrtowc
## begin gnulib module mbsinit
EXTRA_DIST += mbsinit.c
EXTRA_libgnu_la_SOURCES += mbsinit.c
## end gnulib module mbsinit
## begin gnulib module mbtowc
EXTRA_DIST += mbtowc-impl.h mbtowc.c
EXTRA_libgnu_la_SOURCES += mbtowc.c
## end gnulib module mbtowc
## begin gnulib module memchr
@ -1198,6 +1267,15 @@ EXTRA_DIST += netinet_in.in.h
## end gnulib module netinet_in
## begin gnulib module nl_langinfo
EXTRA_DIST += nl_langinfo.c
EXTRA_libgnu_la_SOURCES += nl_langinfo.c
## end gnulib module nl_langinfo
## begin gnulib module nproc
libgnu_la_SOURCES += nproc.c
@ -1282,6 +1360,15 @@ EXTRA_libgnu_la_SOURCES += recvfrom.c
## end gnulib module recvfrom
## begin gnulib module regex
EXTRA_DIST += regcomp.c regex.c regex.h regex_internal.c regex_internal.h regexec.c
EXTRA_libgnu_la_SOURCES += regcomp.c regex.c regex_internal.c regexec.c
## end gnulib module regex
## begin gnulib module rename
@ -1921,6 +2008,22 @@ EXTRA_DIST += stdlib.in.h
## end gnulib module stdlib
## begin gnulib module strcase
EXTRA_DIST += strcasecmp.c strncasecmp.c
EXTRA_libgnu_la_SOURCES += strcasecmp.c strncasecmp.c
## end gnulib module strcase
## begin gnulib module streq
EXTRA_DIST += streq.h
## end gnulib module streq
## begin gnulib module strftime
libgnu_la_SOURCES += strftime.c
@ -2040,6 +2143,37 @@ EXTRA_DIST += string.in.h
## end gnulib module string
## begin gnulib module strings
BUILT_SOURCES += strings.h
# We need the following in order to create <strings.h> when the system
# doesn't have one that works with the given compiler.
strings.h: strings.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H) $(ARG_NONNULL_H)
$(AM_V_GEN)rm -f $@-t $@ && \
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
sed -e 's|@''GUARD_PREFIX''@|GL|g' \
-e 's|@''HAVE_STRINGS_H''@|$(HAVE_STRINGS_H)|g' \
-e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
-e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
-e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
-e 's|@''NEXT_STRINGS_H''@|$(NEXT_STRINGS_H)|g' \
-e 's|@''GNULIB_FFS''@|$(GNULIB_FFS)|g' \
-e 's|@''HAVE_FFS''@|$(HAVE_FFS)|g' \
-e 's|@''HAVE_STRCASECMP''@|$(HAVE_STRCASECMP)|g' \
-e 's|@''HAVE_DECL_STRNCASECMP''@|$(HAVE_DECL_STRNCASECMP)|g' \
-e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
-e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \
-e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \
< $(srcdir)/strings.in.h; \
} > $@-t && \
mv $@-t $@
MOSTLYCLEANFILES += strings.h strings.h-t
EXTRA_DIST += strings.in.h
## end gnulib module strings
## begin gnulib module sys_file
BUILT_SOURCES += sys/file.h
@ -2704,6 +2838,54 @@ EXTRA_DIST += wchar.in.h
## end gnulib module wchar
## begin gnulib module wcrtomb
EXTRA_DIST += wcrtomb.c
EXTRA_libgnu_la_SOURCES += wcrtomb.c
## end gnulib module wcrtomb
## begin gnulib module wctype-h
BUILT_SOURCES += wctype.h
# We need the following in order to create <wctype.h> when the system
# doesn't have one that works with the given compiler.
wctype.h: wctype.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(WARN_ON_USE_H)
$(AM_V_GEN)rm -f $@-t $@ && \
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \
sed -e 's|@''GUARD_PREFIX''@|GL|g' \
-e 's/@''HAVE_WCTYPE_H''@/$(HAVE_WCTYPE_H)/g' \
-e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
-e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
-e 's|@''PRAGMA_COLUMNS''@|@PRAGMA_COLUMNS@|g' \
-e 's|@''NEXT_WCTYPE_H''@|$(NEXT_WCTYPE_H)|g' \
-e 's/@''GNULIB_ISWBLANK''@/$(GNULIB_ISWBLANK)/g' \
-e 's/@''GNULIB_WCTYPE''@/$(GNULIB_WCTYPE)/g' \
-e 's/@''GNULIB_ISWCTYPE''@/$(GNULIB_ISWCTYPE)/g' \
-e 's/@''GNULIB_WCTRANS''@/$(GNULIB_WCTRANS)/g' \
-e 's/@''GNULIB_TOWCTRANS''@/$(GNULIB_TOWCTRANS)/g' \
-e 's/@''HAVE_ISWBLANK''@/$(HAVE_ISWBLANK)/g' \
-e 's/@''HAVE_ISWCNTRL''@/$(HAVE_ISWCNTRL)/g' \
-e 's/@''HAVE_WCTYPE_T''@/$(HAVE_WCTYPE_T)/g' \
-e 's/@''HAVE_WCTRANS_T''@/$(HAVE_WCTRANS_T)/g' \
-e 's/@''HAVE_WINT_T''@/$(HAVE_WINT_T)/g' \
-e 's/@''REPLACE_ISWBLANK''@/$(REPLACE_ISWBLANK)/g' \
-e 's/@''REPLACE_ISWCNTRL''@/$(REPLACE_ISWCNTRL)/g' \
-e 's/@''REPLACE_TOWLOWER''@/$(REPLACE_TOWLOWER)/g' \
-e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
-e '/definition of _GL_WARN_ON_USE/r $(WARN_ON_USE_H)' \
< $(srcdir)/wctype.in.h; \
} > $@-t && \
mv $@-t $@
MOSTLYCLEANFILES += wctype.h wctype.h-t
EXTRA_DIST += wctype.in.h
## end gnulib module wctype-h
## begin gnulib module write

39
lib/btowc.c Normal file
View file

@ -0,0 +1,39 @@
/* Convert unibyte character to wide character.
Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2008.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include <wchar.h>
#include <stdio.h>
#include <stdlib.h>
/* Convert the single byte C to a wide character.
   Returns WEOF when C is EOF or when mbtowc() rejects the byte; otherwise
   returns the wide character that mbtowc() produced for it.  */
wint_t
btowc (int c)
{
  char byte;
  wchar_t wide;

  /* EOF is not a byte and never converts.  */
  if (c == EOF)
    return WEOF;

  /* Hand the byte to mbtowc() as a one-byte multibyte sequence.  */
  byte = c;
  if (mbtowc (&wide, &byte, 1) < 0)
    return WEOF;

  return wide;
}

177
lib/langinfo.in.h Normal file
View file

@ -0,0 +1,177 @@
/* Substitute for and wrapper around <langinfo.h>.
Copyright (C) 2009-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/*
* POSIX <langinfo.h> for platforms that lack it or have an incomplete one.
* <http://www.opengroup.org/onlinepubs/9699919799/basedefs/langinfo.h.html>
*/
#ifndef _@GUARD_PREFIX@_LANGINFO_H
#if __GNUC__ >= 3
@PRAGMA_SYSTEM_HEADER@
#endif
@PRAGMA_COLUMNS@
/* The include_next requires a split double-inclusion guard. */
#if @HAVE_LANGINFO_H@
# @INCLUDE_NEXT@ @NEXT_LANGINFO_H@
#endif
#ifndef _@GUARD_PREFIX@_LANGINFO_H
#define _@GUARD_PREFIX@_LANGINFO_H
#if !@HAVE_LANGINFO_H@
/* A platform that lacks <langinfo.h>. */
/* Assume that it also lacks <nl_types.h> and the nl_item type. */
# if !GNULIB_defined_nl_item
typedef int nl_item;
# define GNULIB_defined_nl_item 1
# endif
/* nl_langinfo items of the LC_CTYPE category */
# define CODESET 10000
/* nl_langinfo items of the LC_NUMERIC category */
# define RADIXCHAR 10001
# define THOUSEP 10002
/* nl_langinfo items of the LC_TIME category */
# define D_T_FMT 10003
# define D_FMT 10004
# define T_FMT 10005
# define T_FMT_AMPM 10006
# define AM_STR 10007
# define PM_STR 10008
# define DAY_1 10009
# define DAY_2 (DAY_1 + 1)
# define DAY_3 (DAY_1 + 2)
# define DAY_4 (DAY_1 + 3)
# define DAY_5 (DAY_1 + 4)
# define DAY_6 (DAY_1 + 5)
# define DAY_7 (DAY_1 + 6)
# define ABDAY_1 10016
# define ABDAY_2 (ABDAY_1 + 1)
# define ABDAY_3 (ABDAY_1 + 2)
# define ABDAY_4 (ABDAY_1 + 3)
# define ABDAY_5 (ABDAY_1 + 4)
# define ABDAY_6 (ABDAY_1 + 5)
# define ABDAY_7 (ABDAY_1 + 6)
# define MON_1 10023
# define MON_2 (MON_1 + 1)
# define MON_3 (MON_1 + 2)
# define MON_4 (MON_1 + 3)
# define MON_5 (MON_1 + 4)
# define MON_6 (MON_1 + 5)
# define MON_7 (MON_1 + 6)
# define MON_8 (MON_1 + 7)
# define MON_9 (MON_1 + 8)
# define MON_10 (MON_1 + 9)
# define MON_11 (MON_1 + 10)
# define MON_12 (MON_1 + 11)
# define ABMON_1 10035
# define ABMON_2 (ABMON_1 + 1)
# define ABMON_3 (ABMON_1 + 2)
# define ABMON_4 (ABMON_1 + 3)
# define ABMON_5 (ABMON_1 + 4)
# define ABMON_6 (ABMON_1 + 5)
# define ABMON_7 (ABMON_1 + 6)
# define ABMON_8 (ABMON_1 + 7)
# define ABMON_9 (ABMON_1 + 8)
# define ABMON_10 (ABMON_1 + 9)
# define ABMON_11 (ABMON_1 + 10)
# define ABMON_12 (ABMON_1 + 11)
# define ERA 10047
# define ERA_D_FMT 10048
# define ERA_D_T_FMT 10049
# define ERA_T_FMT 10050
# define ALT_DIGITS 10051
/* nl_langinfo items of the LC_MONETARY category */
# define CRNCYSTR 10052
/* nl_langinfo items of the LC_MESSAGES category */
# define YESEXPR 10053
# define NOEXPR 10054
#else
/* A platform that has <langinfo.h>. */
# if !@HAVE_LANGINFO_CODESET@
# define CODESET 10000
# define GNULIB_defined_CODESET 1
# endif
# if !@HAVE_LANGINFO_T_FMT_AMPM@
# define T_FMT_AMPM 10006
# define GNULIB_defined_T_FMT_AMPM 1
# endif
# if !@HAVE_LANGINFO_ERA@
# define ERA 10047
# define ERA_D_FMT 10048
# define ERA_D_T_FMT 10049
# define ERA_T_FMT 10050
# define ALT_DIGITS 10051
# define GNULIB_defined_ERA 1
# endif
# if !@HAVE_LANGINFO_YESEXPR@
# define YESEXPR 10053
# define NOEXPR 10054
# define GNULIB_defined_YESEXPR 1
# endif
#endif
/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */
/* The definition of _GL_WARN_ON_USE is copied here. */
/* Declare overridden functions. */
/* Return a piece of locale dependent information.
Note: The difference between nl_langinfo (CODESET) and locale_charset ()
is that the latter normalizes the encoding names to GNU conventions. */
#if @GNULIB_NL_LANGINFO@
# if @REPLACE_NL_LANGINFO@
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
# undef nl_langinfo
# define nl_langinfo rpl_nl_langinfo
# endif
_GL_FUNCDECL_RPL (nl_langinfo, char *, (nl_item item));
_GL_CXXALIAS_RPL (nl_langinfo, char *, (nl_item item));
# else
# if !@HAVE_NL_LANGINFO@
_GL_FUNCDECL_SYS (nl_langinfo, char *, (nl_item item));
# endif
_GL_CXXALIAS_SYS (nl_langinfo, char *, (nl_item item));
# endif
_GL_CXXALIASWARN (nl_langinfo);
#elif defined GNULIB_POSIXCHECK
# undef nl_langinfo
# if HAVE_RAW_DECL_NL_LANGINFO
_GL_WARN_ON_USE (nl_langinfo, "nl_langinfo is not portable - "
"use gnulib module nl_langinfo for portability");
# endif
#endif
#endif /* _@GUARD_PREFIX@_LANGINFO_H */
#endif /* _@GUARD_PREFIX@_LANGINFO_H */

396
lib/mbrtowc.c Normal file
View file

@ -0,0 +1,396 @@
/* Convert multibyte character to wide character.
Copyright (C) 1999-2002, 2005-2012 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2008.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include <wchar.h>
#if GNULIB_defined_mbstate_t
/* Implement mbrtowc() on top of mbtowc(). */
# include <errno.h>
# include <stdlib.h>
# include "localcharset.h"
# include "streq.h"
# include "verify.h"
verify (sizeof (mbstate_t) >= 4);
static char internal_state[4];
/* mbrtowc() implemented on top of mbtowc().

   The conversion state is treated as an array of char: pstate[0] is the
   number of bytes buffered from an incomplete character (0..3) and
   pstate[1..3] are those bytes.  When PS is NULL the file-level
   internal_state buffer is used instead.

   Arguments:
     pwc  where to store the wide character, or NULL to discard it
     s    input bytes; NULL is handled like the call (NULL, "", 1, ps)
     n    number of bytes available at S
     ps   conversion state, or NULL

   Returns:
     (size_t)(-2)  if N is 0 or the bytes seen so far are judged to be an
                   incomplete character; the new bytes are appended to the
                   state
     (size_t)(-1)  with errno = EINVAL if the state's byte count is not 0..3,
                   or with errno = EILSEQ if the sequence is judged invalid
     otherwise     mbtowc()'s result minus the number of previously buffered
                   bytes; the state is reset to initial

   abort() is called if mbtowc()'s result is inconsistent with the wide
   character it stored, or does not extend past the buffered bytes.  */
size_t
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
char *pstate = (char *)ps;
/* A NULL input string is replaced by an empty string of length 1 whose
   result is not stored.  */
if (s == NULL)
{
pwc = NULL;
s = "";
n = 1;
}
if (n == 0)
return (size_t)(-2);
/* Here n > 0. */
if (pstate == NULL)
pstate = internal_state;
{
/* nstate: bytes already buffered in the state.
   buf:    scratch area holding buffered bytes followed by new bytes.
   p, m:   the byte sequence (and its length) that is given to mbtowc().  */
size_t nstate = pstate[0];
char buf[4];
const char *p;
size_t m;
switch (nstate)
{
case 0:
/* Nothing buffered: convert directly from the caller's bytes.  */
p = s;
m = n;
break;
case 3:
buf[2] = pstate[3];
/*FALLTHROUGH*/
case 2:
buf[1] = pstate[2];
/*FALLTHROUGH*/
case 1:
buf[0] = pstate[1];
p = buf;
m = nstate;
/* Append up to three bytes from S, limited by N and by the 4-byte size
   of buf.  */
buf[m++] = s[0];
if (n >= 2 && m < 4)
{
buf[m++] = s[1];
if (n >= 3 && m < 4)
buf[m++] = s[2];
}
break;
default:
/* The state does not hold a byte count in the range 0..3.  */
errno = EINVAL;
return (size_t)(-1);
}
/* Here m > 0. */
# if __GLIBC__ || defined __UCLIBC__
/* Work around bug <http://sourceware.org/bugzilla/show_bug.cgi?id=9674> */
mbtowc (NULL, NULL, 0);
# endif
{
int res = mbtowc (pwc, p, m);
if (res >= 0)
{
/* Sanity checks: a result of 0 must go together with a null wide
   character, and the character must extend past the buffered bytes.  */
if (pwc != NULL && ((*pwc == 0) != (res == 0)))
abort ();
if (nstate >= (res > 0 ? res : 1))
abort ();
/* Report only the bytes taken from S, and return to the initial state.  */
res -= nstate;
pstate[0] = 0;
return res;
}
/* mbtowc does not distinguish between invalid and incomplete multibyte
sequences. But mbrtowc needs to make this distinction.
There are two possible approaches:
- Use iconv() and its return value.
- Use built-in knowledge about the possible encodings.
Given the low quality of implementation of iconv() on the systems that
lack mbrtowc(), we use the second approach.
The possible encodings are:
- 8-bit encodings,
- EUC-JP, EUC-KR, GB2312, EUC-TW, BIG5, GB18030, SJIS,
- UTF-8.
Use specialized code for each. */
/* A sequence that already fills buf, or that is at least MB_CUR_MAX bytes
   long, is not treated as incomplete.  */
if (m >= 4 || m >= MB_CUR_MAX)
goto invalid;
/* Here MB_CUR_MAX > 1 and 0 < m < 4. */
{
const char *encoding = locale_charset ();
/* Each branch below jumps to 'incomplete' when the M bytes at P match the
   byte ranges it checks for a character prefix, and to 'invalid'
   otherwise.  */
if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
{
/* Cf. unistr/u8-mblen.c. */
unsigned char c = (unsigned char) p[0];
if (c >= 0xc2)
{
if (c < 0xe0)
{
/* Lead byte 0xc2..0xdf.  */
if (m == 1)
goto incomplete;
}
else if (c < 0xf0)
{
/* Lead byte 0xe0..0xef.  */
if (m == 1)
goto incomplete;
if (m == 2)
{
unsigned char c2 = (unsigned char) p[1];
if ((c2 ^ 0x80) < 0x40
&& (c >= 0xe1 || c2 >= 0xa0)
&& (c != 0xed || c2 < 0xa0))
goto incomplete;
}
}
else if (c <= 0xf4)
{
/* Lead byte 0xf0..0xf4.  */
if (m == 1)
goto incomplete;
else /* m == 2 || m == 3 */
{
unsigned char c2 = (unsigned char) p[1];
if ((c2 ^ 0x80) < 0x40
&& (c >= 0xf1 || c2 >= 0x90)
&& (c < 0xf4 || (c == 0xf4 && c2 < 0x90)))
{
if (m == 2)
goto incomplete;
else /* m == 3 */
{
unsigned char c3 = (unsigned char) p[2];
if ((c3 ^ 0x80) < 0x40)
goto incomplete;
}
}
}
}
}
goto invalid;
}
/* As a reference for this code, you can use the GNU libiconv
implementation. Look for uses of the RET_TOOFEW macro. */
if (STREQ (encoding, "EUC-JP", 'E', 'U', 'C', '-', 'J', 'P', 0, 0, 0))
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0xa1 && c < 0xff) || c == 0x8e || c == 0x8f)
goto incomplete;
}
if (m == 2)
{
unsigned char c = (unsigned char) p[0];
if (c == 0x8f)
{
unsigned char c2 = (unsigned char) p[1];
if (c2 >= 0xa1 && c2 < 0xff)
goto incomplete;
}
}
goto invalid;
}
if (STREQ (encoding, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
|| STREQ (encoding, "GB2312", 'G', 'B', '2', '3', '1', '2', 0, 0, 0)
|| STREQ (encoding, "BIG5", 'B', 'I', 'G', '5', 0, 0, 0, 0, 0))
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if (c >= 0xa1 && c < 0xff)
goto incomplete;
}
goto invalid;
}
if (STREQ (encoding, "EUC-TW", 'E', 'U', 'C', '-', 'T', 'W', 0, 0, 0))
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0xa1 && c < 0xff) || c == 0x8e)
goto incomplete;
}
else /* m == 2 || m == 3 */
{
unsigned char c = (unsigned char) p[0];
if (c == 0x8e)
goto incomplete;
}
goto invalid;
}
if (STREQ (encoding, "GB18030", 'G', 'B', '1', '8', '0', '3', '0', 0, 0))
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0x90 && c <= 0xe3) || (c >= 0xf8 && c <= 0xfe))
goto incomplete;
}
else /* m == 2 || m == 3 */
{
unsigned char c = (unsigned char) p[0];
if (c >= 0x90 && c <= 0xe3)
{
unsigned char c2 = (unsigned char) p[1];
if (c2 >= 0x30 && c2 <= 0x39)
{
if (m == 2)
goto incomplete;
else /* m == 3 */
{
unsigned char c3 = (unsigned char) p[2];
if (c3 >= 0x81 && c3 <= 0xfe)
goto incomplete;
}
}
}
}
goto invalid;
}
if (STREQ (encoding, "SJIS", 'S', 'J', 'I', 'S', 0, 0, 0, 0, 0))
{
if (m == 1)
{
unsigned char c = (unsigned char) p[0];
if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)
|| (c >= 0xf0 && c <= 0xf9))
goto incomplete;
}
goto invalid;
}
/* An unknown multibyte encoding. */
goto incomplete;
}
incomplete:
{
/* Append the bytes just taken from S after the NSTATE bytes that are
   already stored, so that the state holds all M bytes.  */
size_t k = nstate;
/* Here 0 <= k < m < 4. */
pstate[++k] = s[0];
if (k < m)
{
pstate[++k] = s[1];
if (k < m)
pstate[++k] = s[2];
}
if (k != m)
abort ();
}
/* Record the new number of buffered bytes.  */
pstate[0] = m;
return (size_t)(-2);
invalid:
errno = EILSEQ;
/* The conversion state is undefined, says POSIX. */
return (size_t)(-1);
}
}
}
#else
/* Override the system's mbrtowc() function. */
# undef mbrtowc
/* Replacement wrapper around mbrtowc().  Which corrections are compiled in
   is selected by the MBRTOWC_*_BUG macros; the arguments and return value
   are those of mbrtowc().
   NOTE(review): the mbrtowc() calls below are presumably meant to reach the
   system function (the file undefines the mbrtowc macro just before this
   definition) -- confirm against the generated <wchar.h>.  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG
/* Handle a NULL input string here, as the call (NULL, "", 1, ps).  */
if (s == NULL)
{
pwc = NULL;
s = "";
n = 1;
}
# endif
# if MBRTOWC_RETVAL_BUG
{
static mbstate_t internal_state;
/* Override mbrtowc's internal state. We cannot call mbsinit() on the
hidden internal state, but we can call it on our variable. */
if (ps == NULL)
ps = &internal_state;
if (!mbsinit (ps))
{
/* Parse the rest of the multibyte character byte for byte. */
/* count: number of bytes of S passed to mbrtowc() so far.  */
size_t count = 0;
for (; n > 0; s++, n--)
{
wchar_t wc;
size_t ret = mbrtowc (&wc, s, 1, ps);
if (ret == (size_t)(-1))
return (size_t)(-1);
count++;
if (ret != (size_t)(-2))
{
/* The multibyte character has been completed. */
if (pwc != NULL)
*pwc = wc;
return (wc == 0 ? 0 : count);
}
}
/* All N bytes were used and the character is still incomplete.  */
return (size_t)(-2);
}
}
# endif
# if MBRTOWC_NUL_RETVAL_BUG
{
/* Convert into a local variable so that the result can be inspected even
   when PWC is NULL, and return 0 whenever the character is null.  */
wchar_t wc;
size_t ret = mbrtowc (&wc, s, n, ps);
if (ret != (size_t)(-1) && ret != (size_t)(-2))
{
if (pwc != NULL)
*pwc = wc;
if (wc == 0)
ret = 0;
}
return ret;
}
# else
{
# if MBRTOWC_NULL_ARG1_BUG
/* Never pass a NULL destination pointer to mbrtowc().  */
wchar_t dummy;
if (pwc == NULL)
pwc = &dummy;
# endif
return mbrtowc (pwc, s, n, ps);
}
# endif
}
#endif

61
lib/mbsinit.c Normal file
View file

@ -0,0 +1,61 @@
/* Test for initial conversion state.
Copyright (C) 2008-2012 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2008.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include <wchar.h>
#include "verify.h"
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
/* On native Windows, 'mbstate_t' is defined as 'int'. */
/* Return nonzero if PS is NULL or designates the initial conversion state.
   This variant compares *PS with 0 directly, so it requires a scalar
   'mbstate_t'.  */
int
mbsinit (const mbstate_t *ps)
{
  if (ps == NULL)
    return 1;
  return *ps == 0;
}
#else
/* Platforms that lack mbsinit() also lack mbrlen(), mbrtowc(), mbsrtowcs()
and wcrtomb(), wcsrtombs().
We assume that
- sizeof (mbstate_t) >= 4,
- only stateless encodings are supported (such as UTF-8 and EUC-JP, but
not ISO-2022 variants),
- for each encoding, the number of bytes for a wide character is <= 4.
(This maximum is attained for UTF-8, GB18030, EUC-TW.)
We define the meaning of mbstate_t as follows:
- In mb -> wc direction, mbstate_t's first byte contains the number of
buffered bytes (in the range 0..3), followed by up to 3 buffered bytes.
- In wc -> mb direction, mbstate_t contains no information. In other
words, it is always in the initial state. */
verify (sizeof (mbstate_t) >= 4);
/* Return nonzero if PS is NULL or designates the initial conversion state.
   Only the first byte of *PS is examined; the state counts as initial when
   that byte is 0.  */
int
mbsinit (const mbstate_t *ps)
{
  const char *bytes;

  if (ps == NULL)
    return 1;

  bytes = (const char *) ps;
  return bytes[0] == 0;
}
#endif

44
lib/mbtowc-impl.h Normal file
View file

@ -0,0 +1,44 @@
/* Convert multibyte character to wide character.
Copyright (C) 2011-2012 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2011.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* We don't need a static internal state, because the encoding is not state
dependent, and when mbrtowc returns (size_t)(-2), we throw the result
away. */
/* mbtowc() implemented on top of mbrtowc(), using a fresh zero-filled
   conversion state for every call.

   Returns 0 if S is NULL or the converted wide character is null, -1 with
   errno set to EILSEQ if mbrtowc() reports an invalid or incomplete
   sequence, and otherwise the value returned by mbrtowc().  The wide
   character is stored through PWC unless PWC is NULL.  */
int
mbtowc (wchar_t *pwc, const char *s, size_t n)
{
  mbstate_t fresh;
  wchar_t decoded;
  size_t len;

  if (s == NULL)
    return 0;

  memset (&fresh, 0, sizeof (mbstate_t));
  len = mbrtowc (&decoded, s, n, &fresh);

  /* An incomplete sequence is reported the same way as an invalid one.  */
  if (len == (size_t)-1 || len == (size_t)-2)
    {
      errno = EILSEQ;
      return -1;
    }

  if (pwc != NULL)
    *pwc = decoded;

  if (decoded == 0)
    return 0;
  return len;
}

26
lib/mbtowc.c Normal file
View file

@ -0,0 +1,26 @@
/* Convert multibyte character to wide character.
Copyright (C) 2011-2012 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2011.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <wchar.h>
#include "mbtowc-impl.h"

271
lib/nl_langinfo.c Normal file
View file

@ -0,0 +1,271 @@
/* nl_langinfo() replacement: query locale dependent information.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include <langinfo.h>
#if REPLACE_NL_LANGINFO
/* Override nl_langinfo with support for added nl_item values. */
# include <locale.h>
# include <string.h>
# undef nl_langinfo
/* Replacement for nl_langinfo() that answers the nl_item values for which
   this file compiles in a handler (selected by the GNULIB_defined_* and
   FUNC_NL_LANGINFO_YESEXPR_WORKS macros) and forwards every other item to
   nl_langinfo().  */
char *
rpl_nl_langinfo (nl_item item)
{
# if GNULIB_defined_CODESET
  /* Derive the codeset from the LC_CTYPE locale name: it is the text after
     the dot, without a trailing "@modifier".  */
  if (item == CODESET)
    {
      static char buf[2 + 10 + 1];
      const char *name = setlocale (LC_CTYPE, NULL);
      const char *charset = NULL;

      if (name != NULL && name[0] != '\0')
        charset = strchr (name, '.');
      if (charset != NULL)
        {
          const char *at;

          charset++;
          at = strchr (charset, '@');
          /* Without a modifier the rest of the locale name is the answer.  */
          if (at == NULL)
            return charset;
          /* Otherwise copy the part before '@', provided it fits in buf.  */
          if (at - charset < sizeof (buf))
            {
              memcpy (buf, charset, at - charset);
              buf [at - charset] = '\0';
              return buf;
            }
        }
      return "";
    }
# endif

  switch (item)
    {
# if GNULIB_defined_T_FMT_AMPM
    case T_FMT_AMPM:
      return "%I:%M:%S %p";
# endif
# if GNULIB_defined_ERA
    case ERA:
      /* The format is not standardized; an empty string is returned.  */
      return "";
    case ERA_D_FMT:
      /* Answer with the non-era date format.  */
      return nl_langinfo (D_FMT);
    case ERA_D_T_FMT:
      /* Answer with the non-era date and time format.  */
      return nl_langinfo (D_T_FMT);
    case ERA_T_FMT:
      /* Answer with the non-era time format.  */
      return nl_langinfo (T_FMT);
    case ALT_DIGITS:
      /* The format is not standardized; empty strings are returned.  */
      return "\0\0\0\0\0\0\0\0\0\0";
# endif
# if GNULIB_defined_YESEXPR || !FUNC_NL_LANGINFO_YESEXPR_WORKS
    case YESEXPR:
      return "^[yY]";
    case NOEXPR:
      return "^[nN]";
# endif
    default:
      /* Everything else is answered by nl_langinfo() itself.  */
      return nl_langinfo (item);
    }
}
#else
/* Provide nl_langinfo from scratch. */
# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
/* Native Windows platforms. */
# define WIN32_LEAN_AND_MEAN /* avoid including junk */
# include <windows.h>
# include <stdio.h>
# else
/* An old Unix platform without locales, such as Linux libc5 or BeOS. */
# endif
# include <locale.h>
/* Minimal stand-in for nl_langinfo on hosts that do not provide one.

   ITEM selects the piece of locale information wanted.  Only CODESET (on
   native Windows, via GetACP) and RADIXCHAR/THOUSEP (via localeconv) consult
   the environment; every other recognized item maps to a fixed string, and
   an unrecognized item maps to "".

   Most results are string literals, so the caller must treat the returned
   string as read-only.  On native Windows the CODESET result lives in a
   static buffer that is rewritten by each CODESET query.  */
char *
nl_langinfo (nl_item item)
{
# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
  /* Room for "CP", up to 10 decimal digits, and the terminating NUL.  */
  static char codepage_name[2 + 10 + 1];
# endif
  /* Answer for any item that is not handled below.  */
  const char *text = "";

  switch (item)
    {
    /* ---- LC_CTYPE ---- */
    case CODESET:
# if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
      /* The Windows API reports the locale's code page as a number.  */
      sprintf (codepage_name, "CP%u", GetACP ());
      text = codepage_name;
# elif defined __BEOS__
      text = "UTF-8";
# else
      text = "ISO-8859-1";
# endif
      break;

    /* ---- LC_NUMERIC ---- */
    case RADIXCHAR:
      text = localeconv ()->decimal_point;
      break;
    case THOUSEP:
      text = localeconv ()->thousands_sep;
      break;

    /* ---- LC_TIME (TODO: Really use the locale.) ---- */
    /* The ERA_* formats share the answers of their plain counterparts.  */
    case D_T_FMT:
    case ERA_D_T_FMT:
      text = "%a %b %e %H:%M:%S %Y";
      break;
    case D_FMT:
    case ERA_D_FMT:
      text = "%m/%d/%y";
      break;
    case T_FMT:
    case ERA_T_FMT:
      text = "%H:%M:%S";
      break;
    case T_FMT_AMPM:
      text = "%I:%M:%S %p";
      break;

    case AM_STR:   text = "AM"; break;
    case PM_STR:   text = "PM"; break;

    /* Full weekday names.  */
    case DAY_1:    text = "Sunday";    break;
    case DAY_2:    text = "Monday";    break;
    case DAY_3:    text = "Tuesday";   break;
    case DAY_4:    text = "Wednesday"; break;
    case DAY_5:    text = "Thursday";  break;
    case DAY_6:    text = "Friday";    break;
    case DAY_7:    text = "Saturday";  break;

    /* Abbreviated weekday names.  */
    case ABDAY_1:  text = "Sun"; break;
    case ABDAY_2:  text = "Mon"; break;
    case ABDAY_3:  text = "Tue"; break;
    case ABDAY_4:  text = "Wed"; break;
    case ABDAY_5:  text = "Thu"; break;
    case ABDAY_6:  text = "Fri"; break;
    case ABDAY_7:  text = "Sat"; break;

    /* Full month names.  */
    case MON_1:    text = "January";   break;
    case MON_2:    text = "February";  break;
    case MON_3:    text = "March";     break;
    case MON_4:    text = "April";     break;
    case MON_5:    text = "May";       break;
    case MON_6:    text = "June";      break;
    case MON_7:    text = "July";      break;
    case MON_8:    text = "August";    break;
    case MON_9:    text = "September"; break;
    case MON_10:   text = "October";   break;
    case MON_11:   text = "November";  break;
    case MON_12:   text = "December";  break;

    /* Abbreviated month names.  */
    case ABMON_1:  text = "Jan"; break;
    case ABMON_2:  text = "Feb"; break;
    case ABMON_3:  text = "Mar"; break;
    case ABMON_4:  text = "Apr"; break;
    case ABMON_5:  text = "May"; break;
    case ABMON_6:  text = "Jun"; break;
    case ABMON_7:  text = "Jul"; break;
    case ABMON_8:  text = "Aug"; break;
    case ABMON_9:  text = "Sep"; break;
    case ABMON_10: text = "Oct"; break;
    case ABMON_11: text = "Nov"; break;
    case ABMON_12: text = "Dec"; break;

    case ERA:
      text = "";
      break;
    case ALT_DIGITS:
      /* Ten empty strings laid out back to back in memory.  */
      text = "\0\0\0\0\0\0\0\0\0\0";
      break;

    /* ---- LC_MONETARY (TODO: Really use the locale.) ---- */
    case CRNCYSTR:
      text = "-";
      break;

    /* ---- LC_MESSAGES (TODO: Really use the locale.) ---- */
    case YESEXPR:
      text = "^[yY]";
      break;
    case NOEXPR:
      text = "^[nN]";
      break;

    default:
      break;
    }

  /* The interface returns a non-const pointer even for literal results.  */
  return (char *) text;
}
#endif

3876
lib/regcomp.c Normal file

File diff suppressed because it is too large Load diff

72
lib/regex.c Normal file
View file

@ -0,0 +1,72 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002-2003, 2005-2006, 2009-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Make sure no one compiles this code with a C++ compiler. */
#if defined __cplusplus && defined _LIBC
# error "This is C code, use a C compiler"
#endif
#ifdef _LIBC
/* We have to keep the namespace clean. */
# define regfree(preg) __regfree (preg)
# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
# define regerror(errcode, preg, errbuf, errbuf_size) \
__regerror(errcode, preg, errbuf, errbuf_size)
# define re_set_registers(bu, re, nu, st, en) \
__re_set_registers (bu, re, nu, st, en)
# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
# define re_match(bufp, string, size, pos, regs) \
__re_match (bufp, string, size, pos, regs)
# define re_search(bufp, string, size, startpos, range, regs) \
__re_search (bufp, string, size, startpos, range, regs)
# define re_compile_pattern(pattern, length, bufp) \
__re_compile_pattern (pattern, length, bufp)
# define re_set_syntax(syntax) __re_set_syntax (syntax)
# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
# include "../locale/localeinfo.h"
#endif
/* On some systems, limits.h sets RE_DUP_MAX to a lower value than
GNU regex allows. Include it before <regex.h>, which correctly
#undefs RE_DUP_MAX and sets it to the right value. */
#include <limits.h>
#include <strings.h>
#include <regex.h>
#include "regex_internal.h"
#include "regex_internal.c"
#include "regcomp.c"
#include "regexec.c"
/* Binary backward compatibility. */
#if _LIBC
# include <shlib-compat.h>
# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3)
link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.")
int re_max_failures = 2000;
# endif
#endif

675
lib/regex.h Normal file
View file

@ -0,0 +1,675 @@
/* Definitions for data structures and routines for the regular
expression library.
Copyright (C) 1985, 1989-1993, 1995-1998, 2000-2003, 2005-2006, 2009-2012
Free Software Foundation, Inc.
This file is part of the GNU C Library.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _REGEX_H
#define _REGEX_H 1
#include <sys/types.h>
/* Allow the use in C++ code. */
#ifdef __cplusplus
extern "C" {
#endif
/* Define __USE_GNU_REGEX to declare GNU extensions that violate the
POSIX name space rules. */
#undef __USE_GNU_REGEX
#if (defined _GNU_SOURCE \
|| (!defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE \
&& !defined _XOPEN_SOURCE))
# define __USE_GNU_REGEX 1
#endif
#ifdef _REGEX_LARGE_OFFSETS
/* Use types and values that are wide enough to represent signed and
unsigned byte offsets in memory. This currently works only when
the regex code is used outside of the GNU C library; it is not yet
supported within glibc itself, and glibc users should not define
_REGEX_LARGE_OFFSETS. */
/* The type of the offset of a byte within a string.
For historical reasons POSIX 1003.1-2004 requires that regoff_t be
at least as wide as off_t. However, many common POSIX platforms set
regoff_t to the more-sensible ssize_t and the Open Group has
signalled its intention to change the requirement to be that
regoff_t be at least as wide as ptrdiff_t and ssize_t; see XBD ERN
60 (2005-08-25). We don't know of any hosts where ptrdiff_t is
wider than ssize_t, so ssize_t is safe. */
typedef ssize_t regoff_t;
/* The type of nonnegative object indexes. Traditionally, GNU regex
uses 'int' for these. Code that uses __re_idx_t should work
regardless of whether the type is signed. */
typedef size_t __re_idx_t;
/* The type of object sizes. */
typedef size_t __re_size_t;
/* The type of object sizes, in places where the traditional code
uses unsigned long int. */
typedef size_t __re_long_size_t;
#else
/* Use types that are binary-compatible with the traditional GNU regex
implementation, which mishandles strings longer than INT_MAX. */
typedef int regoff_t;
typedef int __re_idx_t;
typedef unsigned int __re_size_t;
typedef unsigned long int __re_long_size_t;
#endif
/* The following two types have to be signed and unsigned integer type
wide enough to hold a value of a pointer. For most ANSI compilers
ptrdiff_t and size_t should be likely OK. Still size of these two
types is 2 for Microsoft C. Ugh... */
typedef long int s_reg_t;
typedef unsigned long int active_reg_t;
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
remains the value 0. The bits are given in alphabetical order, and
the definitions shifted by one from the previous bit; thus, when we
add or remove a bit, only one other definition need change. */
typedef unsigned long int reg_syntax_t;
#ifdef __USE_GNU_REGEX
/* If this bit is not set, then \ inside a bracket expression is literal.
If set, then such a \ quotes the following character. */
# define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1)
/* If this bit is not set, then + and ? are operators, and \+ and \? are
literals.
If set, then \+ and \? are operators and + and ? are literals. */
# define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
/* If this bit is set, then character classes are supported. They are:
[:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
[:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
If not set, then character classes are not supported. */
# define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
/* If this bit is set, then ^ and $ are always anchors (outside bracket
expressions, of course).
If this bit is not set, then it depends:
^ is an anchor if it is at the beginning of a regular
expression or after an open-group or an alternation operator;
$ is an anchor if it is at the end of a regular expression, or
before a close-group or an alternation operator.
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
POSIX draft 11.2 says that * etc. in leading positions is undefined.
We already implemented a previous draft which made those constructs
invalid, though, so we haven't changed the code back. */
# define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
/* If this bit is set, then special characters are always special
regardless of where they are in the pattern.
If this bit is not set, then special characters are special only in
some contexts; otherwise they are ordinary. Specifically,
* + ? and intervals are only special when not after the beginning,
open-group, or alternation operator. */
# define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
immediately after an alternation or begin-group operator. */
# define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
/* If this bit is set, then . matches newline.
If not set, then it doesn't. */
# define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
/* If this bit is set, then . doesn't match NUL.
If not set, then it does. */
# define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
/* If this bit is set, nonmatching lists [^...] do not match newline.
If not set, they do. */
# define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
/* If this bit is set, either \{...\} or {...} defines an
interval, depending on RE_NO_BK_BRACES.
If not set, \{, \}, {, and } are literals. */
# define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
/* If this bit is set, +, ? and | aren't recognized as operators.
If not set, they are. */
# define RE_LIMITED_OPS (RE_INTERVALS << 1)
/* If this bit is set, newline is an alternation operator.
If not set, newline is literal. */
# define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
/* If this bit is set, then '{...}' defines an interval, and \{ and \}
are literals.
If not set, then '\{...\}' defines an interval. */
# define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
/* If this bit is set, (...) defines a group, and \( and \) are literals.
If not set, \(...\) defines a group, and ( and ) are literals. */
# define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
/* If this bit is set, then \<digit> matches <digit>.
If not set, then \<digit> is a back-reference. */
# define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
/* If this bit is set, then | is an alternation operator, and \| is literal.
If not set, then \| is an alternation operator, and | is literal. */
# define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
/* If this bit is set, then an ending range point collating higher
than the starting range point, as in [z-a], is invalid.
If not set, then when ending range point collates higher than the
starting range point, the range is ignored. */
# define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
/* If this bit is set, then an unmatched ) is ordinary.
If not set, then an unmatched ) is invalid. */
# define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
/* If this bit is set, succeed as soon as we match the whole pattern,
without further backtracking. */
# define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1)
/* If this bit is set, do not process the GNU regex operators.
If not set, then the GNU regex operators are recognized. */
# define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
/* If this bit is set, turn on internal regex debugging.
If not set, and debugging was on, turn it off.
This only works if regex.c is compiled -DDEBUG.
We define this bit always, so that all that's needed to turn on
debugging is to recompile regex.c; the calling code can always have
this bit set, and it won't affect anything in the normal case. */
# define RE_DEBUG (RE_NO_GNU_OPS << 1)
/* If this bit is set, a syntactically invalid interval is treated as
a string of ordinary characters. For example, the ERE 'a{1' is
treated as 'a\{1'. */
# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
/* If this bit is set, then ignore case when matching.
If not set, then case is significant. */
# define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
for ^, because it is difficult to scan the regex backwards to find
whether ^ should be special. */
# define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
/* If this bit is set, then \{ cannot be first in a regex or
immediately after an alternation, open-group or \} operator. */
# define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1)
/* If this bit is set, then no_sub will be set to 1 during
re_compile_pattern. */
# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
#endif /* defined __USE_GNU_REGEX */
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
already-compiled regexps. */
extern reg_syntax_t re_syntax_options;
#ifdef __USE_GNU_REGEX
/* Define combinations of the above bits for the standard possibilities.
(The [[[ comments delimit what gets put into the Texinfo file, so
don't delete them!) */
/* [[[begin syntaxes]]] */
# define RE_SYNTAX_EMACS 0
# define RE_SYNTAX_AWK \
(RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
| RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
# define RE_SYNTAX_GNU_AWK \
((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \
& ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \
| RE_CONTEXT_INVALID_OPS ))
# define RE_SYNTAX_POSIX_AWK \
(RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
| RE_INTERVALS | RE_NO_GNU_OPS)
# define RE_SYNTAX_GREP \
(RE_BK_PLUS_QM | RE_CHAR_CLASSES \
| RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
| RE_NEWLINE_ALT)
# define RE_SYNTAX_EGREP \
(RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
| RE_NEWLINE_ALT | RE_NO_BK_PARENS \
| RE_NO_BK_VBAR)
# define RE_SYNTAX_POSIX_EGREP \
(RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \
| RE_INVALID_INTERVAL_ORD)
/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
# define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
# define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
/* Syntax bits common to both basic and extended POSIX regex syntax. */
# define _RE_SYNTAX_POSIX_COMMON \
(RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
| RE_INTERVALS | RE_NO_EMPTY_RANGES)
# define RE_SYNTAX_POSIX_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP)
/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
isn't minimal, since other operators, such as \`, aren't disabled. */
# define RE_SYNTAX_POSIX_MINIMAL_BASIC \
(_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
# define RE_SYNTAX_POSIX_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
| RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is
removed and RE_NO_BK_REFS is added. */
# define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
(_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
| RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
| RE_NO_BK_PARENS | RE_NO_BK_REFS \
| RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
/* [[[end syntaxes]]] */
#endif /* defined __USE_GNU_REGEX */
#ifdef __USE_GNU_REGEX
/* Maximum number of duplicates an interval can allow. POSIX-conforming
systems might define this in <limits.h>, but we want our
value, so remove any previous define. */
# ifdef RE_DUP_MAX
# undef RE_DUP_MAX
# endif
/* RE_DUP_MAX is 2**15 - 1 because an earlier implementation stored
the counter as a 2-byte signed integer. This is no longer true, so
RE_DUP_MAX could be increased to (INT_MAX / 10 - 1), or to
((SIZE_MAX - 2) / 10 - 1) if _REGEX_LARGE_OFFSETS is defined.
However, there would be a huge performance problem if someone
actually used a pattern like a\{214748363\}, so RE_DUP_MAX retains
its historical value. */
# define RE_DUP_MAX (0x7fff)
#endif /* defined __USE_GNU_REGEX */
/* POSIX 'cflags' bits (i.e., information for 'regcomp'). */
/* If this bit is set, then use extended regular expression syntax.
If not set, then use basic regular expression syntax. */
#define REG_EXTENDED 1
/* If this bit is set, then ignore case when matching.
If not set, then case is significant. */
#define REG_ICASE (1 << 1)
/* If this bit is set, then anchors do not match at newline
characters in the string.
If not set, then anchors do match at newlines. */
#define REG_NEWLINE (1 << 2)
/* If this bit is set, then report only success or fail in regexec.
If not set, then returns differ between not matching and errors. */
#define REG_NOSUB (1 << 3)
/* POSIX 'eflags' bits (i.e., information for regexec). */
/* If this bit is set, then the beginning-of-line operator doesn't match
the beginning of the string (presumably because it's not the
beginning of a line).
If not set, then the beginning-of-line operator does match the
beginning of the string. */
#define REG_NOTBOL 1
/* Like REG_NOTBOL, except for the end-of-line. */
#define REG_NOTEOL (1 << 1)
/* Use PMATCH[0] to delimit the start and end of the search in the
buffer. */
#define REG_STARTEND (1 << 2)
/* Error codes used by the regex routines.
   The enumerators are spelled with a leading underscore; the REG_*
   spellings are supplied by the #define lines that follow this enum
   (REG_ENOSYS only when _XOPEN_SOURCE is defined).
   _REG_ENOSYS is -1 and _REG_NOERROR is 0; every later enumerator takes
   the next consecutive value, so inserting or reordering an entry
   renumbers all entries after it.
   If any error codes are removed, changed, or added, update the
   '__re_error_msgid' table in regcomp.c. */
typedef enum
{
_REG_ENOSYS = -1, /* This will never happen for this implementation. */
_REG_NOERROR = 0, /* Success. */
_REG_NOMATCH, /* Didn't find a match (for regexec). */
/* POSIX regcomp return error codes. (In the order listed in the
standard.) */
_REG_BADPAT, /* Invalid pattern. */
_REG_ECOLLATE, /* Invalid collating element. */
_REG_ECTYPE, /* Invalid character class name. */
_REG_EESCAPE, /* Trailing backslash. */
_REG_ESUBREG, /* Invalid back reference. */
_REG_EBRACK, /* Unmatched left bracket. */
_REG_EPAREN, /* Parenthesis imbalance. */
_REG_EBRACE, /* Unmatched \{. */
_REG_BADBR, /* Invalid contents of \{\}. */
_REG_ERANGE, /* Invalid range end. */
_REG_ESPACE, /* Ran out of memory. */
_REG_BADRPT, /* No preceding re for repetition op. */
/* Error codes we've added (extensions beyond the POSIX list above). */
_REG_EEND, /* Premature end. */
_REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
_REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
} reg_errcode_t;
#ifdef _XOPEN_SOURCE
# define REG_ENOSYS _REG_ENOSYS
#endif
#define REG_NOERROR _REG_NOERROR
#define REG_NOMATCH _REG_NOMATCH
#define REG_BADPAT _REG_BADPAT
#define REG_ECOLLATE _REG_ECOLLATE
#define REG_ECTYPE _REG_ECTYPE
#define REG_EESCAPE _REG_EESCAPE
#define REG_ESUBREG _REG_ESUBREG
#define REG_EBRACK _REG_EBRACK
#define REG_EPAREN _REG_EPAREN
#define REG_EBRACE _REG_EBRACE
#define REG_BADBR _REG_BADBR
#define REG_ERANGE _REG_ERANGE
#define REG_ESPACE _REG_ESPACE
#define REG_BADRPT _REG_BADRPT
#define REG_EEND _REG_EEND
#define REG_ESIZE _REG_ESIZE
#define REG_ERPAREN _REG_ERPAREN
/* struct re_pattern_buffer normally uses member names like 'buffer'
that POSIX does not allow. In POSIX mode these members have names
with leading 're_' (e.g., 're_buffer'). */
#ifdef __USE_GNU_REGEX
# define _REG_RE_NAME(id) id
# define _REG_RM_NAME(id) id
#else
# define _REG_RE_NAME(id) re_##id
# define _REG_RM_NAME(id) rm_##id
#endif
/* The user can specify the type of the re_translate member by
defining the macro RE_TRANSLATE_TYPE, which defaults to unsigned
char *. This pollutes the POSIX name space, so in POSIX mode just
use unsigned char *. */
#ifdef __USE_GNU_REGEX
# ifndef RE_TRANSLATE_TYPE
# define RE_TRANSLATE_TYPE unsigned char *
# endif
# define REG_TRANSLATE_TYPE RE_TRANSLATE_TYPE
#else
# define REG_TRANSLATE_TYPE unsigned char *
#endif
/* This data structure represents a compiled pattern. Before calling
the pattern compiler, the fields 'buffer', 'allocated', 'fastmap',
'translate', and 'no_sub' can be set. After the pattern has been
compiled, the 're_nsub' field is available. All other fields are
private to the regex routines.
Member names are written through _REG_RE_NAME: when __USE_GNU_REGEX
is defined they are the plain names used in the comments below
(e.g. 'buffer'); otherwise each gets an 're_' prefix (e.g.
're_buffer'). 're_nsub' is spelled directly and so has the same
name in both modes. */
struct re_pattern_buffer
{
/* Space that holds the compiled pattern. It is declared as
'unsigned char *' because its elements are sometimes used as
array indexes. */
unsigned char *_REG_RE_NAME (buffer);
/* Number of bytes to which 'buffer' points. */
__re_long_size_t _REG_RE_NAME (allocated);
/* Number of bytes actually used in 'buffer'. */
__re_long_size_t _REG_RE_NAME (used);
/* Syntax setting with which the pattern was compiled. */
reg_syntax_t _REG_RE_NAME (syntax);
/* Pointer to a fastmap, if any, otherwise zero. re_search uses the
fastmap, if there is one, to skip over impossible starting points
for matches. */
char *_REG_RE_NAME (fastmap);
/* Either a translate table to apply to all characters before
comparing them, or zero for no translation. The translation is
applied to a pattern when it is compiled and to a string when it
is matched. The pointer type is REG_TRANSLATE_TYPE, which is
'unsigned char *' unless the user defined RE_TRANSLATE_TYPE in
GNU mode. */
REG_TRANSLATE_TYPE _REG_RE_NAME (translate);
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
/* The remaining members are bit-fields (one bit each, except the
two-bit 'regs_allocated'). */
/* Zero if this pattern cannot match the empty string, one otherwise.
Well, in truth it's used only in 're_search_2', to see whether or
not we should use the fastmap, so we don't set this absolutely
perfectly; see 're_compile_fastmap' (the "duplicate" case). */
unsigned int _REG_RE_NAME (can_be_null) : 1;
/* If REGS_UNALLOCATED, allocate space in the 'regs' structure
for 'max (RE_NREGS, re_nsub + 1)' groups.
If REGS_REALLOCATE, reallocate space if necessary.
If REGS_FIXED, use what's there.
The three symbolic names are only defined in GNU mode; their
values (0, 1, 2) are what the two-bit field below stores. */
#ifdef __USE_GNU_REGEX
# define REGS_UNALLOCATED 0
# define REGS_REALLOCATE 1
# define REGS_FIXED 2
#endif
unsigned int _REG_RE_NAME (regs_allocated) : 2;
/* Set to zero when 're_compile_pattern' compiles a pattern; set to
one by 're_compile_fastmap' if it updates the fastmap. */
unsigned int _REG_RE_NAME (fastmap_accurate) : 1;
/* If set, 're_match_2' does not return information about
subexpressions. */
unsigned int _REG_RE_NAME (no_sub) : 1;
/* If set, a beginning-of-line anchor doesn't match at the beginning
of the string. */
unsigned int _REG_RE_NAME (not_bol) : 1;
/* Similarly for an end-of-line anchor. */
unsigned int _REG_RE_NAME (not_eol) : 1;
/* If true, an anchor at a newline matches. */
unsigned int _REG_RE_NAME (newline_anchor) : 1;
/* NOTE(review): the marker below looks like one of the "[[[" comments
that delimit text extracted into the Texinfo manual; keep it. */
/* [[[end pattern_buffer]]] */
};
typedef struct re_pattern_buffer regex_t;
/* This is the structure we store register match data in. See
regex.texinfo for a full description of what registers match.
It is a structure of arrays: 'start' and 'end' each point to an
array of regoff_t, and 'num_regs' is the number of registers the
structure holds (see 're_set_registers').
Member names are written through _REG_RM_NAME: plain names when
__USE_GNU_REGEX is defined, otherwise prefixed with 'rm_'. */
struct re_registers
{
/* Number of registers held. */
__re_size_t _REG_RM_NAME (num_regs);
/* Per-register start offsets. */
regoff_t *_REG_RM_NAME (start);
/* Per-register end offsets. */
regoff_t *_REG_RM_NAME (end);
};
/* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
're_match_2' returns information about at least this many registers
the first time a 'regs' structure is passed. */
#if !defined RE_NREGS && defined __USE_GNU_REGEX
# define RE_NREGS 30
#endif
/* POSIX specification for registers. Besides using different names
than 're_registers', POSIX uses an array of structures instead of a
structure of arrays: 'regexec' takes an array of these together
with its element count. */
typedef struct
{
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
} regmatch_t;
/* Declarations for routines. */
/* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the 're_syntax_options' variable. */
extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
/* Compile the regular expression PATTERN, with length LENGTH
and syntax given by the global 're_syntax_options', into the buffer
BUFFER. Return NULL if successful, and an error string if not. */
extern const char *re_compile_pattern (const char *__pattern, size_t __length,
struct re_pattern_buffer *__buffer);
/* Compile a fastmap for the compiled pattern in BUFFER; used to
accelerate searches. Return 0 if successful and -2 if there was an
internal error. */
extern int re_compile_fastmap (struct re_pattern_buffer *__buffer);
/* Search in the string STRING (with length LENGTH) for the pattern
compiled into BUFFER. Start searching at position START, for RANGE
characters. Return the starting position of the match, -1 for no
match, or -2 for an internal error. Also return register
information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
extern regoff_t re_search (struct re_pattern_buffer *__buffer,
const char *__string, __re_idx_t __length,
__re_idx_t __start, regoff_t __range,
struct re_registers *__regs);
/* Like 're_search', but search in the concatenation of STRING1 and
STRING2. Also, stop searching at index START + STOP. */
extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
const char *__string1, __re_idx_t __length1,
const char *__string2, __re_idx_t __length2,
__re_idx_t __start, regoff_t __range,
struct re_registers *__regs,
__re_idx_t __stop);
/* Like 're_search', but return how many characters in STRING the regexp
in BUFFER matched, starting at position START. */
extern regoff_t re_match (struct re_pattern_buffer *__buffer,
const char *__string, __re_idx_t __length,
__re_idx_t __start, struct re_registers *__regs);
/* Relates to 're_match' as 're_search_2' relates to 're_search'. */
extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
const char *__string1, __re_idx_t __length1,
const char *__string2, __re_idx_t __length2,
__re_idx_t __start, struct re_registers *__regs,
__re_idx_t __stop);
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
ENDS. Subsequent matches using BUFFER and REGS will use this memory
for recording register information. STARTS and ENDS must be
allocated with malloc, and must each be at least 'NUM_REGS * sizeof
(regoff_t)' bytes long.
If NUM_REGS == 0, then subsequent matches should allocate their own
register data.
Unless this function is called, the first search or match using
BUFFER will allocate its own register data, without freeing the old
data. */
extern void re_set_registers (struct re_pattern_buffer *__buffer,
struct re_registers *__regs,
__re_size_t __num_regs,
regoff_t *__starts, regoff_t *__ends);
#if defined _REGEX_RE_COMP || defined _LIBC
# ifndef _CRAY
/* 4.2 bsd compatibility. */
extern char *re_comp (const char *);
extern int re_exec (const char *);
# endif
#endif
/* GCC 2.95 and later have "__restrict"; C99 compilers have
"restrict", and "configure" may have defined "restrict".
Other compilers use __restrict, __restrict__, and _Restrict, and
'configure' might #define 'restrict' to those words, so pick a
different name. */
#ifndef _Restrict_
# if 199901L <= __STDC_VERSION__
# define _Restrict_ restrict
# elif 2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)
# define _Restrict_ __restrict
# else
# define _Restrict_
# endif
#endif
/* gcc 3.1 and up support the [restrict] syntax. Don't trust
sys/cdefs.h's definition of __restrict_arr, though, as it
mishandles gcc -ansi -pedantic. */
#ifndef _Restrict_arr_
# if ((199901L <= __STDC_VERSION__ \
|| ((3 < __GNUC__ || (3 == __GNUC__ && 1 <= __GNUC_MINOR__)) \
&& !defined __STRICT_ANSI__)) \
&& !defined __GNUG__)
# define _Restrict_arr_ _Restrict_
# else
# define _Restrict_arr_
# endif
#endif
/* POSIX compatibility. */
extern int regcomp (regex_t *_Restrict_ __preg,
const char *_Restrict_ __pattern,
int __cflags);
extern int regexec (const regex_t *_Restrict_ __preg,
const char *_Restrict_ __string, size_t __nmatch,
regmatch_t __pmatch[_Restrict_arr_],
int __eflags);
extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
char *_Restrict_ __errbuf, size_t __errbuf_size);
extern void regfree (regex_t *__preg);
#ifdef __cplusplus
}
#endif /* C++ */
#endif /* regex.h */

1741
lib/regex_internal.c Normal file

File diff suppressed because it is too large Load diff

866
lib/regex_internal.h Normal file
View file

@ -0,0 +1,866 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002-2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _REGEX_INTERNAL_H
#define _REGEX_INTERNAL_H 1
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <langinfo.h>
#ifndef _LIBC
# include "localcharset.h"
#endif
#include <locale.h>
#include <wchar.h>
#include <wctype.h>
#include <stdint.h>
#if defined _LIBC
# include <bits/libc-lock.h>
#else
# define __libc_lock_init(NAME) do { } while (0)
# define __libc_lock_lock(NAME) do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif
/* In case that the system doesn't have isblank(). */
#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK))
# define isblank(ch) ((ch) == ' ' || (ch) == '\t')
#endif
#ifdef _LIBC
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS
# define _RE_DEFINE_LOCALE_FUNCTIONS 1
# include <locale/localeinfo.h>
# include <locale/elem-hash.h>
# include <locale/coll-lookup.h>
# endif
#endif
/* This is for other GNU distributions with internationalized messages. */
#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
# include <libintl.h>
# ifdef _LIBC
# undef gettext
# define gettext(msgid) \
INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
#endif
#ifndef gettext_noop
/* This define is so xgettext can find the internationalizable
strings. */
# define gettext_noop(String) String
#endif
/* For loser systems without the definition. */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif
#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE && HAVE_WCSCOLL) || _LIBC
# define RE_ENABLE_I18N
#endif
#if __GNUC__ >= 3
# define BE(expr, val) __builtin_expect (expr, val)
#else
# define BE(expr, val) (expr)
# ifdef _LIBC
# define inline
# endif
#endif
/* Number of ASCII characters. */
#define ASCII_CHARS 0x80
/* Number of single byte characters. */
#define SBC_MAX (UCHAR_MAX + 1)
#define COLL_ELEM_LEN_MAX 8
/* The character which represents newline. */
#define NEWLINE_CHAR '\n'
#define WIDE_NEWLINE_CHAR L'\n'
/* Rename to standard API for using out of glibc. */
#ifndef _LIBC
# define __wctype wctype
# define __iswctype iswctype
# define __btowc btowc
# define __wcrtomb wcrtomb
# define __mbrtowc mbrtowc
# define __regfree regfree
# define attribute_hidden
#endif /* not _LIBC */
#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
# define __attribute(arg) __attribute__ (arg)
#else
# define __attribute(arg)
#endif
typedef __re_idx_t Idx;
/* Special return value for failure to match. */
#define REG_MISSING ((Idx) -1)
/* Special return value for internal error. */
#define REG_ERROR ((Idx) -2)
/* Test whether N is a valid index, and is not one of the above. */
#ifdef _REGEX_LARGE_OFFSETS
# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR)
#else
# define REG_VALID_INDEX(n) (0 <= (n))
#endif
/* Test whether N is a valid nonzero index. */
#ifdef _REGEX_LARGE_OFFSETS
# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1))
#else
# define REG_VALID_NONZERO_INDEX(n) (0 < (n))
#endif
/* A hash value, suitable for computing hash tables. */
typedef __re_size_t re_hashval_t;
/* An integer used to represent a set of bits. It must be unsigned,
and must be at least as wide as unsigned int. */
typedef unsigned long int bitset_word_t;
/* All bits set in a bitset_word_t. */
#define BITSET_WORD_MAX ULONG_MAX
/* Number of bits in a bitset_word_t. For portability to hosts with
padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)';
instead, deduce it directly from BITSET_WORD_MAX. Avoid
greater-than-32-bit integers and unconditional shifts by more than
31 bits, as they're not portable. */
#if BITSET_WORD_MAX == 0xffffffffUL
# define BITSET_WORD_BITS 32
#elif BITSET_WORD_MAX >> 31 >> 4 == 1
# define BITSET_WORD_BITS 36
#elif BITSET_WORD_MAX >> 31 >> 16 == 1
# define BITSET_WORD_BITS 48
#elif BITSET_WORD_MAX >> 31 >> 28 == 1
# define BITSET_WORD_BITS 60
#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1
# define BITSET_WORD_BITS 64
#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1
# define BITSET_WORD_BITS 72
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1
# define BITSET_WORD_BITS 128
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1
# define BITSET_WORD_BITS 256
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1
# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */
# if BITSET_WORD_BITS <= SBC_MAX
# error "Invalid SBC_MAX"
# endif
#else
# error "Add case for new bitset_word_t size"
#endif
/* Number of bitset_word_t values in a bitset_t. */
#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS)
typedef bitset_word_t bitset_t[BITSET_WORDS];
typedef bitset_word_t *re_bitset_ptr_t;
typedef const bitset_word_t *re_const_bitset_ptr_t;
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
#define NEXT_WORD_CONSTRAINT 0x0004
#define NEXT_NOTWORD_CONSTRAINT 0x0008
#define PREV_NEWLINE_CONSTRAINT 0x0010
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
#define WORD_DELIM_CONSTRAINT 0x0100
#define NOT_WORD_DELIM_CONSTRAINT 0x0200
/* Context requirements that an ANCHOR token can carry (stored in
re_token_t.opr.ctx_type).  Every enumerator is a combination of the
*_CONSTRAINT bit masks, so those masks can be used to test a value.  */
typedef enum
{
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
WORD_DELIM = WORD_DELIM_CONSTRAINT,
NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
} re_context_type;
/* A set of node indices stored in the ELEMS array.  */
typedef struct
{
Idx alloc; /* NOTE(review): presumably the allocated capacity of ELEMS -- confirm */
Idx nelem; /* Number of elements; re_node_set_empty resets it to 0.  */
Idx *elems; /* Element storage; released by re_node_set_free.  */
} re_node_set;
/* Type tag shared by tokens, nodes and tree nodes.  Values that have
EPSILON_BIT set are the ones IS_EPSILON_NODE reports as true.  When
re_token_t stores this in an 8-bit bit-field (GCC, non-strict mode) every
value must fit in 8 bits.  */
typedef enum
{
NON_TYPE = 0,
/* Node type, These are used by token, node, tree. */
CHARACTER = 1,
END_OF_RE = 2,
SIMPLE_BRACKET = 3,
OP_BACK_REF = 4,
OP_PERIOD = 5,
#ifdef RE_ENABLE_I18N
COMPLEX_BRACKET = 6,
OP_UTF8_PERIOD = 7,
#endif /* RE_ENABLE_I18N */
/* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used
when the debugger shows values of this enum type. */
#define EPSILON_BIT 8
OP_OPEN_SUBEXP = EPSILON_BIT | 0,
OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
OP_ALT = EPSILON_BIT | 2,
OP_DUP_ASTERISK = EPSILON_BIT | 3,
ANCHOR = EPSILON_BIT | 4,
/* Tree type, these are used only by tree. */
CONCAT = 16,
SUBEXP = 17,
/* Token type, these are used only by token. */
OP_DUP_PLUS = 18,
OP_DUP_QUESTION,
OP_OPEN_BRACKET,
OP_CLOSE_BRACKET,
OP_CHARSET_RANGE,
OP_OPEN_DUP_NUM,
OP_CLOSE_DUP_NUM,
OP_NON_MATCH_LIST,
OP_OPEN_COLL_ELEM,
OP_CLOSE_COLL_ELEM,
OP_OPEN_EQUIV_CLASS,
OP_CLOSE_EQUIV_CLASS,
OP_OPEN_CHAR_CLASS,
OP_CLOSE_CHAR_CLASS,
OP_WORD,
OP_NOTWORD,
OP_SPACE,
OP_NOTSPACE,
BACK_SLASH
} re_token_type_t;
#ifdef RE_ENABLE_I18N
/* Character set description used for COMPLEX_BRACKET tokens
(re_token_t.opr.mbcset points to one).  Only available when
RE_ENABLE_I18N is defined.  Each array below has a matching element
count further down (nmbchars, ncoll_syms, ...).  */
typedef struct
{
/* Multibyte characters. */
wchar_t *mbchars;
/* Collating symbols. */
# ifdef _LIBC
int32_t *coll_syms;
# endif
/* Equivalence classes. */
# ifdef _LIBC
int32_t *equiv_classes;
# endif
/* Range expressions. */
# ifdef _LIBC
uint32_t *range_starts;
uint32_t *range_ends;
# else /* not _LIBC */
wchar_t *range_starts;
wchar_t *range_ends;
# endif /* not _LIBC */
/* Character classes. */
wctype_t *char_classes;
/* If this character set is the non-matching list. */
unsigned int non_match : 1;
/* # of multibyte characters. */
Idx nmbchars;
/* # of collating symbols. */
Idx ncoll_syms;
/* # of equivalence classes. */
Idx nequiv_classes;
/* # of range expressions. */
Idx nranges;
/* # of character classes. */
Idx nchar_classes;
} re_charset_t;
#endif /* RE_ENABLE_I18N */
/* One token/node: a type tag, the operand that goes with that type (OPR),
and a handful of flag bits.  Which member of OPR is meaningful depends on
TYPE, as noted next to each member.  */
typedef struct
{
union
{
unsigned char c; /* for CHARACTER */
re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */
#ifdef RE_ENABLE_I18N
re_charset_t *mbcset; /* for COMPLEX_BRACKET */
#endif /* RE_ENABLE_I18N */
Idx idx; /* for BACK_REF */
re_context_type ctx_type; /* for ANCHOR */
} opr;
/* With GCC (non-strict mode) the type tag is packed into 8 bits.  */
#if __GNUC__ >= 2 && !defined __STRICT_ANSI__
re_token_type_t type : 8;
#else
re_token_type_t type;
#endif
unsigned int constraint : 10; /* context constraint */
unsigned int duplicated : 1;
unsigned int opt_subexp : 1;
#ifdef RE_ENABLE_I18N
unsigned int accept_mb : 1;
/* These 2 bits can be moved into the union if needed (e.g. if running out
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
unsigned int mb_partial : 1;
#endif
unsigned int word_char : 1;
} re_token_t;
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
/* The string object the re_string_* accessor macros and helpers operate
on: the raw input buffer, the (possibly translated) working buffers, and
the cursor/length bookkeeping for them.  */
struct re_string_t
{
/* Indicate the raw buffer which is the original string passed as an
argument of regexec(), re_search(), etc.. */
const unsigned char *raw_mbs;
/* Store the multibyte string. In case of "case insensitive mode" like
REG_ICASE, upper cases of the string are stored, otherwise MBS points
the same address that RAW_MBS points. */
unsigned char *mbs;
#ifdef RE_ENABLE_I18N
/* Store the wide character string which is corresponding to MBS. */
wint_t *wcs;
Idx *offsets;
mbstate_t cur_state;
#endif
/* Index in RAW_MBS. Each character mbs[i] corresponds to
raw_mbs[raw_mbs_idx + i]. */
Idx raw_mbs_idx;
/* The length of the valid characters in the buffers. */
Idx valid_len;
/* The corresponding number of bytes in raw_mbs array. */
Idx valid_raw_len;
/* The length of the buffers MBS and WCS. */
Idx bufs_len;
/* The index in MBS, which is updated by re_string_fetch_byte. */
Idx cur_idx;
/* length of RAW_MBS array. */
Idx raw_len;
/* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */
Idx len;
/* End of the buffer may be shorter than its length in the cases such
as re_match_2, re_search_2. Then, we use STOP for end of the buffer
instead of LEN. */
Idx raw_stop;
/* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */
Idx stop;
/* The context of mbs[0]. We store the context independently, since
the context of mbs[0] may be different from raw_mbs[0], which is
the beginning of the input string. */
unsigned int tip_context;
/* The translation passed as a part of an argument of re_compile_pattern. */
RE_TRANSLATE_TYPE trans;
/* Copy of re_dfa_t's word_char. */
re_const_bitset_ptr_t word_char;
/* true if REG_ICASE. */
unsigned char icase;
/* NOTE(review): is_utf8, map_notascii, word_ops_used and mb_cur_max share
their names with members of struct re_dfa_t; presumably they are copies
of those -- confirm against the initialisation code.  */
unsigned char is_utf8;
unsigned char map_notascii;
unsigned char mbs_allocated;
unsigned char offsets_needed;
unsigned char newline_anchor;
unsigned char word_ops_used;
int mb_cur_max;
};
typedef struct re_string_t re_string_t;
struct re_dfa_t;
typedef struct re_dfa_t re_dfa_t;
#ifndef _LIBC
# define internal_function
#endif
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
Idx new_buf_len)
internal_function;
#ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr) internal_function;
static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr)
internal_function;
#endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr) internal_function;
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
int eflags)
internal_function __attribute ((pure));
/* Accessor macros for a re_string_t reached through the pointer PSTR.
   Every macro argument is parenthesized in the expansion so that an
   expression argument (for example a conditional expression passed as
   OFFSET) keeps its intended grouping.  Note that PSTR and IDX may be
   evaluated more than once, so they must not have side effects.  */

/* Byte at OFFSET bytes past the current index, without advancing.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
/* Byte at the current index; advances the current index by one.  */
#define re_string_fetch_byte(pstr) \
  ((pstr)->mbs[(pstr)->cur_idx++])
/* True if IDX is the end of the valid data or wcs[IDX] is not WEOF.  */
#define re_string_first_byte(pstr, idx) \
  ((idx) == (pstr)->valid_len || (pstr)->wcs[(idx)] != WEOF)
/* True if wcs[IDX] is not WEOF and the following position is either the
   end of the valid data or also not WEOF.  */
#define re_string_is_single_byte_char(pstr, idx) \
  ((pstr)->wcs[(idx)] != WEOF && ((pstr)->valid_len == (idx) + 1 \
                                  || (pstr)->wcs[(idx) + 1] != WEOF))
/* True once the current index has reached STOP.  */
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx)
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[(idx)])
/* Move the current index forward by IDX bytes / set it to IDX.  */
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
#include <alloca.h>
#ifndef _LIBC
# if HAVE_ALLOCA
/* The OS usually guarantees only one guard page at the bottom of the stack,
and a page size can be as small as 4096 bytes. So we cannot safely
allocate anything larger than 4096 bytes. Also care for the possibility
of a few compiler-allocated temporary stack slots. */
# define __libc_use_alloca(n) ((n) < 4032)
# else
/* alloca is implemented with malloc, so just use malloc. */
# define __libc_use_alloca(n) 0
# undef alloca
# define alloca(n) malloc (n)
# endif
#endif
#ifndef MAX
# define MAX(a,b) ((a) < (b) ? (b) : (a))
#endif
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p) free (p)
/* Node of the binary parse tree (struct re_dfa_t's str_tree points to
one).  PARENT, LEFT and RIGHT link the tree together.  */
struct bin_tree_t
{
struct bin_tree_t *parent;
struct bin_tree_t *left;
struct bin_tree_t *right;
/* NOTE(review): the roles of FIRST and NEXT are not visible in this header;
see the code that builds and walks the tree.  */
struct bin_tree_t *first;
struct bin_tree_t *next;
re_token_t token;
/* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
Otherwise 'type' indicate the type of this node. */
Idx node_idx;
};
typedef struct bin_tree_t bin_tree_t;
/* Number of bin_tree_t objects that fit in 1024 bytes once room for one
pointer (the NEXT link below) has been set aside.  */
#define BIN_TREE_STORAGE_SIZE \
((1024 - sizeof (void *)) / sizeof (bin_tree_t))
/* A chunk of BIN_TREE_STORAGE_SIZE tree nodes; chunks are chained through
NEXT.  struct re_dfa_t refers to the chain via str_tree_storage.  */
struct bin_tree_storage_t
{
struct bin_tree_storage_t *next;
bin_tree_t data[BIN_TREE_STORAGE_SIZE];
};
typedef struct bin_tree_storage_t bin_tree_storage_t;
/* Context flags, one bit each.  A context value is an OR of these bits;
   zero means an ordinary context (see IS_ORDINARY_CONTEXT).  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

/* Tests for the individual flags of a context value C.  */
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)

/* Character classification used for word and newline handling.  */
#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)

/* Nonzero if CONSTRAINT contains a PREV_* requirement that CONTEXT does
   not meet.  CONSTRAINT is parenthesized at every use so that an
   expression argument such as `a | b' is masked as a whole.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
  ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
   || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
   || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
   || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))

/* Nonzero if CONSTRAINT contains a NEXT_* requirement that CONTEXT does
   not meet.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
  ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
   || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
   || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
   || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
/* One state of the matcher.  struct re_dfa_t keeps pointers to its initial
states (init_state, init_state_word, ...), and the match context keeps a
log of states (state_log).  */
struct re_dfastate_t
{
re_hashval_t hash;
re_node_set nodes;
re_node_set non_eps_nodes; /* NOTE(review): presumably NODES minus epsilon nodes -- confirm */
re_node_set inveclosure;
re_node_set *entrance_nodes;
/* NOTE(review): these look like transition tables to successor states,
one for the ordinary and one for the word context -- confirm.  */
struct re_dfastate_t **trtable, **word_trtable;
unsigned int context : 4; /* 4 bits: room for the four CONTEXT_* flags */
unsigned int halt : 1;
/* If this state can accept "multi byte".
Note that we refer to multibyte characters, and multi character
collating elements as "multi byte". */
unsigned int accept_mb : 1;
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
unsigned int has_constraint : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
/* Element type of struct re_dfa_t's state_table: a growable array of
state pointers.
NOTE(review): presumably one hash bucket each, selected with
state_hash_mask, with NUM entries in use out of ALLOC -- confirm.  */
struct re_state_table_entry
{
Idx num;
Idx alloc;
re_dfastate_t **array;
};
/* Array type used in re_sub_match_last_t and re_sub_match_top_t.
Holds pointers to states in ARRAY.
NOTE(review): presumably ALLOC is the capacity of ARRAY and NEXT_IDX the
next position to fill -- confirm.  */
typedef struct
{
Idx next_idx;
Idx alloc;
re_dfastate_t **array;
} state_array_t;
/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.
Objects of this type are referenced from the LASTS array of
re_sub_match_top_t.  */
typedef struct
{
Idx node;
Idx str_idx; /* The position NODE match at. */
state_array_t path; /* Embedded by value, unlike re_sub_match_top_t.path.  */
} re_sub_match_last_t;
/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
And information about the node, whose type is OP_CLOSE_SUBEXP,
corresponding to NODE is stored in LASTS.
The match context keeps an array of pointers to these (sub_tops).  */
typedef struct
{
Idx str_idx;
Idx node;
state_array_t *path; /* Held by pointer, unlike re_sub_match_last_t.path.  */
Idx alasts; /* Allocation size of LASTS. */
Idx nlasts; /* The number of LASTS. */
re_sub_match_last_t **lasts;
} re_sub_match_top_t;
/* Element type of the back reference cache kept in the match context
(bkref_ents).
NOTE(review): field meanings below are inferred from their names only --
confirm against the matcher code.  */
struct re_backref_cache_entry
{
Idx node;
Idx str_idx;
Idx subexp_from; /* presumably start of the referenced subexpression match */
Idx subexp_to; /* presumably end of the referenced subexpression match */
char more;
char unused;
unsigned short int eps_reachable_subexps_map;
};
/* Working state of one match attempt: the input string object, the
compiled pattern (DFA), and the state log, back reference cache and
subexpression bookkeeping built while matching.  */
typedef struct
{
/* The string object corresponding to the input string. */
re_string_t input;
/* The compiled pattern.  The pointer itself is const only under _LIBC
or C99 and later.  */
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
const re_dfa_t *const dfa;
#else
const re_dfa_t *dfa;
#endif
/* EFLAGS of the argument of regexec. */
int eflags;
/* Where the matching ends. */
Idx match_last;
Idx last_node;
/* The state log used by the matcher. */
re_dfastate_t **state_log;
Idx state_log_top;
/* Back reference cache. */
Idx nbkref_ents; /* NOTE(review): presumably entries in use -- confirm */
Idx abkref_ents; /* NOTE(review): presumably allocated entries -- confirm */
struct re_backref_cache_entry *bkref_ents;
int max_mb_elem_len;
Idx nsub_tops; /* NOTE(review): presumably entries in use in SUB_TOPS -- confirm */
Idx asub_tops; /* NOTE(review): presumably allocated size of SUB_TOPS -- confirm */
re_sub_match_top_t **sub_tops;
} re_match_context_t;
/* Bookkeeping for a pass over arrays of states (SIFTED_STATES and
LIMITED_STATES), together with the last node / string index involved and
a node set of limits.
NOTE(review): the exact algorithm is not visible in this header; see the
users of this type in the matcher.  */
typedef struct
{
re_dfastate_t **sifted_states;
re_dfastate_t **limited_states;
Idx last_node;
Idx last_str_idx;
re_node_set limits;
} re_sift_context_t;
/* One entry of struct re_fail_stack_t's STACK array.
NOTE(review): presumably a saved position (IDX, NODE) plus the registers
and epsilon-visited nodes needed to resume from it -- confirm.  */
struct re_fail_stack_ent_t
{
Idx idx;
Idx node;
regmatch_t *regs;
re_node_set eps_via_nodes;
};
/* A growable stack of re_fail_stack_ent_t entries.
NOTE(review): presumably NUM is the number of entries in use and ALLOC the
capacity of STACK -- confirm.  */
struct re_fail_stack_t
{
Idx num;
Idx alloc;
struct re_fail_stack_ent_t *stack;
};
/* The compiled form of a pattern: the node array with its per-node side
tables, the state table and initial states, the parse tree and its
storage, and pattern-wide flags.  */
struct re_dfa_t
{
re_token_t *nodes;
size_t nodes_alloc; /* NOTE(review): presumably capacity of NODES -- confirm */
size_t nodes_len; /* NOTE(review): presumably entries in use in NODES -- confirm */
/* NOTE(review): the arrays below appear to be indexed by node, in
parallel with NODES -- confirm.  */
Idx *nexts;
Idx *org_indices;
re_node_set *edests;
re_node_set *eclosures;
re_node_set *inveclosures;
struct re_state_table_entry *state_table;
/* Initial states; the suffixes match the CONTEXT_* flag names (word,
newline, beginning of buffer).  */
re_dfastate_t *init_state;
re_dfastate_t *init_state_word;
re_dfastate_t *init_state_nl;
re_dfastate_t *init_state_begbuf;
bin_tree_t *str_tree;
bin_tree_storage_t *str_tree_storage;
re_bitset_ptr_t sb_char;
int str_tree_storage_idx;
/* number of subexpressions 're_nsub' is in regex_t. */
re_hashval_t state_hash_mask;
Idx init_node;
Idx nbackref; /* The number of backreference in this dfa. */
/* Bitmap expressing which backreference is used. */
bitset_word_t used_bkref_map;
bitset_word_t completed_bkref_map;
unsigned int has_plural_match : 1;
/* If this dfa has "multibyte node", which is a backreference or
a node which can accept multibyte character or multi character
collating element. */
unsigned int has_mb_node : 1;
unsigned int is_utf8 : 1;
unsigned int map_notascii : 1;
unsigned int word_ops_used : 1;
int mb_cur_max;
bitset_t word_char; /* re_string_t keeps a pointer copy of this set.  */
reg_syntax_t syntax;
Idx *subexp_map;
#ifdef DEBUG
char* re_str;
#endif
#ifdef _LIBC
__libc_lock_define (, lock)
#endif
};
/* Zero-fill the re_node_set that SET points to.  */
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
/* Remove ID from SET by position: the value returned by
re_node_set_contains, minus one, is handed to re_node_set_remove_at.
NOTE(review): this presumably relies on re_node_set_contains returning a
1-based position and on re_node_set_remove_at tolerating the "not found"
case -- confirm against their definitions.  */
#define re_node_set_remove(set,id) \
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
/* Forget all elements of *P without freeing the element storage.  */
#define re_node_set_empty(p) ((p)->nelem = 0)
/* Free the element storage of *SET (the struct itself is not freed).  */
#define re_node_set_free(set) re_free ((set)->elems)
/* Discriminator for bracket_elem_t: tells which kind of bracket
expression element is described.  */
typedef enum
{
SB_CHAR,
MB_CHAR,
EQUIV_CLASS,
COLL_SYM,
CHAR_CLASS
} bracket_elem_type;
/* One element of a bracket expression: a TYPE tag plus the operand.
NOTE(review): presumably CH goes with SB_CHAR, WCH with MB_CHAR and NAME
with the named kinds (EQUIV_CLASS, COLL_SYM, CHAR_CLASS) -- confirm
against the parser.  */
typedef struct
{
bracket_elem_type type;
union
{
unsigned char ch;
unsigned char *name;
wchar_t wch;
} opr;
} bracket_elem_t;
/* Inline functions for bitset_t operation. */
/* Turn on bit I of SET.  */
static inline void
bitset_set (bitset_t set, Idx i)
{
  bitset_word_t bit = (bitset_word_t) 1 << i % BITSET_WORD_BITS;

  set[i / BITSET_WORD_BITS] |= bit;
}
/* Turn off bit I of SET.  */
static inline void
bitset_clear (bitset_t set, Idx i)
{
  bitset_word_t bit = (bitset_word_t) 1 << i % BITSET_WORD_BITS;

  set[i / BITSET_WORD_BITS] &= ~bit;
}
/* Return true if bit I of SET is on.  */
static inline bool
bitset_contain (const bitset_t set, Idx i)
{
  bitset_word_t word = set[i / BITSET_WORD_BITS];

  return (word >> i % BITSET_WORD_BITS) & 1;
}
/* Turn off every bit of SET.  */
static inline void
bitset_empty (bitset_t set)
{
  memset (set, 0, BITSET_WORDS * sizeof (bitset_word_t));
}
/* Turn on the bits of SET for all SBC_MAX single-byte characters.  The
   words that are completely used are filled with memset; when SBC_MAX is
   not a multiple of the word size, the last word gets only its low
   SBC_MAX % BITSET_WORD_BITS bits set.  */
static inline void
bitset_set_all (bitset_t set)
{
  const size_t full_words = SBC_MAX / BITSET_WORD_BITS;

  memset (set, -1, full_words * sizeof (bitset_word_t));
  if (SBC_MAX % BITSET_WORD_BITS != 0)
    {
      bitset_word_t tail_mask =
        ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
      set[BITSET_WORDS - 1] = tail_mask;
    }
}
/* Copy all words of SRC into DEST.  */
static inline void
bitset_copy (bitset_t dest, const bitset_t src)
{
  memcpy (dest, src, BITSET_WORDS * sizeof (bitset_word_t));
}
/* Complement SET in place.  Completely used words are inverted as a
   whole; when SBC_MAX is not a multiple of the word size, the last word is
   inverted only within its low SBC_MAX % BITSET_WORD_BITS bits and the
   remaining bits are cleared.  */
static inline void
bitset_not (bitset_t set)
{
  const int full_words = SBC_MAX / BITSET_WORD_BITS;
  int w = 0;

  while (w < full_words)
    {
      set[w] = ~set[w];
      ++w;
    }

  if (SBC_MAX % BITSET_WORD_BITS != 0)
    {
      bitset_word_t valid =
        ((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1;
      set[BITSET_WORDS - 1] = valid & ~set[BITSET_WORDS - 1];
    }
}
/* DEST |= SRC, word by word.  */
static inline void
bitset_merge (bitset_t dest, const bitset_t src)
{
  int w;

  for (w = 0; w != BITSET_WORDS; w++)
    dest[w] = dest[w] | src[w];
}
/* DEST &= SRC, word by word.  */
static inline void
bitset_mask (bitset_t dest, const bitset_t src)
{
  int w;

  for (w = 0; w != BITSET_WORDS; w++)
    dest[w] = dest[w] & src[w];
}
#ifdef RE_ENABLE_I18N
/* Inline functions for re_string. */
/* Return the number of bytes occupied by the character that starts at
   index IDX of PSTR: 1 when the locale is single-byte, otherwise 1 plus
   the number of following positions (within the valid data) whose wide
   character slot is WEOF.  */
static inline int
internal_function __attribute ((pure))
re_string_char_size_at (const re_string_t *pstr, Idx idx)
{
  int size = 1;

  if (pstr->mb_cur_max == 1)
    return 1;

  while (idx + size < pstr->valid_len && pstr->wcs[idx + size] == WEOF)
    ++size;

  return size;
}
/* Return the character at index IDX of PSTR as a wint_t: the byte from
   MBS when the locale is single-byte, the entry from WCS otherwise.  */
static inline wint_t
internal_function __attribute ((pure))
re_string_wchar_at (const re_string_t *pstr, Idx idx)
{
  return (pstr->mb_cur_max == 1
          ? (wint_t) pstr->mbs[idx]
          : (wint_t) pstr->wcs[idx]);
}
/* Return the size in bytes of the element of PSTR that starts at index
IDX.  Outside glibc (_LIBC undefined) this is always 1.  Inside glibc,
when the current LC_COLLATE locale has collation rules (nrules != 0), it
is the number of bytes by which findidx advanced the scan pointer.  */
static int
internal_function __attribute ((pure))
re_string_elem_size_at (const re_string_t *pstr, Idx idx)
{
# ifdef _LIBC
const unsigned char *p, *extra;
const int32_t *table, *indirect;
int32_t tmp;
/* Textual inclusion inside the function body.
NOTE(review): presumably this is what defines findidx and why TABLE,
EXTRA and INDIRECT must be locals with exactly these names -- confirm
against locale/weight.h.  */
# include <locale/weight.h>
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
{
table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
extra = (const unsigned char *)
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
_NL_COLLATE_INDIRECTMB);
p = pstr->mbs + idx;
/* Only the side effect on P is used; TMP is not read afterwards.  */
tmp = findidx (&p);
return p - pstr->mbs - idx;
}
else
# endif /* _LIBC */
/* Reached when _LIBC is undefined, or as the else branch when nrules is 0.  */
return 1;
}
#endif /* RE_ENABLE_I18N */
#ifndef __GNUC_PREREQ
# if defined __GNUC__ && defined __GNUC_MINOR__
# define __GNUC_PREREQ(maj, min) \
((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
# else
# define __GNUC_PREREQ(maj, min) 0
# endif
#endif
#if __GNUC_PREREQ (3,4)
# undef __attribute_warn_unused_result__
# define __attribute_warn_unused_result__ \
__attribute__ ((__warn_unused_result__))
#else
# define __attribute_warn_unused_result__ /* empty */
#endif
#endif /* _REGEX_INTERNAL_H */

4417
lib/regexec.c Normal file

File diff suppressed because it is too large Load diff

63
lib/strcasecmp.c Normal file
View file

@ -0,0 +1,63 @@
/* Case-insensitive string comparison function.
Copyright (C) 1998-1999, 2005-2007, 2009-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include <string.h>
#include <ctype.h>
#include <limits.h>
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))

/* Case-insensitive comparison of the strings S1 and S2.  The result is
   negative, zero or positive according to whether S1, with case ignored,
   sorts before, equal to or after S2.
   Note: This function does not work with multibyte strings!  */
int
strcasecmp (const char *s1, const char *s2)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* A string always equals itself.  */
  if (a == b)
    return 0;

  /* Walk both strings until they differ or S1 ends.  */
  for (;; ++a, ++b)
    {
      ca = TOLOWER (*a);
      cb = TOLOWER (*b);
      if (ca == '\0' || ca != cb)
        break;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so compare explicitly there.  */
  if (UCHAR_MAX <= INT_MAX)
    return ca - cb;
  if (ca > cb)
    return 1;
  return ca < cb ? -1 : 0;
}

176
lib/streq.h Normal file
View file

@ -0,0 +1,176 @@
/* Optimized string comparison.
Copyright (C) 2001-2002, 2007, 2009-2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Bruno Haible <bruno@clisp.org>. */
#ifndef _GL_STREQ_H
#define _GL_STREQ_H
#include <string.h>
/* STREQ allows to optimize string comparison with a small literal string.
STREQ (s, "EUC-KR", 'E', 'U', 'C', '-', 'K', 'R', 0, 0, 0)
is semantically equivalent to
strcmp (s, "EUC-KR") == 0
just faster. */
/* Help GCC to generate good code for string comparisons with
   immediate strings.  */
#if defined (__GNUC__) && defined (__OPTIMIZE__)

/* streqN (N = 0..8) compares byte N of S1 against the expected character
   passed as an argument.  A mismatch yields 0; a match on the terminating
   NUL yields 1; any other match continues with streq(N+1).  streq9 hands
   whatever follows the first nine bytes to strcmp.  */

static inline int
streq9 (const char *s1, const char *s2)
{
  return !strcmp (s1 + 9, s2 + 9);
}

static inline int
streq8 (const char *s1, const char *s2, char s28)
{
  if (s1[8] != s28)
    return 0;
  return s28 == 0 ? 1 : streq9 (s1, s2);
}

static inline int
streq7 (const char *s1, const char *s2, char s27, char s28)
{
  if (s1[7] != s27)
    return 0;
  return s27 == 0 ? 1 : streq8 (s1, s2, s28);
}

static inline int
streq6 (const char *s1, const char *s2, char s26, char s27, char s28)
{
  if (s1[6] != s26)
    return 0;
  return s26 == 0 ? 1 : streq7 (s1, s2, s27, s28);
}

static inline int
streq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28)
{
  if (s1[5] != s25)
    return 0;
  return s25 == 0 ? 1 : streq6 (s1, s2, s26, s27, s28);
}

static inline int
streq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[4] != s24)
    return 0;
  return s24 == 0 ? 1 : streq5 (s1, s2, s25, s26, s27, s28);
}

static inline int
streq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[3] != s23)
    return 0;
  return s23 == 0 ? 1 : streq4 (s1, s2, s24, s25, s26, s27, s28);
}

static inline int
streq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[2] != s22)
    return 0;
  return s22 == 0 ? 1 : streq3 (s1, s2, s23, s24, s25, s26, s27, s28);
}

static inline int
streq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[1] != s21)
    return 0;
  return s21 == 0 ? 1 : streq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28);
}

static inline int
streq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  if (s1[0] != s20)
    return 0;
  return s20 == 0 ? 1 : streq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28);
}

#define STREQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
  streq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28)

#else

/* Without GCC optimization, fall back to a plain strcmp.  */
#define STREQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
  (strcmp (s1, s2) == 0)

#endif
#endif /* _GL_STREQ_H */

123
lib/strings.in.h Normal file
View file

@ -0,0 +1,123 @@
/* A substitute <strings.h>.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _@GUARD_PREFIX@_STRINGS_H
#if __GNUC__ >= 3
@PRAGMA_SYSTEM_HEADER@
#endif
@PRAGMA_COLUMNS@
/* Minix 3.1.8 has a bug: <sys/types.h> must be included before <strings.h>.
But avoid namespace pollution on glibc systems. */
#if defined __minix && !defined __GLIBC__
# include <sys/types.h>
#endif
/* The include_next requires a split double-inclusion guard. */
#if @HAVE_STRINGS_H@
# @INCLUDE_NEXT@ @NEXT_STRINGS_H@
#endif
#ifndef _@GUARD_PREFIX@_STRINGS_H
#define _@GUARD_PREFIX@_STRINGS_H
#if ! @HAVE_DECL_STRNCASECMP@
/* Get size_t. */
# include <stddef.h>
#endif
/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */
/* The definition of _GL_ARG_NONNULL is copied here. */
/* The definition of _GL_WARN_ON_USE is copied here. */
#ifdef __cplusplus
extern "C" {
#endif
/* Find the index of the least-significant set bit. */
#if @GNULIB_FFS@
# if !@HAVE_FFS@
_GL_FUNCDECL_SYS (ffs, int, (int i));
# endif
_GL_CXXALIAS_SYS (ffs, int, (int i));
_GL_CXXALIASWARN (ffs);
#elif defined GNULIB_POSIXCHECK
# undef ffs
# if HAVE_RAW_DECL_FFS
_GL_WARN_ON_USE (ffs, "ffs is not portable - use the ffs module");
# endif
#endif
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
   greater than zero if S1 is lexicographically less than, equal to or greater
   than S2.
   Note: This function does not work in multibyte locales.
   The declaration is provided only when the system lacks strcasecmp.  */
#if ! @HAVE_STRCASECMP@
extern int strcasecmp (char const *s1, char const *s2)
     _GL_ARG_NONNULL ((1, 2));
#endif
#if defined GNULIB_POSIXCHECK
/* strcasecmp() does not work with multibyte strings:
   POSIX says that it operates on "strings", and "string" in POSIX is defined
   as a sequence of bytes, not of characters.
   Under GNULIB_POSIXCHECK any macro definition is removed and, if a raw
   declaration was detected, uses are flagged with the warning below.  */
# undef strcasecmp
# if HAVE_RAW_DECL_STRCASECMP
_GL_WARN_ON_USE (strcasecmp, "strcasecmp cannot work correctly on character "
                 "strings in multibyte locales - "
                 "use mbscasecmp if you care about "
                 "internationalization, or use c_strcasecmp "
                 "(gnulib module c-strcase) if you want a locale "
                 "independent function");
# endif
#endif
/* Compare no more than N bytes of strings S1 and S2, ignoring case,
   returning less than, equal to or greater than zero if S1 is
   lexicographically less than, equal to or greater than S2.
   Note: This function cannot work correctly in multibyte locales.
   The declaration is provided only when the system does not declare
   strncasecmp.  */
#if ! @HAVE_DECL_STRNCASECMP@
extern int strncasecmp (char const *s1, char const *s2, size_t n)
     _GL_ARG_NONNULL ((1, 2));
#endif
#if defined GNULIB_POSIXCHECK
/* strncasecmp() does not work with multibyte strings:
   POSIX says that it operates on "strings", and "string" in POSIX is defined
   as a sequence of bytes, not of characters.
   Under GNULIB_POSIXCHECK any macro definition is removed and, if a raw
   declaration was detected, uses are flagged with the warning below.  */
# undef strncasecmp
# if HAVE_RAW_DECL_STRNCASECMP
_GL_WARN_ON_USE (strncasecmp, "strncasecmp cannot work correctly on character "
                 "strings in multibyte locales - "
                 "use mbsncasecmp or mbspcasecmp if you care about "
                 "internationalization, or use c_strncasecmp "
                 "(gnulib module c-strcase) if you want a locale "
                 "independent function");
# endif
#endif
#ifdef __cplusplus
}
#endif
#endif /* _@GUARD_PREFIX@_STRINGS_H */
#endif /* _@GUARD_PREFIX@_STRINGS_H */

63
lib/strncasecmp.c Normal file
View file

@ -0,0 +1,63 @@
/* strncasecmp.c -- case insensitive string comparator
Copyright (C) 1998-1999, 2005-2007, 2009-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include <string.h>
#include <ctype.h>
#include <limits.h>
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))

/* Case-insensitive comparison of at most N bytes of the strings S1 and
   S2.  The result is negative, zero or positive according to whether the
   compared part of S1, with case ignored, sorts before, equal to or after
   that of S2.
   Note: This function cannot work correctly in multibyte locales.  */
int
strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* Nothing to compare, or a string compared with itself.  */
  if (n == 0 || a == b)
    return 0;

  /* Walk both strings until the byte budget is used up, S1 ends, or the
     strings differ.  */
  for (;; ++a, ++b)
    {
      ca = TOLOWER (*a);
      cb = TOLOWER (*b);
      if (--n == 0 || ca == '\0' || ca != cb)
        break;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so compare explicitly there.  */
  if (UCHAR_MAX <= INT_MAX)
    return ca - cb;
  if (ca > cb)
    return 1;
  return ca < cb ? -1 : 0;
}

53
lib/wcrtomb.c Normal file
View file

@ -0,0 +1,53 @@
/* Convert wide character to multibyte character.
Copyright (C) 2008-2012 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2008.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include <wchar.h>
#include <errno.h>
#include <stdlib.h>
/* Store the multibyte form of the wide character WC at S and return its
   length in bytes.  The conversion is delegated to wctomb(), so only
   stateless encodings are supported: a non-NULL PS must describe the
   initial conversion state, otherwise errno is set to EINVAL and
   (size_t) -1 is returned.  A NULL S yields 1.  If wctomb() rejects WC,
   errno is set to EILSEQ and (size_t) -1 is returned.  */
size_t
wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
{
  int len;

  /* Reject any conversion state other than the initial one.  */
  if (ps != NULL && !mbsinit (ps))
    {
      errno = EINVAL;
      return (size_t)(-1);
    }

  /* We know the NUL wide character corresponds to the NUL character.  */
  if (s == NULL)
    return 1;

  len = wctomb (s, wc);
  if (len < 0)
    {
      errno = EILSEQ;
      return (size_t)(-1);
    }

  return len;
}

499
lib/wctype.in.h Normal file
View file

@ -0,0 +1,499 @@
/* A substitute for ISO C99 <wctype.h>, for platforms that lack it.
Copyright (C) 2006-2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Written by Bruno Haible and Paul Eggert. */
/*
* ISO C 99 <wctype.h> for platforms that lack it.
* <http://www.opengroup.org/susv3xbd/wctype.h.html>
*
* iswctype, towctrans, towlower, towupper, wctrans, wctype,
* wctrans_t, and wctype_t are not yet implemented.
*/
#ifndef _@GUARD_PREFIX@_WCTYPE_H
#if __GNUC__ >= 3
@PRAGMA_SYSTEM_HEADER@
#endif
@PRAGMA_COLUMNS@
#if @HAVE_WINT_T@
/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.
Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
# include <stddef.h>
# include <stdio.h>
# include <time.h>
# include <wchar.h>
#endif
/* Include the original <wctype.h> if it exists.
BeOS 5 has the functions but no <wctype.h>. */
/* The include_next requires a split double-inclusion guard. */
#if @HAVE_WCTYPE_H@
# @INCLUDE_NEXT@ @NEXT_WCTYPE_H@
#endif
#ifndef _@GUARD_PREFIX@_WCTYPE_H
#define _@GUARD_PREFIX@_WCTYPE_H
/* The definitions of _GL_FUNCDECL_RPL etc. are copied here. */
/* The definition of _GL_WARN_ON_USE is copied here. */
/* Solaris 2.6 <wctype.h> includes <widec.h> which includes <euc.h> which
#defines a number of identifiers in the application namespace. Revert
these #defines. */
#ifdef __sun
# undef multibyte
# undef eucw1
# undef eucw2
# undef eucw3
# undef scrw1
# undef scrw2
# undef scrw3
#endif
/* Define wint_t and WEOF. (Also done in wchar.in.h.) */
#if !@HAVE_WINT_T@ && !defined wint_t
# define wint_t int
# ifndef WEOF
# define WEOF -1
# endif
#else
/* MSVC defines wint_t as 'unsigned short' in <crtdefs.h>.
This is too small: ISO C 99 section 7.24.1.(2) says that wint_t must be
"unchanged by default argument promotions". Override it. */
# if defined _MSC_VER
# if !GNULIB_defined_wint_t
# include <crtdefs.h>
typedef unsigned int rpl_wint_t;
# undef wint_t
# define wint_t rpl_wint_t
# define GNULIB_defined_wint_t 1
# endif
# endif
# ifndef WEOF
# define WEOF ((wint_t) -1)
# endif
#endif
#if !GNULIB_defined_wctype_functions
/* FreeBSD 4.4 to 4.11 has <wctype.h> but lacks the functions.
Linux libc5 has <wctype.h> and the functions but they are broken.
Assume all 11 functions (all isw* except iswblank) are implemented the
same way, or not at all. */
# if ! @HAVE_ISWCNTRL@ || @REPLACE_ISWCNTRL@
/* IRIX 5.3 has macros but no functions, its isw* macros refer to an
undefined variable _ctmp_ and to <ctype.h> macros like _P, and they
refer to system functions like _iswctype that are not in the
standard C library. Rather than try to get ancient buggy
implementations like this to work, just disable them. */
# undef iswalnum
# undef iswalpha
# undef iswblank
# undef iswcntrl
# undef iswdigit
# undef iswgraph
# undef iswlower
# undef iswprint
# undef iswpunct
# undef iswspace
# undef iswupper
# undef iswxdigit
# undef towlower
# undef towupper
/* Linux libc5 has <wctype.h> and the functions but they are broken. */
# if @REPLACE_ISWCNTRL@
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
# define iswalnum rpl_iswalnum
# define iswalpha rpl_iswalpha
# define iswblank rpl_iswblank
# define iswcntrl rpl_iswcntrl
# define iswdigit rpl_iswdigit
# define iswgraph rpl_iswgraph
# define iswlower rpl_iswlower
# define iswprint rpl_iswprint
# define iswpunct rpl_iswpunct
# define iswspace rpl_iswspace
# define iswupper rpl_iswupper
# define iswxdigit rpl_iswxdigit
# endif
# endif
# if @REPLACE_TOWLOWER@
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
# define towlower rpl_towlower
# define towupper rpl_towupper
# endif
# endif
/* Fallback alphanumeric test that only looks at the ranges '0'..'9' and,
   after clearing bit 0x20, 'A'..'Z'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswalnum
# else
iswalnum
# endif
         (wint_t wc)
{
  if (wc >= '0' && wc <= '9')
    return 1;
  /* Masking out 0x20 lets one range check serve both letter cases.  */
  return (wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z';
}
/* Fallback alphabetic test: with bit 0x20 cleared, WC must fall in
   'A'..'Z'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswalpha
# else
iswalpha
# endif
         (wint_t wc)
{
  if ((wc & ~0x20) < 'A')
    return 0;
  return (wc & ~0x20) <= 'Z';
}
/* Fallback blank test: true only for space and horizontal tab.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswblank
# else
iswblank
# endif
         (wint_t wc)
{
  switch (wc)
    {
    case ' ':
    case '\t':
      return 1;
    default:
      return 0;
    }
}
/* Fallback control-character test: true for 0x7f and for any value with
   no bit set above the low five bits.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswcntrl
# else
iswcntrl
# endif
         (wint_t wc)
{
  if (wc == 0x7f)
    return 1;
  return (wc & ~0x1f) == 0;
}
/* Fallback decimal-digit test: true for '0'..'9'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswdigit
# else
iswdigit
# endif
         (wint_t wc)
{
  if (wc < '0')
    return 0;
  return wc <= '9';
}
/* Fallback graphic-character test: true for '!'..'~'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswgraph
# else
iswgraph
# endif
         (wint_t wc)
{
  if (wc < '!' || wc > '~')
    return 0;
  return 1;
}
/* Fallback lowercase test: true for 'a'..'z'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswlower
# else
iswlower
# endif
         (wint_t wc)
{
  return !(wc < 'a' || wc > 'z');
}
/* Fallback printable-character test: true for ' '..'~'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswprint
# else
iswprint
# endif
         (wint_t wc)
{
  if (wc < ' ')
    return 0;
  return wc <= '~';
}
/* Fallback punctuation test: a character in '!'..'~' that is neither a
   digit nor, after clearing bit 0x20, in 'A'..'Z'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswpunct
# else
iswpunct
# endif
         (wint_t wc)
{
  /* Outside the graphic range.  */
  if (wc < '!' || wc > '~')
    return 0;
  /* Digits are excluded.  */
  if (wc >= '0' && wc <= '9')
    return 0;
  /* Letters of either case are excluded.  */
  if ((wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'Z')
    return 0;
  return 1;
}
/* Fallback white-space test: true for space, tab, newline, vertical tab,
   form feed and carriage return.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswspace
# else
iswspace
# endif
         (wint_t wc)
{
  switch (wc)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return 1;
    default:
      return 0;
    }
}
/* Fallback uppercase test: true for 'A'..'Z'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswupper
# else
iswupper
# endif
         (wint_t wc)
{
  if (wc < 'A')
    return 0;
  return wc <= 'Z';
}
/* Fallback hexadecimal-digit test: true for '0'..'9' and, after clearing
   bit 0x20, for 'A'..'F'.  */
static inline int
# if @REPLACE_ISWCNTRL@
rpl_iswxdigit
# else
iswxdigit
# endif
         (wint_t wc)
{
  if (wc >= '0' && wc <= '9')
    return 1;
  return (wc & ~0x20) >= 'A' && (wc & ~0x20) <= 'F';
}
/* Fallback lowercase conversion: 'A'..'Z' are shifted onto 'a'..'z',
   every other value is returned unchanged.  */
static inline wint_t
# if @REPLACE_TOWLOWER@
rpl_towlower
# else
towlower
# endif
         (wint_t wc)
{
  if (wc >= 'A' && wc <= 'Z')
    return wc - 'A' + 'a';
  return wc;
}
/* Fallback uppercase conversion: 'a'..'z' are shifted onto 'A'..'Z',
   every other value is returned unchanged.  */
static inline wint_t
# if @REPLACE_TOWLOWER@
rpl_towupper
# else
towupper
# endif
         (wint_t wc)
{
  if (wc >= 'a' && wc <= 'z')
    return wc - 'a' + 'A';
  return wc;
}
# elif @GNULIB_ISWBLANK@ && (! @HAVE_ISWBLANK@ || @REPLACE_ISWBLANK@)
/* Only the iswblank function is missing. */
# if @REPLACE_ISWBLANK@
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
# define iswblank rpl_iswblank
# endif
_GL_FUNCDECL_RPL (iswblank, int, (wint_t wc));
# else
_GL_FUNCDECL_SYS (iswblank, int, (wint_t wc));
# endif
# endif
# if defined __MINGW32__
/* On native Windows, wchar_t is uint16_t, and wint_t is uint32_t.
The functions towlower and towupper are implemented in the MSVCRT library
to take a wchar_t argument and return a wchar_t result. mingw declares
these functions to take a wint_t argument and return a wint_t result.
This means that:
1. When the user passes an argument outside the range 0x0000..0xFFFF, the
function will look only at the lower 16 bits. This is allowed according
to POSIX.
2. The return value is returned in the lower 16 bits of the result register.
The upper 16 bits are random: whatever happened to be in that part of the
result register. We need to fix this by adding a zero-extend from
wchar_t to wint_t after the call. */
/* Wrapper around the system towlower that narrows the result to wchar_t
   before widening it back to wint_t, so that bits beyond wchar_t in the
   returned value are discarded.  */
static inline wint_t
rpl_towlower (wint_t wc)
{
  wchar_t narrowed = (wchar_t) towlower (wc);

  return (wint_t) narrowed;
}
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
# define towlower rpl_towlower
# endif
/* Wrapper around the system towupper that narrows the result to wchar_t
   before widening it back to wint_t, so that bits beyond wchar_t in the
   returned value are discarded.  */
static inline wint_t
rpl_towupper (wint_t wc)
{
  wchar_t narrowed = (wchar_t) towupper (wc);

  return (wint_t) narrowed;
}
# if !(defined __cplusplus && defined GNULIB_NAMESPACE)
# define towupper rpl_towupper
# endif
# endif /* __MINGW32__ */
# define GNULIB_defined_wctype_functions 1
#endif
#if @REPLACE_ISWCNTRL@
_GL_CXXALIAS_RPL (iswalnum, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswalpha, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswcntrl, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswdigit, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswgraph, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswlower, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswprint, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswpunct, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswspace, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswupper, int, (wint_t wc));
_GL_CXXALIAS_RPL (iswxdigit, int, (wint_t wc));
#else
_GL_CXXALIAS_SYS (iswalnum, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswalpha, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswcntrl, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswdigit, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswgraph, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswlower, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswprint, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswpunct, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswspace, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswupper, int, (wint_t wc));
_GL_CXXALIAS_SYS (iswxdigit, int, (wint_t wc));
#endif
_GL_CXXALIASWARN (iswalnum);
_GL_CXXALIASWARN (iswalpha);
_GL_CXXALIASWARN (iswcntrl);
_GL_CXXALIASWARN (iswdigit);
_GL_CXXALIASWARN (iswgraph);
_GL_CXXALIASWARN (iswlower);
_GL_CXXALIASWARN (iswprint);
_GL_CXXALIASWARN (iswpunct);
_GL_CXXALIASWARN (iswspace);
_GL_CXXALIASWARN (iswupper);
_GL_CXXALIASWARN (iswxdigit);
#if @GNULIB_ISWBLANK@
# if @REPLACE_ISWCNTRL@ || @REPLACE_ISWBLANK@
_GL_CXXALIAS_RPL (iswblank, int, (wint_t wc));
# else
_GL_CXXALIAS_SYS (iswblank, int, (wint_t wc));
# endif
_GL_CXXALIASWARN (iswblank);
#endif
#if !@HAVE_WCTYPE_T@
# if !GNULIB_defined_wctype_t
typedef void * wctype_t;
# define GNULIB_defined_wctype_t 1
# endif
#endif
/* Get a descriptor for a wide character property. */
#if @GNULIB_WCTYPE@
# if !@HAVE_WCTYPE_T@
_GL_FUNCDECL_SYS (wctype, wctype_t, (const char *name));
# endif
_GL_CXXALIAS_SYS (wctype, wctype_t, (const char *name));
_GL_CXXALIASWARN (wctype);
#elif defined GNULIB_POSIXCHECK
# undef wctype
# if HAVE_RAW_DECL_WCTYPE
_GL_WARN_ON_USE (wctype, "wctype is unportable - "
"use gnulib module wctype for portability");
# endif
#endif
/* Test whether a wide character has a given property.
The argument WC must be either a wchar_t value or WEOF.
The argument DESC must have been returned by the wctype() function. */
#if @GNULIB_ISWCTYPE@
# if !@HAVE_WCTYPE_T@
_GL_FUNCDECL_SYS (iswctype, int, (wint_t wc, wctype_t desc));
# endif
_GL_CXXALIAS_SYS (iswctype, int, (wint_t wc, wctype_t desc));
_GL_CXXALIASWARN (iswctype);
#elif defined GNULIB_POSIXCHECK
# undef iswctype
# if HAVE_RAW_DECL_ISWCTYPE
_GL_WARN_ON_USE (iswctype, "iswctype is unportable - "
"use gnulib module iswctype for portability");
# endif
#endif
#if @REPLACE_TOWLOWER@ || defined __MINGW32__
_GL_CXXALIAS_RPL (towlower, wint_t, (wint_t wc));
_GL_CXXALIAS_RPL (towupper, wint_t, (wint_t wc));
#else
_GL_CXXALIAS_SYS (towlower, wint_t, (wint_t wc));
_GL_CXXALIAS_SYS (towupper, wint_t, (wint_t wc));
#endif
_GL_CXXALIASWARN (towlower);
_GL_CXXALIASWARN (towupper);
#if !@HAVE_WCTRANS_T@
# if !GNULIB_defined_wctrans_t
typedef void * wctrans_t;
# define GNULIB_defined_wctrans_t 1
# endif
#endif
/* Get a descriptor for a wide character case conversion. */
#if @GNULIB_WCTRANS@
# if !@HAVE_WCTRANS_T@
_GL_FUNCDECL_SYS (wctrans, wctrans_t, (const char *name));
# endif
_GL_CXXALIAS_SYS (wctrans, wctrans_t, (const char *name));
_GL_CXXALIASWARN (wctrans);
#elif defined GNULIB_POSIXCHECK
# undef wctrans
# if HAVE_RAW_DECL_WCTRANS
_GL_WARN_ON_USE (wctrans, "wctrans is unportable - "
"use gnulib module wctrans for portability");
# endif
#endif
/* Perform a given case conversion on a wide character.
The argument WC must be either a wchar_t value or WEOF.
The argument DESC must have been returned by the wctrans() function. */
#if @GNULIB_TOWCTRANS@
# if !@HAVE_WCTRANS_T@
_GL_FUNCDECL_SYS (towctrans, wint_t, (wint_t wc, wctrans_t desc));
# endif
_GL_CXXALIAS_SYS (towctrans, wint_t, (wint_t wc, wctrans_t desc));
_GL_CXXALIASWARN (towctrans);
#elif defined GNULIB_POSIXCHECK
# undef towctrans
# if HAVE_RAW_DECL_TOWCTRANS
_GL_WARN_ON_USE (towctrans, "towctrans is unportable - "
"use gnulib module towctrans for portability");
# endif
#endif
#endif /* _@GUARD_PREFIX@_WCTYPE_H */
#endif /* _@GUARD_PREFIX@_WCTYPE_H */

View file

@ -1,5 +1,6 @@
/* Copyright (C) 1995,1996,1997,1998,1999,2000,2001, 2002, 2003, 2004, 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
*
/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
* 2004, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
@ -91,7 +92,7 @@
#include "libguile/poll.h"
#include "libguile/ports.h"
#include "libguile/posix.h"
#ifdef HAVE_REGCOMP
#ifdef ENABLE_REGEX
#include "libguile/regex-posix.h"
#endif
#include "libguile/print.h"
@ -453,7 +454,7 @@ scm_i_init_guile (void *base)
#ifdef HAVE_POSIX
scm_init_posix ();
#endif
#ifdef HAVE_REGCOMP
#ifdef ENABLE_REGEX
scm_init_regex_posix (); /* Requires smob_prehistory */
#endif
scm_init_procs ();

View file

@ -1,4 +1,4 @@
/* Copyright (C) 1997, 1998, 1999, 2000, 2001, 2004, 2006, 2007, 2010, 2011 Free Software Foundation, Inc.
/* Copyright (C) 1997, 1998, 1999, 2000, 2001, 2004, 2006, 2007, 2010, 2011, 2012 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
@ -35,23 +35,7 @@
#include "libguile/_scm.h"
/* Supposedly, this file is never compiled unless we know we have
POSIX regular expressions. But we still put this in an #ifdef so
the file is CPP'able (for dependency scanning) even on systems that
don't have a <regex.h> header. */
#ifdef HAVE_REGCOMP
#ifdef HAVE_REGEX_H
#include <regex.h>
#else
#ifdef HAVE_RXPOSIX_H
#include <rxposix.h> /* GNU Rx library */
#else
#ifdef HAVE_RX_RXPOSIX_H
#include <rx/rxposix.h> /* GNU Rx library on Linux */
#endif
#endif
#endif
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>

116
m4/btowc.m4 Normal file
View file

@ -0,0 +1,116 @@
# btowc.m4 serial 10
dnl Copyright (C) 2008-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl gl_FUNC_BTOWC
dnl -------------
dnl Probe the btowc function.  HAVE_BTOWC is set to 0 when the function is
dnl not found.  When it is found, two cached checks are made, one for
dnl btowc of NUL and one for btowc of EOF, and REPLACE_BTOWC is set to 1
dnl unless the result of each check ends in "yes".
AC_DEFUN([gl_FUNC_BTOWC],
[
AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
dnl Check whether <wchar.h> is usable at all, first. Otherwise the test
dnl program below may lead to an endless loop. See
dnl <http://gcc.gnu.org/bugzilla/show_bug.cgi?id=42440>.
AC_REQUIRE([gl_WCHAR_H_INLINE_OK])
AC_CHECK_FUNCS_ONCE([btowc])
dnl A missing function only needs HAVE_BTOWC=0; the behavioral checks in
dnl the else branch are made only when btowc was found.
if test $ac_cv_func_btowc = no; then
HAVE_BTOWC=0
else
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_FR])
AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
dnl Cygwin 1.7.2 btowc('\0') is WEOF, not 0.
AC_CACHE_CHECK([whether btowc(0) is correct],
[gl_cv_func_btowc_nul],
[
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
if (btowc ('\0') != 0)
return 1;
return 0;
}]])],
[gl_cv_func_btowc_nul=yes],
[gl_cv_func_btowc_nul=no],
[
changequote(,)dnl
case "$host_os" in
# Guess no on Cygwin.
cygwin*) gl_cv_func_btowc_nul="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_btowc_nul="guessing yes" ;;
esac
changequote([,])dnl
])
])
dnl IRIX 6.5 btowc(EOF) is 0xFF, not WEOF.
AC_CACHE_CHECK([whether btowc(EOF) is correct],
[gl_cv_func_btowc_eof],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on IRIX.
irix*) gl_cv_func_btowc_eof="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_btowc_eof="guessing yes" ;;
esac
changequote([,])dnl
if test $LOCALE_FR != none; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
if (setlocale (LC_ALL, "$LOCALE_FR") != NULL)
{
if (btowc (EOF) != WEOF)
return 1;
}
return 0;
}]])],
[gl_cv_func_btowc_eof=yes],
[gl_cv_func_btowc_eof=no],
[:])
fi
])
dnl The pattern *yes accepts both a measured "yes" and a "guessing yes";
dnl any other result of either check requests the replacement.
case "$gl_cv_func_btowc_nul" in
*yes) ;;
*) REPLACE_BTOWC=1 ;;
esac
case "$gl_cv_func_btowc_eof" in
*yes) ;;
*) REPLACE_BTOWC=1 ;;
esac
fi
])
# Prerequisites of lib/btowc.c.
dnl The body is only the shell no-op command, so this macro performs no
dnl checks of its own.
AC_DEFUN([gl_PREREQ_BTOWC], [
:
])

View file

@ -27,7 +27,7 @@
# Specification in the form of a command-line invocation:
# gnulib-tool --import --dir=. --local-dir=gnulib-local --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl=3 --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alignof alloca-opt announce-gen autobuild bind byteswap canonicalize-lgpl ceil close connect dirfd duplocale environ extensions flock floor fpieee frexp full-read full-write func gendocs getaddrinfo getpeername getsockname getsockopt git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload havelib iconv_open-utf inet_ntop inet_pton isinf isnan ldexp lib-symbol-versions lib-symbol-visibility libunistring listen localcharset locale log1p maintainer-makefile malloc-gnu malloca nproc open pipe2 putenv recv recvfrom rename send sendto setenv setsockopt shutdown socket stat-time stdlib strftime striconveh string sys_stat trunc verify vsnprintf warnings wchar
# gnulib-tool --import --dir=. --local-dir=gnulib-local --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl=3 --no-conditional-dependencies --libtool --macro-prefix=gl --no-vc-files accept alignof alloca-opt announce-gen autobuild bind byteswap canonicalize-lgpl ceil close connect dirfd duplocale environ extensions flock floor fpieee frexp full-read full-write func gendocs getaddrinfo getpeername getsockname getsockopt git-version-gen gitlog-to-changelog gnu-web-doc-update gnupload havelib iconv_open-utf inet_ntop inet_pton isinf isnan ldexp lib-symbol-versions lib-symbol-visibility libunistring listen localcharset locale log1p maintainer-makefile malloc-gnu malloca nproc open pipe2 putenv recv recvfrom regex rename send sendto setenv setsockopt shutdown socket stat-time stdlib strftime striconveh string sys_stat trunc verify vsnprintf warnings wchar
# Specification in the form of a few gnulib-tool.m4 macro invocations:
gl_LOCAL_DIR([gnulib-local])
@ -86,6 +86,7 @@ gl_MODULES([
putenv
recv
recvfrom
regex
rename
send
sendto

View file

@ -48,6 +48,7 @@ AC_DEFUN([gl_EARLY],
AB_INIT
# Code from module binary-io:
# Code from module bind:
# Code from module btowc:
# Code from module byteswap:
# Code from module c-ctype:
# Code from module c-strcase:
@ -108,6 +109,7 @@ AC_DEFUN([gl_EARLY],
# Code from module isnand-nolibm:
# Code from module isnanf:
# Code from module isnanl:
# Code from module langinfo:
# Code from module largefile:
AC_REQUIRE([AC_SYS_LARGEFILE])
# Code from module ldexp:
@ -124,12 +126,16 @@ AC_DEFUN([gl_EARLY],
# Code from module malloc-posix:
# Code from module malloca:
# Code from module math:
# Code from module mbrtowc:
# Code from module mbsinit:
# Code from module mbtowc:
# Code from module memchr:
# Code from module msvc-inval:
# Code from module msvc-nothrow:
# Code from module multiarch:
# Code from module netdb:
# Code from module netinet_in:
# Code from module nl_langinfo:
# Code from module nocrash:
# Code from module nproc:
# Code from module open:
@ -141,6 +147,7 @@ AC_DEFUN([gl_EARLY],
# Code from module readlink:
# Code from module recv:
# Code from module recvfrom:
# Code from module regex:
# Code from module rename:
# Code from module rmdir:
# Code from module safe-read:
@ -173,9 +180,12 @@ AC_DEFUN([gl_EARLY],
# Code from module stdint:
# Code from module stdio:
# Code from module stdlib:
# Code from module strcase:
# Code from module streq:
# Code from module strftime:
# Code from module striconveh:
# Code from module string:
# Code from module strings:
# Code from module sys_file:
# Code from module sys_socket:
# Code from module sys_stat:
@ -200,6 +210,8 @@ AC_DEFUN([gl_EARLY],
# Code from module vsnprintf:
# Code from module warnings:
# Code from module wchar:
# Code from module wcrtomb:
# Code from module wctype-h:
# Code from module write:
# Code from module xsize:
])
@ -231,6 +243,12 @@ if test "$ac_cv_header_winsock2_h" = yes; then
AC_LIBOBJ([bind])
fi
gl_SYS_SOCKET_MODULE_INDICATOR([bind])
gl_FUNC_BTOWC
if test $HAVE_BTOWC = 0 || test $REPLACE_BTOWC = 1; then
AC_LIBOBJ([btowc])
gl_PREREQ_BTOWC
fi
gl_WCHAR_MODULE_INDICATOR([btowc])
gl_BYTESWAP
gl_CANONICALIZE_LGPL
if test $HAVE_CANONICALIZE_FILE_NAME = 0 || test $REPLACE_CANONICALIZE_FILE_NAME = 1; then
@ -406,6 +424,7 @@ if test $HAVE_ISNANL = 0 || test $REPLACE_ISNAN = 1; then
gl_PREREQ_ISNANL
fi
gl_MATH_MODULE_INDICATOR([isnanl])
gl_LANGINFO_H
gl_FUNC_LDEXP
gl_LD_VERSION_SCRIPT
gl_VISIBILITY
@ -440,6 +459,24 @@ fi
gl_STDLIB_MODULE_INDICATOR([malloc-posix])
gl_MALLOCA
gl_MATH_H
gl_FUNC_MBRTOWC
if test $HAVE_MBRTOWC = 0 || test $REPLACE_MBRTOWC = 1; then
AC_LIBOBJ([mbrtowc])
gl_PREREQ_MBRTOWC
fi
gl_WCHAR_MODULE_INDICATOR([mbrtowc])
gl_FUNC_MBSINIT
if test $HAVE_MBSINIT = 0 || test $REPLACE_MBSINIT = 1; then
AC_LIBOBJ([mbsinit])
gl_PREREQ_MBSINIT
fi
gl_WCHAR_MODULE_INDICATOR([mbsinit])
gl_FUNC_MBTOWC
if test $REPLACE_MBTOWC = 1; then
AC_LIBOBJ([mbtowc])
gl_PREREQ_MBTOWC
fi
gl_STDLIB_MODULE_INDICATOR([mbtowc])
gl_FUNC_MEMCHR
if test $HAVE_MEMCHR = 0 || test $REPLACE_MEMCHR = 1; then
AC_LIBOBJ([memchr])
@ -458,6 +495,11 @@ gl_MULTIARCH
gl_HEADER_NETDB
gl_HEADER_NETINET_IN
AC_PROG_MKDIR_P
gl_FUNC_NL_LANGINFO
if test $HAVE_NL_LANGINFO = 0 || test $REPLACE_NL_LANGINFO = 1; then
AC_LIBOBJ([nl_langinfo])
fi
gl_LANGINFO_MODULE_INDICATOR([nl_langinfo])
gl_NPROC
gl_FUNC_OPEN
if test $REPLACE_OPEN = 1; then
@ -501,6 +543,11 @@ if test "$ac_cv_header_winsock2_h" = yes; then
AC_LIBOBJ([recvfrom])
fi
gl_SYS_SOCKET_MODULE_INDICATOR([recvfrom])
gl_REGEX
if test $ac_use_included_regex = yes; then
AC_LIBOBJ([regex])
gl_PREREQ_REGEX
fi
gl_FUNC_RENAME
if test $REPLACE_RENAME = 1; then
AC_LIBOBJ([rename])
@ -576,12 +623,22 @@ gl_STDDEF_H
gl_STDINT_H
gl_STDIO_H
gl_STDLIB_H
gl_STRCASE
if test $HAVE_STRCASECMP = 0; then
AC_LIBOBJ([strcasecmp])
gl_PREREQ_STRCASECMP
fi
if test $HAVE_STRNCASECMP = 0; then
AC_LIBOBJ([strncasecmp])
gl_PREREQ_STRNCASECMP
fi
gl_FUNC_GNU_STRFTIME
if test $gl_cond_libtool = false; then
gl_ltlibdeps="$gl_ltlibdeps $LTLIBICONV"
gl_libdeps="$gl_libdeps $LIBICONV"
fi
gl_HEADER_STRING_H
gl_HEADER_STRINGS_H
gl_HEADER_SYS_FILE_H
AC_PROG_MKDIR_P
gl_HEADER_SYS_SOCKET
@ -623,6 +680,13 @@ gl_FUNC_VSNPRINTF
gl_STDIO_MODULE_INDICATOR([vsnprintf])
AC_SUBST([WARN_CFLAGS])
gl_WCHAR_H
gl_FUNC_WCRTOMB
if test $HAVE_WCRTOMB = 0 || test $REPLACE_WCRTOMB = 1; then
AC_LIBOBJ([wcrtomb])
gl_PREREQ_WCRTOMB
fi
gl_WCHAR_MODULE_INDICATOR([wcrtomb])
gl_WCTYPE_H
gl_FUNC_WRITE
if test $REPLACE_WRITE = 1; then
AC_LIBOBJ([write])
@ -789,6 +853,7 @@ AC_DEFUN([gl_FILE_LIST], [
lib/basename-lgpl.c
lib/binary-io.h
lib/bind.c
lib/btowc.c
lib/byteswap.in.h
lib/c-ctype.c
lib/c-ctype.h
@ -847,6 +912,7 @@ AC_DEFUN([gl_FILE_LIST], [
lib/isnanf.c
lib/isnanl.c
lib/itold.c
lib/langinfo.in.h
lib/libunistring.valgrind
lib/listen.c
lib/localcharset.c
@ -858,6 +924,10 @@ AC_DEFUN([gl_FILE_LIST], [
lib/malloca.h
lib/malloca.valgrind
lib/math.in.h
lib/mbrtowc.c
lib/mbsinit.c
lib/mbtowc-impl.h
lib/mbtowc.c
lib/memchr.c
lib/memchr.valgrind
lib/msvc-inval.c
@ -866,6 +936,7 @@ AC_DEFUN([gl_FILE_LIST], [
lib/msvc-nothrow.h
lib/netdb.in.h
lib/netinet_in.in.h
lib/nl_langinfo.c
lib/nproc.c
lib/nproc.h
lib/open.c
@ -883,6 +954,12 @@ AC_DEFUN([gl_FILE_LIST], [
lib/recvfrom.c
lib/ref-add.sin
lib/ref-del.sin
lib/regcomp.c
lib/regex.c
lib/regex.h
lib/regex_internal.c
lib/regex_internal.h
lib/regexec.c
lib/rename.c
lib/rmdir.c
lib/safe-read.c
@ -909,12 +986,16 @@ AC_DEFUN([gl_FILE_LIST], [
lib/stdint.in.h
lib/stdio.in.h
lib/stdlib.in.h
lib/strcasecmp.c
lib/streq.h
lib/strftime.c
lib/strftime.h
lib/striconveh.c
lib/striconveh.h
lib/string.in.h
lib/strings.in.h
lib/stripslash.c
lib/strncasecmp.c
lib/sys_file.in.h
lib/sys_socket.in.h
lib/sys_stat.in.h
@ -941,6 +1022,8 @@ AC_DEFUN([gl_FILE_LIST], [
lib/vsnprintf.c
lib/w32sock.h
lib/wchar.in.h
lib/wcrtomb.c
lib/wctype.in.h
lib/write.c
lib/xsize.h
m4/00gnulib.m4
@ -948,6 +1031,7 @@ AC_DEFUN([gl_FILE_LIST], [
m4/alloca.m4
m4/arpa_inet_h.m4
m4/autobuild.m4
m4/btowc.m4
m4/byteswap.m4
m4/canonicalize.m4
m4/ceil.m4
@ -995,6 +1079,7 @@ AC_DEFUN([gl_FILE_LIST], [
m4/isnand.m4
m4/isnanf.m4
m4/isnanl.m4
m4/langinfo_h.m4
m4/largefile.m4
m4/ld-version-script.m4
m4/ldexp.m4
@ -1004,6 +1089,9 @@ AC_DEFUN([gl_FILE_LIST], [
m4/libunistring-base.m4
m4/libunistring.m4
m4/localcharset.m4
m4/locale-fr.m4
m4/locale-ja.m4
m4/locale-zh.m4
m4/locale_h.m4
m4/longlong.m4
m4/lstat.m4
@ -1011,6 +1099,10 @@ AC_DEFUN([gl_FILE_LIST], [
m4/malloca.m4
m4/math_h.m4
m4/mathfunc.m4
m4/mbrtowc.m4
m4/mbsinit.m4
m4/mbstate_t.m4
m4/mbtowc.m4
m4/memchr.m4
m4/mmap-anon.m4
m4/mode_t.m4
@ -1019,6 +1111,7 @@ AC_DEFUN([gl_FILE_LIST], [
m4/multiarch.m4
m4/netdb_h.m4
m4/netinet_in_h.m4
m4/nl_langinfo.m4
m4/nocrash.m4
m4/nproc.m4
m4/open.m4
@ -1029,6 +1122,7 @@ AC_DEFUN([gl_FILE_LIST], [
m4/raise.m4
m4/read.m4
m4/readlink.m4
m4/regex.m4
m4/rename.m4
m4/rmdir.m4
m4/safe-read.m4
@ -1052,8 +1146,10 @@ AC_DEFUN([gl_FILE_LIST], [
m4/stdint_h.m4
m4/stdio_h.m4
m4/stdlib_h.m4
m4/strcase.m4
m4/strftime.m4
m4/string_h.m4
m4/strings_h.m4
m4/sys_file_h.m4
m4/sys_socket_h.m4
m4/sys_stat_h.m4
@ -1072,6 +1168,8 @@ AC_DEFUN([gl_FILE_LIST], [
m4/warnings.m4
m4/wchar_h.m4
m4/wchar_t.m4
m4/wcrtomb.m4
m4/wctype_h.m4
m4/wint_t.m4
m4/write.m4
m4/xsize.m4

105
m4/langinfo_h.m4 Normal file
View file

@ -0,0 +1,105 @@
# langinfo_h.m4 serial 7
dnl Copyright (C) 2009-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl gl_LANGINFO_H
dnl -------------
dnl Prepare the langinfo.h override.  Sets HAVE_LANGINFO_H to 1 or 0
dnl depending on whether the system header was found and, when it was,
dnl sets HAVE_LANGINFO_CODESET, HAVE_LANGINFO_T_FMT_AMPM, HAVE_LANGINFO_ERA
dnl and HAVE_LANGINFO_YESEXPR to 1 for each macro the header defines.
dnl All five variables are passed to AC_SUBST.
AC_DEFUN([gl_LANGINFO_H],
[
AC_REQUIRE([gl_LANGINFO_H_DEFAULTS])
dnl Persuade glibc-2.0.6 <langinfo.h> to define CODESET.
AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
dnl <langinfo.h> is always overridden, because of GNULIB_POSIXCHECK.
gl_CHECK_NEXT_HEADERS([langinfo.h])
dnl Determine whether <langinfo.h> exists. It is missing on mingw and BeOS.
dnl The four feature variables start at 0 and are raised only by a
dnl successful probe below.
HAVE_LANGINFO_CODESET=0
HAVE_LANGINFO_T_FMT_AMPM=0
HAVE_LANGINFO_ERA=0
HAVE_LANGINFO_YESEXPR=0
AC_CHECK_HEADERS_ONCE([langinfo.h])
if test $ac_cv_header_langinfo_h = yes; then
HAVE_LANGINFO_H=1
dnl Determine what <langinfo.h> defines. CODESET and ERA etc. are missing
dnl on OpenBSD 3.8. T_FMT_AMPM and YESEXPR, NOEXPR are missing on IRIX 5.3.
dnl Each probe compiles a small program that uses the macro as the
dnl initializer of an int variable and caches the yes or no outcome.
AC_CACHE_CHECK([whether langinfo.h defines CODESET],
[gl_cv_header_langinfo_codeset],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <langinfo.h>
int a = CODESET;
]])],
[gl_cv_header_langinfo_codeset=yes],
[gl_cv_header_langinfo_codeset=no])
])
if test $gl_cv_header_langinfo_codeset = yes; then
HAVE_LANGINFO_CODESET=1
fi
AC_CACHE_CHECK([whether langinfo.h defines T_FMT_AMPM],
[gl_cv_header_langinfo_t_fmt_ampm],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <langinfo.h>
int a = T_FMT_AMPM;
]])],
[gl_cv_header_langinfo_t_fmt_ampm=yes],
[gl_cv_header_langinfo_t_fmt_ampm=no])
])
if test $gl_cv_header_langinfo_t_fmt_ampm = yes; then
HAVE_LANGINFO_T_FMT_AMPM=1
fi
AC_CACHE_CHECK([whether langinfo.h defines ERA],
[gl_cv_header_langinfo_era],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <langinfo.h>
int a = ERA;
]])],
[gl_cv_header_langinfo_era=yes],
[gl_cv_header_langinfo_era=no])
])
if test $gl_cv_header_langinfo_era = yes; then
HAVE_LANGINFO_ERA=1
fi
AC_CACHE_CHECK([whether langinfo.h defines YESEXPR],
[gl_cv_header_langinfo_yesexpr],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <langinfo.h>
int a = YESEXPR;
]])],
[gl_cv_header_langinfo_yesexpr=yes],
[gl_cv_header_langinfo_yesexpr=no])
])
if test $gl_cv_header_langinfo_yesexpr = yes; then
HAVE_LANGINFO_YESEXPR=1
fi
else
HAVE_LANGINFO_H=0
fi
AC_SUBST([HAVE_LANGINFO_H])
AC_SUBST([HAVE_LANGINFO_CODESET])
AC_SUBST([HAVE_LANGINFO_T_FMT_AMPM])
AC_SUBST([HAVE_LANGINFO_ERA])
AC_SUBST([HAVE_LANGINFO_YESEXPR])
dnl Check for declarations of anything we want to poison if the
dnl corresponding gnulib module is not in use.
gl_WARN_ON_USE_PREPARE([[#include <langinfo.h>
]], [nl_langinfo])
])
dnl gl_LANGINFO_MODULE_INDICATOR(modulename)
dnl ----------------------------------------
dnl Forwards its first argument to gl_MODULE_INDICATOR_SET_VARIABLE and to
dnl gl_MODULE_INDICATOR_FOR_TESTS, after making sure the langinfo defaults
dnl have been expanded.
AC_DEFUN([gl_LANGINFO_MODULE_INDICATOR],
[
dnl Use AC_REQUIRE here, so that the default settings are expanded once only.
AC_REQUIRE([gl_LANGINFO_H_DEFAULTS])
gl_MODULE_INDICATOR_SET_VARIABLE([$1])
dnl Define it also as a C macro, for the benefit of the unit tests.
gl_MODULE_INDICATOR_FOR_TESTS([$1])
])
dnl gl_LANGINFO_H_DEFAULTS
dnl ----------------------
dnl Initial values of the langinfo substitution variables:
dnl GNULIB_NL_LANGINFO=0, HAVE_NL_LANGINFO=1 and REPLACE_NL_LANGINFO=0.
dnl Each one is passed to AC_SUBST.
AC_DEFUN([gl_LANGINFO_H_DEFAULTS],
[
GNULIB_NL_LANGINFO=0; AC_SUBST([GNULIB_NL_LANGINFO])
dnl Assume proper GNU behavior unless another module says otherwise.
HAVE_NL_LANGINFO=1; AC_SUBST([HAVE_NL_LANGINFO])
REPLACE_NL_LANGINFO=0; AC_SUBST([REPLACE_NL_LANGINFO])
])

246
m4/locale-fr.m4 Normal file
View file

@ -0,0 +1,246 @@
# locale-fr.m4 serial 14
dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl From Bruno Haible.
dnl Determine the name of a french locale with traditional encoding.
dnl Result: gt_cv_locale_fr (cached) and the substituted variable LOCALE_FR
dnl hold the first candidate locale name accepted by the probe program below,
dnl or "none" when no candidate is accepted or the probe cannot be built.
AC_DEFUN([gt_LOCALE_FR],
[
  AC_REQUIRE([AC_CANONICAL_HOST])
  AC_REQUIRE([AM_LANGINFO_CODESET])
  AC_CACHE_CHECK([for a traditional french locale], [gt_cv_locale_fr], [
    AC_LANG_CONFTEST([AC_LANG_SOURCE([
changequote(,)dnl
#include <locale.h>
#include <time.h>
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif
#include <stdlib.h>
#include <string.h>
struct tm t;
char buf[16];
int main () {
  /* Check whether the given locale name is recognized by the system. */
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
  /* On native Windows, setlocale(category, "") looks at the system settings,
     not at the environment variables. Also, when an encoding suffix such
     as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE
     category of the locale to "C". */
  if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL
      || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
    return 1;
#else
  if (setlocale (LC_ALL, "") == NULL) return 1;
#endif
  /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646".
     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
     is empty, and the behaviour of Tcl 8.4 in this locale is not useful.
     On OpenBSD 4.0, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "646". In this situation,
     some unit tests fail.
     On MirBSD 10, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "UTF-8". */
#if HAVE_LANGINFO_CODESET
  {
    const char *cs = nl_langinfo (CODESET);
    if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0
        || strcmp (cs, "UTF-8") == 0)
      return 1;
  }
#endif
#ifdef __CYGWIN__
  /* On Cygwin, avoid locale names without encoding suffix, because the
     locale_charset() function relies on the encoding suffix. Note that
     LC_ALL is set on the command line. */
  if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1;
#endif
  /* Check whether in the abbreviation of the second month, the second
     character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is only
     one byte long. This excludes the UTF-8 encoding. */
  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
  if (strftime (buf, sizeof (buf), "%b", &t) < 3 || buf[2] != 'v') return 1;
  /* Check whether the decimal separator is a comma.
     On NetBSD 3.0 in the fr_FR.ISO8859-1 locale, localeconv()->decimal_point
     and nl_langinfo(RADIXCHAR) are both ".". */
  if (localeconv () ->decimal_point[0] != ',') return 1;
  return 0;
}
changequote([,])dnl
      ])])
    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
      case "$host_os" in
        # Handle native Windows specially, because there setlocale() interprets
        # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256",
        # "fr" or "fra" as "French" or "French_France.1252",
        # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252",
        # "ja" as "Japanese" or "Japanese_Japan.932",
        # and similar.
        mingw*)
          # Test for the native Windows locale name.
          if (LC_ALL=French_France.1252 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_fr=French_France.1252
          else
            # None found.
            gt_cv_locale_fr=none
          fi
          ;;
        *)
          # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
          # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
          # configure script would override the LC_ALL setting. Likewise for
          # LC_CTYPE, which is also set at the beginning of the configure script.
          # Test for the usual locale name.
          if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_fr=fr_FR
          else
            # Test for the locale name with explicit encoding suffix.
            if (LC_ALL=fr_FR.ISO-8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
              gt_cv_locale_fr=fr_FR.ISO-8859-1
            else
              # Test for the AIX, OSF/1, FreeBSD, NetBSD, OpenBSD locale name.
              if (LC_ALL=fr_FR.ISO8859-1 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                gt_cv_locale_fr=fr_FR.ISO8859-1
              else
                # Test for the HP-UX locale name.
                if (LC_ALL=fr_FR.iso88591 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                  gt_cv_locale_fr=fr_FR.iso88591
                else
                  # Test for the Solaris 7 locale name.
                  if (LC_ALL=fr LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                    gt_cv_locale_fr=fr
                  else
                    # None found.
                    gt_cv_locale_fr=none
                  fi
                fi
              fi
            fi
          fi
          ;;
      esac
    else
      # The probe could not be compiled and linked, so no locale name can be
      # validated. Report "none" instead of leaving the result unset.
      gt_cv_locale_fr=none
    fi
    rm -fr conftest*
  ])
  LOCALE_FR=$gt_cv_locale_fr
  AC_SUBST([LOCALE_FR])
])
dnl Determine the name of a french locale with UTF-8 encoding.
dnl Result: gt_cv_locale_fr_utf8 (cached) and the substituted variable
dnl LOCALE_FR_UTF8 hold the first candidate locale name accepted by the probe
dnl program below, or "none" when no candidate is accepted or the probe cannot
dnl be built.
AC_DEFUN([gt_LOCALE_FR_UTF8],
[
  dnl host_os is consulted in the case statement below, so make sure it is set.
  AC_REQUIRE([AC_CANONICAL_HOST])
  AC_REQUIRE([AM_LANGINFO_CODESET])
  AC_CACHE_CHECK([for a french Unicode locale], [gt_cv_locale_fr_utf8], [
    AC_LANG_CONFTEST([AC_LANG_SOURCE([
changequote(,)dnl
#include <locale.h>
#include <time.h>
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif
#include <stdlib.h>
#include <string.h>
struct tm t;
char buf[16];
int main () {
  /* On BeOS and Haiku, locales are not implemented in libc. Rather, libintl
     imitates locale dependent behaviour by looking at the environment
     variables, and all locales use the UTF-8 encoding. */
#if !(defined __BEOS__ || defined __HAIKU__)
  /* Check whether the given locale name is recognized by the system. */
# if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
  /* On native Windows, setlocale(category, "") looks at the system settings,
     not at the environment variables. Also, when an encoding suffix such
     as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE
     category of the locale to "C". */
  if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL
      || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
    return 1;
# else
  if (setlocale (LC_ALL, "") == NULL) return 1;
# endif
  /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646".
     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
     is empty, and the behaviour of Tcl 8.4 in this locale is not useful.
     On OpenBSD 4.0, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "646". In this situation,
     some unit tests fail. */
# if HAVE_LANGINFO_CODESET
  {
    const char *cs = nl_langinfo (CODESET);
    if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0)
      return 1;
  }
# endif
# ifdef __CYGWIN__
  /* On Cygwin, avoid locale names without encoding suffix, because the
     locale_charset() function relies on the encoding suffix. Note that
     LC_ALL is set on the command line. */
  if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1;
# endif
  /* Check whether in the abbreviation of the second month, the second
     character (should be U+00E9: LATIN SMALL LETTER E WITH ACUTE) is
     two bytes long, with UTF-8 encoding. */
  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
  if (strftime (buf, sizeof (buf), "%b", &t) < 4
      || buf[1] != (char) 0xc3 || buf[2] != (char) 0xa9 || buf[3] != 'v')
    return 1;
#endif
  /* Check whether the decimal separator is a comma.
     On NetBSD 3.0 in the fr_FR.ISO8859-1 locale, localeconv()->decimal_point
     and nl_langinfo(RADIXCHAR) are both ".". */
  if (localeconv () ->decimal_point[0] != ',') return 1;
  return 0;
}
changequote([,])dnl
      ])])
    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
      case "$host_os" in
        # Handle native Windows specially, because there setlocale() interprets
        # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256",
        # "fr" or "fra" as "French" or "French_France.1252",
        # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252",
        # "ja" as "Japanese" or "Japanese_Japan.932",
        # and similar.
        mingw*)
          # Test for the hypothetical native Windows locale name.
          if (LC_ALL=French_France.65001 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_fr_utf8=French_France.65001
          else
            # None found.
            gt_cv_locale_fr_utf8=none
          fi
          ;;
        *)
          # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
          # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
          # configure script would override the LC_ALL setting. Likewise for
          # LC_CTYPE, which is also set at the beginning of the configure script.
          # Test for the usual locale name.
          if (LC_ALL=fr_FR LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_fr_utf8=fr_FR
          else
            # Test for the locale name with explicit encoding suffix.
            if (LC_ALL=fr_FR.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
              gt_cv_locale_fr_utf8=fr_FR.UTF-8
            else
              # Test for the Solaris 7 locale name.
              if (LC_ALL=fr.UTF-8 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                gt_cv_locale_fr_utf8=fr.UTF-8
              else
                # None found.
                gt_cv_locale_fr_utf8=none
              fi
            fi
          fi
          ;;
      esac
    else
      # The probe could not be compiled and linked, so no locale name can be
      # validated. Report "none" instead of leaving the result unset.
      gt_cv_locale_fr_utf8=none
    fi
    rm -fr conftest*
  ])
  LOCALE_FR_UTF8=$gt_cv_locale_fr_utf8
  AC_SUBST([LOCALE_FR_UTF8])
])

136
m4/locale-ja.m4 Normal file
View file

@ -0,0 +1,136 @@
# locale-ja.m4 serial 10
dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl From Bruno Haible.
dnl Determine the name of a japanese locale with EUC-JP encoding.
dnl Result: gt_cv_locale_ja (cached) and the substituted variable LOCALE_JA
dnl hold the first candidate locale name accepted by the checks below, or
dnl "none" when no candidate is accepted or the probe cannot be built.
AC_DEFUN([gt_LOCALE_JA],
[
  AC_REQUIRE([AC_CANONICAL_HOST])
  AC_REQUIRE([AM_LANGINFO_CODESET])
  AC_CACHE_CHECK([for a traditional japanese locale], [gt_cv_locale_ja], [
    AC_LANG_CONFTEST([AC_LANG_SOURCE([
changequote(,)dnl
#include <locale.h>
#include <time.h>
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif
#include <stdlib.h>
#include <string.h>
struct tm t;
char buf[16];
int main ()
{
  const char *p;
  /* Check whether the given locale name is recognized by the system. */
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
  /* On native Windows, setlocale(category, "") looks at the system settings,
     not at the environment variables. Also, when an encoding suffix such
     as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE
     category of the locale to "C". */
  if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL
      || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
    return 1;
#else
  if (setlocale (LC_ALL, "") == NULL) return 1;
#endif
  /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646".
     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
     is empty, and the behaviour of Tcl 8.4 in this locale is not useful.
     On OpenBSD 4.0, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "646". In this situation,
     some unit tests fail.
     On MirBSD 10, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "UTF-8". */
#if HAVE_LANGINFO_CODESET
  {
    const char *cs = nl_langinfo (CODESET);
    if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0
        || strcmp (cs, "UTF-8") == 0)
      return 1;
  }
#endif
#ifdef __CYGWIN__
  /* On Cygwin, avoid locale names without encoding suffix, because the
     locale_charset() function relies on the encoding suffix. Note that
     LC_ALL is set on the command line. */
  if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1;
#endif
  /* Check whether MB_CUR_MAX is > 1. This excludes the dysfunctional locales
     on Cygwin 1.5.x. */
  if (MB_CUR_MAX == 1)
    return 1;
  /* Check whether in a month name, no byte in the range 0x80..0x9F occurs.
     This excludes the UTF-8 encoding (except on MirBSD). */
  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
  if (strftime (buf, sizeof (buf), "%B", &t) < 2) return 1;
  for (p = buf; *p != '\0'; p++)
    if ((unsigned char) *p >= 0x80 && (unsigned char) *p < 0xa0)
      return 1;
  return 0;
}
changequote([,])dnl
      ])])
    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
      case "$host_os" in
        # Handle native Windows specially, because there setlocale() interprets
        # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256",
        # "fr" or "fra" as "French" or "French_France.1252",
        # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252",
        # "ja" as "Japanese" or "Japanese_Japan.932",
        # and similar.
        mingw*)
          # Note that on native Windows, the Japanese locale is
          # Japanese_Japan.932, and CP932 is very different from EUC-JP, so we
          # cannot use it here.
          gt_cv_locale_ja=none
          ;;
        *)
          # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
          # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
          # configure script would override the LC_ALL setting. Likewise for
          # LC_CTYPE, which is also set at the beginning of the configure script.
          # Test for the AIX locale name.
          if (LC_ALL=ja_JP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_ja=ja_JP
          else
            # Test for the locale name with explicit encoding suffix.
            if (LC_ALL=ja_JP.EUC-JP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
              gt_cv_locale_ja=ja_JP.EUC-JP
            else
              # Test for the HP-UX, OSF/1, NetBSD locale name.
              if (LC_ALL=ja_JP.eucJP LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                gt_cv_locale_ja=ja_JP.eucJP
              else
                # Test for the IRIX, FreeBSD locale name.
                if (LC_ALL=ja_JP.EUC LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                  gt_cv_locale_ja=ja_JP.EUC
                else
                  # Test for the Solaris 7 locale name.
                  if (LC_ALL=ja LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
                    gt_cv_locale_ja=ja
                  else
                    # Special test for NetBSD 1.6.
                    if test -f /usr/share/locale/ja_JP.eucJP/LC_CTYPE; then
                      gt_cv_locale_ja=ja_JP.eucJP
                    else
                      # None found.
                      gt_cv_locale_ja=none
                    fi
                  fi
                fi
              fi
            fi
          fi
          ;;
      esac
    else
      # The probe could not be compiled and linked, so no locale name can be
      # validated. Report "none" instead of leaving the result unset.
      gt_cv_locale_ja=none
    fi
    rm -fr conftest*
  ])
  LOCALE_JA=$gt_cv_locale_ja
  AC_SUBST([LOCALE_JA])
])

130
m4/locale-zh.m4 Normal file
View file

@ -0,0 +1,130 @@
# locale-zh.m4 serial 10
dnl Copyright (C) 2003, 2005-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl From Bruno Haible.
dnl Determine the name of a chinese locale with GB18030 encoding.
dnl Result: gt_cv_locale_zh_CN (cached) and the substituted variable
dnl LOCALE_ZH_CN hold the first candidate locale name accepted by the probe
dnl program below, or "none" when no candidate is accepted, the host is
dnl excluded below, or the probe cannot be built.
AC_DEFUN([gt_LOCALE_ZH_CN],
[
  AC_REQUIRE([AC_CANONICAL_HOST])
  AC_REQUIRE([AM_LANGINFO_CODESET])
  AC_CACHE_CHECK([for a transitional chinese locale], [gt_cv_locale_zh_CN], [
    AC_LANG_CONFTEST([AC_LANG_SOURCE([
changequote(,)dnl
#include <locale.h>
#include <stdlib.h>
#include <time.h>
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif
#include <string.h>
struct tm t;
char buf[16];
int main ()
{
  const char *p;
  /* Check whether the given locale name is recognized by the system. */
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
  /* On native Windows, setlocale(category, "") looks at the system settings,
     not at the environment variables. Also, when an encoding suffix such
     as ".65001" or ".54936" is specified, it succeeds but sets the LC_CTYPE
     category of the locale to "C". */
  if (setlocale (LC_ALL, getenv ("LC_ALL")) == NULL
      || strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
    return 1;
#else
  if (setlocale (LC_ALL, "") == NULL) return 1;
#endif
  /* Check whether nl_langinfo(CODESET) is nonempty and not "ASCII" or "646".
     On MacOS X 10.3.5 (Darwin 7.5) in the fr_FR locale, nl_langinfo(CODESET)
     is empty, and the behaviour of Tcl 8.4 in this locale is not useful.
     On OpenBSD 4.0, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "646". In this situation,
     some unit tests fail.
     On MirBSD 10, when an unsupported locale is specified, setlocale()
     succeeds but then nl_langinfo(CODESET) is "UTF-8". */
#if HAVE_LANGINFO_CODESET
  {
    const char *cs = nl_langinfo (CODESET);
    if (cs[0] == '\0' || strcmp (cs, "ASCII") == 0 || strcmp (cs, "646") == 0
        || strcmp (cs, "UTF-8") == 0)
      return 1;
  }
#endif
#ifdef __CYGWIN__
  /* On Cygwin, avoid locale names without encoding suffix, because the
     locale_charset() function relies on the encoding suffix. Note that
     LC_ALL is set on the command line. */
  if (strchr (getenv ("LC_ALL"), '.') == NULL) return 1;
#endif
  /* Check whether in a month name, no byte in the range 0x80..0x9F occurs.
     This excludes the UTF-8 encoding (except on MirBSD). */
  t.tm_year = 1975 - 1900; t.tm_mon = 2 - 1; t.tm_mday = 4;
  if (strftime (buf, sizeof (buf), "%B", &t) < 2) return 1;
  for (p = buf; *p != '\0'; p++)
    if ((unsigned char) *p >= 0x80 && (unsigned char) *p < 0xa0)
      return 1;
  /* Check whether a typical GB18030 multibyte sequence is recognized as a
     single wide character. This excludes the GB2312 and GBK encodings. */
  if (mblen ("\203\062\332\066", 5) != 4)
    return 1;
  return 0;
}
changequote([,])dnl
      ])])
    if AC_TRY_EVAL([ac_link]) && test -s conftest$ac_exeext; then
      case "$host_os" in
        # Handle native Windows specially, because there setlocale() interprets
        # "ar" as "Arabic" or "Arabic_Saudi Arabia.1256",
        # "fr" or "fra" as "French" or "French_France.1252",
        # "ge"(!) or "deu"(!) as "German" or "German_Germany.1252",
        # "ja" as "Japanese" or "Japanese_Japan.932",
        # and similar.
        mingw*)
          # Test for the hypothetical native Windows locale name.
          if (LC_ALL=Chinese_China.54936 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_zh_CN=Chinese_China.54936
          else
            # None found.
            gt_cv_locale_zh_CN=none
          fi
          ;;
        solaris2.8)
          # On Solaris 8, the locales zh_CN.GB18030, zh_CN.GBK, zh.GBK are
          # broken. One witness is the test case in gl_MBRTOWC_SANITYCHECK.
          # Another witness is that "LC_ALL=zh_CN.GB18030 bash -c true" dumps core.
          gt_cv_locale_zh_CN=none
          ;;
        *)
          # Setting LC_ALL is not enough. Need to set LC_TIME to empty, because
          # otherwise on MacOS X 10.3.5 the LC_TIME=C from the beginning of the
          # configure script would override the LC_ALL setting. Likewise for
          # LC_CTYPE, which is also set at the beginning of the configure script.
          # Test for the locale name without encoding suffix.
          if (LC_ALL=zh_CN LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
            gt_cv_locale_zh_CN=zh_CN
          else
            # Test for the locale name with explicit encoding suffix.
            if (LC_ALL=zh_CN.GB18030 LC_TIME= LC_CTYPE= ./conftest; exit) 2>/dev/null; then
              gt_cv_locale_zh_CN=zh_CN.GB18030
            else
              # None found.
              gt_cv_locale_zh_CN=none
            fi
          fi
          ;;
      esac
    else
      # If there was a link error, due to mblen(), the system is so old that
      # it certainly doesn't have a chinese locale.
      gt_cv_locale_zh_CN=none
    fi
    rm -fr conftest*
  ])
  LOCALE_ZH_CN=$gt_cv_locale_zh_CN
  AC_SUBST([LOCALE_ZH_CN])
])

572
m4/mbrtowc.m4 Normal file
View file

@ -0,0 +1,572 @@
# mbrtowc.m4 serial 25
dnl Copyright (C) 2001-2002, 2004-2005, 2008-2012 Free Software Foundation,
dnl Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl gl_FUNC_MBRTOWC
dnl Decide whether mbrtowc is missing (HAVE_MBRTOWC=0) or has to be replaced
dnl (REPLACE_MBRTOWC=1), and define one C macro per detected bug.
AC_DEFUN([gl_FUNC_MBRTOWC],
[
  AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
  AC_REQUIRE([AC_TYPE_MBSTATE_T])
  gl_MBSTATE_T_BROKEN
  AC_CHECK_FUNCS_ONCE([mbrtowc])
  if test $ac_cv_func_mbrtowc = no; then
    dnl The function is absent; find out whether a declaration exists anyway.
    HAVE_MBRTOWC=0
    AC_CHECK_DECLS([mbrtowc],,, [[
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
]])
    if test $ac_cv_have_decl_mbrtowc = yes; then
      dnl Minix 3.1.8 declares mbrtowc() in its <wchar.h> without providing
      dnl the function. Replacing avoids a clash with the gnulib version.
      REPLACE_MBRTOWC=1
    fi
  elif test $REPLACE_MBSTATE_T = 1; then
    REPLACE_MBRTOWC=1
  else
    dnl The function exists and mbstate_t is usable: probe for each known
    dnl bug, and request the replacement as soon as one of them is present.
    gl_MBRTOWC_NULL_ARG1
    gl_MBRTOWC_NULL_ARG2
    gl_MBRTOWC_RETVAL
    gl_MBRTOWC_NUL_RETVAL
    case "$gl_cv_func_mbrtowc_null_arg1" in
      *yes) ;;
      *)
        AC_DEFINE([MBRTOWC_NULL_ARG1_BUG], [1],
          [Define if the mbrtowc function has the NULL pwc argument bug.])
        REPLACE_MBRTOWC=1
        ;;
    esac
    case "$gl_cv_func_mbrtowc_null_arg2" in
      *yes) ;;
      *)
        AC_DEFINE([MBRTOWC_NULL_ARG2_BUG], [1],
          [Define if the mbrtowc function has the NULL string argument bug.])
        REPLACE_MBRTOWC=1
        ;;
    esac
    case "$gl_cv_func_mbrtowc_retval" in
      *yes) ;;
      *)
        AC_DEFINE([MBRTOWC_RETVAL_BUG], [1],
          [Define if the mbrtowc function returns a wrong return value.])
        REPLACE_MBRTOWC=1
        ;;
    esac
    case "$gl_cv_func_mbrtowc_nul_retval" in
      *yes) ;;
      *)
        AC_DEFINE([MBRTOWC_NUL_RETVAL_BUG], [1],
          [Define if the mbrtowc function does not return 0 for a NUL character.])
        REPLACE_MBRTOWC=1
        ;;
    esac
  fi
])
dnl Determine whether both mbsinit() and mbrtowc() must be overridden in a way
dnl that changes the meaning of the mbstate_t type.
dnl The answer is stored in REPLACE_MBSTATE_T.
dnl A value of 1 means that mbsinit() and mbrtowc() are replaced together, so
dnl that the two never disagree.
AC_DEFUN([gl_MBSTATE_T_BROKEN],
[
  AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
  AC_REQUIRE([AC_TYPE_MBSTATE_T])
  AC_CHECK_FUNCS_ONCE([mbsinit])
  AC_CHECK_FUNCS_ONCE([mbrtowc])
  if test $ac_cv_func_mbsinit = yes && test $ac_cv_func_mbrtowc = yes; then
    gl_MBRTOWC_INCOMPLETE_STATE
    gl_MBRTOWC_SANITYCHECK
    dnl Keep the system type only when both probes answered yes or
    dnl guessing yes; any other answer forces the replacement.
    case "$gl_cv_func_mbrtowc_incomplete_state" in
      *yes)
        case "$gl_cv_func_mbrtowc_sanitycheck" in
          *yes) REPLACE_MBSTATE_T=0 ;;
          *) REPLACE_MBSTATE_T=1 ;;
        esac
        ;;
      *)
        REPLACE_MBSTATE_T=1
        ;;
    esac
  else
    dnl One of the two functions is missing altogether.
    REPLACE_MBSTATE_T=1
  fi
])
dnl Test whether mbrtowc puts the state into non-initial state when parsing an
dnl incomplete multibyte character.
dnl Result is gl_cv_func_mbrtowc_incomplete_state.
dnl The value is "yes" or "no" when the run-time probe below was executed; it
dnl stays at the host_os based "guessing yes" or "guessing no" when LOCALE_JA
dnl is "none" or when cross-compiling.
AC_DEFUN([gl_MBRTOWC_INCOMPLETE_STATE],
[
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_JA])
AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
AC_CACHE_CHECK([whether mbrtowc handles incomplete characters],
[gl_cv_func_mbrtowc_incomplete_state],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on AIX and OSF/1.
aix* | osf*) gl_cv_func_mbrtowc_incomplete_state="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_mbrtowc_incomplete_state="guessing yes" ;;
esac
changequote([,])dnl
dnl The probe hands mbrtowc only the first byte of a multibyte character in
dnl the Japanese locale. It exits with status 1 when mbrtowc reports an
dnl incomplete character while mbsinit still claims the initial state.
if test $LOCALE_JA != none; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
{
const char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 1, 1, &state) == (size_t)(-2))
if (mbsinit (&state))
return 1;
}
return 0;
}]])],
[gl_cv_func_mbrtowc_incomplete_state=yes],
[gl_cv_func_mbrtowc_incomplete_state=no],
[:])
fi
])
])
dnl Test whether mbrtowc works no worse than mbtowc.
dnl Result is gl_cv_func_mbrtowc_sanitycheck.
dnl The value is "yes" or "no" when the run-time probe below was executed; it
dnl stays at the host_os based "guessing yes" or "guessing no" when
dnl LOCALE_ZH_CN is "none" or when cross-compiling.
AC_DEFUN([gl_MBRTOWC_SANITYCHECK],
[
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_ZH_CN])
AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
AC_CACHE_CHECK([whether mbrtowc works as well as mbtowc],
[gl_cv_func_mbrtowc_sanitycheck],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on Solaris 8.
solaris2.8) gl_cv_func_mbrtowc_sanitycheck="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_mbrtowc_sanitycheck="guessing yes" ;;
esac
changequote([,])dnl
dnl The probe converts the same four-byte sequence with both functions in the
dnl Chinese locale. It exits with status 1 only when mbtowc returns 4 and
dnl mbrtowc does not.
if test $LOCALE_ZH_CN != none; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <stdlib.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
/* This fails on Solaris 8:
mbrtowc returns 2, and sets wc to 0x00F0.
mbtowc returns 4 (correct) and sets wc to 0x5EDC. */
if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
{
char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 3, 6, &state) != 4
&& mbtowc (&wc, input + 3, 6) == 4)
return 1;
}
return 0;
}]])],
[gl_cv_func_mbrtowc_sanitycheck=yes],
[gl_cv_func_mbrtowc_sanitycheck=no],
[:])
fi
])
])
dnl Test whether mbrtowc supports a NULL pwc argument correctly.
dnl Result is gl_cv_func_mbrtowc_null_arg1.
dnl The value is "yes" or "no" when the run-time probe below was executed; it
dnl stays at the host_os based "guessing yes" or "guessing no" when
dnl LOCALE_FR_UTF8 is "none" or when cross-compiling.
AC_DEFUN([gl_MBRTOWC_NULL_ARG1],
[
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_FR_UTF8])
AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
AC_CACHE_CHECK([whether mbrtowc handles a NULL pwc argument],
[gl_cv_func_mbrtowc_null_arg1],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on Solaris.
solaris*) gl_cv_func_mbrtowc_null_arg1="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_mbrtowc_null_arg1="guessing yes" ;;
esac
changequote([,])dnl
dnl The probe converts the same two-byte character twice, first with a real
dnl pwc and then with a NULL pwc. Its exit status is a bit mask: 1 and 4 flag
dnl a return value other than 2 in the first and second call, 2 and 8 flag a
dnl state that is not initial after the first and second call.
if test $LOCALE_FR_UTF8 != none; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <stdlib.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
int result = 0;
if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
{
char input[] = "\303\237er";
mbstate_t state;
wchar_t wc;
size_t ret;
memset (&state, '\0', sizeof (mbstate_t));
wc = (wchar_t) 0xBADFACE;
ret = mbrtowc (&wc, input, 5, &state);
if (ret != 2)
result |= 1;
if (!mbsinit (&state))
result |= 2;
memset (&state, '\0', sizeof (mbstate_t));
ret = mbrtowc (NULL, input, 5, &state);
if (ret != 2) /* Solaris 7 fails here: ret is -1. */
result |= 4;
if (!mbsinit (&state))
result |= 8;
}
return result;
}]])],
[gl_cv_func_mbrtowc_null_arg1=yes],
[gl_cv_func_mbrtowc_null_arg1=no],
[:])
fi
])
])
dnl Test whether mbrtowc supports a NULL string argument correctly.
dnl Result is gl_cv_func_mbrtowc_null_arg2.
dnl The value is "yes" or "no" when the run-time probe below was executed; it
dnl stays at the host_os based "guessing yes" or "guessing no" when
dnl LOCALE_FR_UTF8 is "none" or when cross-compiling.
AC_DEFUN([gl_MBRTOWC_NULL_ARG2],
[
  AC_REQUIRE([AC_PROG_CC])
  AC_REQUIRE([gt_LOCALE_FR_UTF8])
  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
  AC_CACHE_CHECK([whether mbrtowc handles a NULL string argument],
    [gl_cv_func_mbrtowc_null_arg2],
    [
      dnl Initial guess, used when cross-compiling or when no suitable locale
      dnl is present.
changequote(,)dnl
      case "$host_os" in
        # Guess no on OSF/1.
        osf*) gl_cv_func_mbrtowc_null_arg2="guessing no" ;;
        # Guess yes otherwise.
        *) gl_cv_func_mbrtowc_null_arg2="guessing yes" ;;
      esac
changequote([,])dnl
      dnl The probe calls mbrtowc with a NULL string and exits with status 1
      dnl when the call overwrote the sentinel value stored in wc.
      if test $LOCALE_FR_UTF8 != none; then
        AC_RUN_IFELSE(
          [AC_LANG_SOURCE([[
#include <locale.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   <wchar.h>.
   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
   included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
  if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
    {
      mbstate_t state;
      wchar_t wc;
      memset (&state, '\0', sizeof (mbstate_t));
      wc = (wchar_t) 0xBADFACE;
      mbrtowc (&wc, NULL, 5, &state);
      /* Check that wc was not modified. */
      if (wc != (wchar_t) 0xBADFACE)
        return 1;
    }
  return 0;
}]])],
          [gl_cv_func_mbrtowc_null_arg2=yes],
          [gl_cv_func_mbrtowc_null_arg2=no],
          [:])
      fi
    ])
])
dnl Test whether mbrtowc, when parsing the end of a multibyte character,
dnl correctly returns the number of bytes that were needed to complete the
dnl character (not the total number of bytes of the multibyte character).
dnl Result is gl_cv_func_mbrtowc_retval.
dnl The value is "yes" or "no" when the run-time probe below was executed and
dnl could select a locale; otherwise it stays at the host_os based
dnl "guessing yes" or "guessing no".
AC_DEFUN([gl_MBRTOWC_RETVAL],
[
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_FR_UTF8])
AC_REQUIRE([gt_LOCALE_JA])
AC_REQUIRE([AC_CANONICAL_HOST])
AC_CACHE_CHECK([whether mbrtowc has a correct return value],
[gl_cv_func_mbrtowc_retval],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on HP-UX, Solaris, native Windows.
hpux* | solaris* | mingw*) gl_cv_func_mbrtowc_retval="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_mbrtowc_retval="guessing yes" ;;
esac
changequote([,])dnl
dnl Run the probe when a french UTF-8 or japanese locale is known, and always
dnl on mingw, where the probe tries hard-coded Windows locale names.
dnl In each locale it can select, the probe feeds mbrtowc the first byte of a
dnl multibyte character and then the rest, and sets one bit of the exit status
dnl when the second call returns an unexpected count. Exit status 77 means
dnl that no locale could be selected; the initial guess is kept in that case.
if test $LOCALE_FR_UTF8 != none || test $LOCALE_JA != none \
|| { case "$host_os" in mingw*) true;; *) false;; esac; }; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
int result = 0;
int found_some_locale = 0;
/* This fails on Solaris. */
if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
{
char input[] = "B\303\274\303\237er"; /* "Büßer" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 1, 1, &state) == (size_t)(-2))
{
input[1] = '\0';
if (mbrtowc (&wc, input + 2, 5, &state) != 1)
result |= 1;
}
found_some_locale = 1;
}
/* This fails on HP-UX 11.11. */
if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
{
char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 1, 1, &state) == (size_t)(-2))
{
input[1] = '\0';
if (mbrtowc (&wc, input + 2, 5, &state) != 2)
result |= 2;
}
found_some_locale = 1;
}
/* This fails on native Windows. */
if (setlocale (LC_ALL, "Japanese_Japan.932") != NULL)
{
char input[] = "<\223\372\226\173\214\352>"; /* "<日本語>" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 3, 1, &state) == (size_t)(-2))
{
input[3] = '\0';
if (mbrtowc (&wc, input + 4, 4, &state) != 1)
result |= 4;
}
found_some_locale = 1;
}
if (setlocale (LC_ALL, "Chinese_Taiwan.950") != NULL)
{
char input[] = "<\244\351\245\273\273\171>"; /* "<日本語>" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 3, 1, &state) == (size_t)(-2))
{
input[3] = '\0';
if (mbrtowc (&wc, input + 4, 4, &state) != 1)
result |= 8;
}
found_some_locale = 1;
}
if (setlocale (LC_ALL, "Chinese_China.936") != NULL)
{
char input[] = "<\310\325\261\276\325\132>"; /* "<日本語>" */
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, input + 3, 1, &state) == (size_t)(-2))
{
input[3] = '\0';
if (mbrtowc (&wc, input + 4, 4, &state) != 1)
result |= 16;
}
found_some_locale = 1;
}
return (found_some_locale ? result : 77);
}]])],
[gl_cv_func_mbrtowc_retval=yes],
[if test $? != 77; then
gl_cv_func_mbrtowc_retval=no
fi
],
[:])
fi
])
])
dnl Test whether mbrtowc, when parsing a NUL character, correctly returns 0.
dnl Result is gl_cv_func_mbrtowc_nul_retval.
dnl The value is "yes" or "no" when the run-time probe below was executed; it
dnl stays at the host_os based "guessing yes" or "guessing no" when
dnl LOCALE_ZH_CN is "none" or when cross-compiling.
AC_DEFUN([gl_MBRTOWC_NUL_RETVAL],
[
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_ZH_CN])
AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
AC_CACHE_CHECK([whether mbrtowc returns 0 when parsing a NUL character],
[gl_cv_func_mbrtowc_nul_retval],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on Solaris 8 and 9.
solaris2.[89]) gl_cv_func_mbrtowc_nul_retval="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_mbrtowc_nul_retval="guessing yes" ;;
esac
changequote([,])dnl
dnl The probe converts an empty string in the Chinese locale and exits with
dnl status 1 when mbrtowc returns anything other than 0.
if test $LOCALE_ZH_CN != none; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
/* This fails on Solaris 8 and 9. */
if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
{
mbstate_t state;
wchar_t wc;
memset (&state, '\0', sizeof (mbstate_t));
if (mbrtowc (&wc, "", 1, &state) != 0)
return 1;
}
return 0;
}]])],
[gl_cv_func_mbrtowc_nul_retval=yes],
[gl_cv_func_mbrtowc_nul_retval=no],
[:])
fi
])
])
# Prerequisites of lib/mbrtowc.c.
# Nothing needs to be checked, so the body is just the shell no-op command.
AC_DEFUN([gl_PREREQ_MBRTOWC],
[
  :
])
dnl From Paul Eggert
dnl This is an override of an autoconf macro.
dnl It link-tests a small program that declares an mbstate_t object and calls
dnl mbrtowc, caches the answer in gl_cv_func_mbrtowc, and defines HAVE_MBRTOWC
dnl when the program links.
AC_DEFUN([AC_FUNC_MBRTOWC],
[
  dnl Identical to the AC_FUNC_MBRTOWC that ships with autoconf-2.60.
  AC_CACHE_CHECK([whether mbrtowc and mbstate_t are properly declared],
    [gl_cv_func_mbrtowc],
    [AC_LINK_IFELSE(
       [AC_LANG_PROGRAM(
          [[/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be
included before <wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h>
must be included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>]],
          [[wchar_t wc;
char const s[] = "";
size_t n = 1;
mbstate_t state;
return ! (sizeof state && (mbrtowc) (&wc, s, n, &state));]])],
       [gl_cv_func_mbrtowc=yes],
       [gl_cv_func_mbrtowc=no])])
  if test $gl_cv_func_mbrtowc = yes; then
    AC_DEFINE([HAVE_MBRTOWC], [1],
      [Define to 1 if mbrtowc and mbstate_t are properly declared.])
  fi
])

51
m4/mbsinit.m4 Normal file
View file

@ -0,0 +1,51 @@
# mbsinit.m4 serial 8
dnl Copyright (C) 2008, 2010-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_FUNC_MBSINIT],
[
  AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
  AC_REQUIRE([AC_CANONICAL_HOST])

  AC_REQUIRE([AC_TYPE_MBSTATE_T])
  gl_MBSTATE_T_BROKEN

  AC_CHECK_FUNCS_ONCE([mbsinit])
  if test $ac_cv_func_mbsinit = no; then
    dnl The function was not found; still look for a declaration.
    HAVE_MBSINIT=0
    AC_CHECK_DECLS([mbsinit],,, [[
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   <wchar.h>.
   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
   included before <wchar.h>.  */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
]])
    if test $ac_cv_have_decl_mbsinit = yes; then
      dnl Minix 3.1.8 declares the function in <wchar.h> without providing
      dnl it.  Requesting a replacement keeps gnulib's definition from
      dnl colliding with that declaration.
      REPLACE_MBSINIT=1
    fi
  elif test $REPLACE_MBSTATE_T = 1; then
    dnl A replaced mbstate_t implies a replaced mbsinit.
    REPLACE_MBSINIT=1
  else
    dnl The mingw implementation always returns 1, which is wrong for a
    dnl state that mbrtowc left after an incomplete multibyte character in
    dnl a multibyte locale.
    case "$host_os" in
      mingw*) REPLACE_MBSINIT=1 ;;
    esac
  fi
])
# Prerequisites of lib/mbsinit.c.
# No configure-time checks are performed; the body is just the shell no-op.
AC_DEFUN([gl_PREREQ_MBSINIT],
[
  :
])

41
m4/mbstate_t.m4 Normal file
View file

@ -0,0 +1,41 @@
# mbstate_t.m4 serial 13
dnl Copyright (C) 2000-2002, 2008-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
# From Paul Eggert.
# BeOS 5 has <wchar.h> but does not define mbstate_t,
# so you can't declare an object of that type.
# Check for this incompatibility with Standard C.
# AC_TYPE_MBSTATE_T
# -----------------
AC_DEFUN([AC_TYPE_MBSTATE_T],
[
  dnl System extensions are requested first; this is needed for HP-UX 11.11.
  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])

  dnl Compile a program that declares an mbstate_t object after including
  dnl <wchar.h>, and cache whether that succeeds.
  AC_CACHE_CHECK([for mbstate_t], [ac_cv_type_mbstate_t],
    [AC_COMPILE_IFELSE(
       [AC_LANG_PROGRAM(
          [AC_INCLUDES_DEFAULT[
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
   <wchar.h>.
   BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
   included before <wchar.h>.  */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>]],
          [[mbstate_t x; return sizeof x;]])],
       [ac_cv_type_mbstate_t=yes],
       [ac_cv_type_mbstate_t=no])])

  case $ac_cv_type_mbstate_t in
    yes)
      AC_DEFINE([HAVE_MBSTATE_T], [1],
        [Define to 1 if <wchar.h> declares mbstate_t.])
      ;;
    *)
      dnl Without the type, supply a stand-in definition as int.
      AC_DEFINE([mbstate_t], [int],
        [Define to a type if <wchar.h> does not define.])
      ;;
  esac
])

19
m4/mbtowc.m4 Normal file
View file

@ -0,0 +1,19 @@
# mbtowc.m4 serial 2
dnl Copyright (C) 2011-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_FUNC_MBTOWC],
[
  AC_REQUIRE([gl_STDLIB_H_DEFAULTS])

  dnl Placeholder: the condition is the constant false, so this macro never
  dnl sets the replacement flag.
  if false; then
    REPLACE_MBTOWC=1
  fi
])
# Prerequisites of lib/mbtowc.c.
# No configure-time checks are performed; the body is just the shell no-op.
AC_DEFUN([gl_PREREQ_MBTOWC],
[
  :
])

50
m4/nl_langinfo.m4 Normal file
View file

@ -0,0 +1,50 @@
# nl_langinfo.m4 serial 5
dnl Copyright (C) 2009-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_FUNC_NL_LANGINFO],
[
  AC_REQUIRE([gl_LANGINFO_H_DEFAULTS])
  AC_REQUIRE([gl_LANGINFO_H])
  AC_CHECK_FUNCS_ONCE([nl_langinfo])
  dnl The host triplet feeds the cross-compilation guess below.
  AC_REQUIRE([AC_CANONICAL_HOST])

  if test $ac_cv_func_nl_langinfo = yes; then
    # On Irix 6.5, YESEXPR is defined, but nl_langinfo(YESEXPR) is broken.
    AC_CACHE_CHECK([whether YESEXPR works],
      [gl_cv_func_nl_langinfo_yesexpr_works],
      [AC_RUN_IFELSE(
         [AC_LANG_PROGRAM([[#include <langinfo.h>
]], [[return !*nl_langinfo(YESEXPR);
]])],
         [gl_cv_func_nl_langinfo_yesexpr_works=yes],
         [gl_cv_func_nl_langinfo_yesexpr_works=no],
         [
           case "$host_os" in
                    # Guess no on irix systems.
             irix*) gl_cv_func_nl_langinfo_yesexpr_works="guessing no";;
                    # Guess yes elsewhere.
             *)     gl_cv_func_nl_langinfo_yesexpr_works="guessing yes";;
           esac
         ])
      ])

    dnl A measured yes and a guessed yes both count as working.
    case $gl_cv_func_nl_langinfo_yesexpr_works in
      *yes) FUNC_NL_LANGINFO_YESEXPR_WORKS=1 ;;
      *)    FUNC_NL_LANGINFO_YESEXPR_WORKS=0 ;;
    esac
    AC_DEFINE_UNQUOTED([FUNC_NL_LANGINFO_YESEXPR_WORKS],
      [$FUNC_NL_LANGINFO_YESEXPR_WORKS],
      [Define to 1 if nl_langinfo (YESEXPR) returns a non-empty string.])

    dnl The system function is kept only when CODESET, ERA and YESEXPR are
    dnl all usable; otherwise the override is requested.
    if test $HAVE_LANGINFO_CODESET = 1 && test $HAVE_LANGINFO_ERA = 1 \
        && test $FUNC_NL_LANGINFO_YESEXPR_WORKS = 1; then
      :
    else
      REPLACE_NL_LANGINFO=1
      AC_DEFINE([REPLACE_NL_LANGINFO], [1],
        [Define if nl_langinfo exists but is overridden by gnulib.])
    fi
  else
    HAVE_NL_LANGINFO=0
  fi
])

223
m4/regex.m4 Normal file
View file

@ -0,0 +1,223 @@
# serial 60
# Copyright (C) 1996-2001, 2003-2012 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
dnl Initially derived from code in GNU grep.
dnl Mostly written by Jim Meyering.
AC_PREREQ([2.50])
dnl gl_REGEX
dnl --------
dnl Decide whether to build the bundled regex implementation.  An explicit
dnl --with-included-regex or --without-included-regex is honored as given.
dnl Otherwise a run test exercises the system regex functions, and the
dnl bundled copy is selected when that test fails or cannot be run because
dnl of cross-compilation.  When the bundled copy is selected, each public
dnl regex name is mapped to an rpl_ prefixed name through a preprocessor
dnl define.
AC_DEFUN([gl_REGEX],
[
AC_ARG_WITH([included-regex],
[AS_HELP_STRING([--without-included-regex],
[don't compile regex; this is the default on systems
with recent-enough versions of the GNU C Library
(use with caution on other systems).])])
dnl Three cases: an explicit yes or no is copied through, an empty value
dnl triggers the run test, and anything else aborts configure with an error.
case $with_included_regex in #(
yes|no) ac_use_included_regex=$with_included_regex
;;
'')
# If the system regex support is good enough that it passes the
# following run test, then default to *not* using the included regex.c.
# If cross compiling, assume the test would fail and use the included
# regex.c.
AC_CACHE_CHECK([for working re_compile_pattern],
[gl_cv_func_re_compile_pattern_working],
[AC_RUN_IFELSE(
[AC_LANG_PROGRAM(
[AC_INCLUDES_DEFAULT[
#include <locale.h>
#include <limits.h>
#include <regex.h>
]],
[[int result = 0;
static struct re_pattern_buffer regex;
unsigned char folded_chars[UCHAR_MAX + 1];
int i;
const char *s;
struct re_registers regs;
/* http://sourceware.org/ml/libc-hacker/2006-09/msg00008.html
This test needs valgrind to catch the bug on Debian
GNU/Linux 3.1 x86, but it might catch the bug better
on other platforms and it shouldn't hurt to try the
test here. */
if (setlocale (LC_ALL, "en_US.UTF-8"))
{
static char const pat[] = "insert into";
static char const data[] =
"\xFF\0\x12\xA2\xAA\xC4\xB1,K\x12\xC4\xB1*\xACK";
re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE
| RE_ICASE);
memset (&regex, 0, sizeof regex);
s = re_compile_pattern (pat, sizeof pat - 1, &regex);
if (s)
result |= 1;
else if (re_search (&regex, data, sizeof data - 1,
0, sizeof data - 1, &regs)
!= -1)
result |= 1;
if (! setlocale (LC_ALL, "C"))
return 1;
}
/* This test is from glibc bug 3957, reported by Andrew Mackey. */
re_set_syntax (RE_SYNTAX_EGREP | RE_HAT_LISTS_NOT_NEWLINE);
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("a[^x]b", 6, &regex);
if (s)
result |= 2;
/* This should fail, but succeeds for glibc-2.5. */
else if (re_search (&regex, "a\nb", 3, 0, 3, &regs) != -1)
result |= 2;
/* This regular expression is from Spencer ere test number 75
in grep-2.3. */
re_set_syntax (RE_SYNTAX_POSIX_EGREP);
memset (&regex, 0, sizeof regex);
for (i = 0; i <= UCHAR_MAX; i++)
folded_chars[i] = i;
regex.translate = folded_chars;
s = re_compile_pattern ("a[[:@:>@:]]b\n", 11, &regex);
/* This should fail with _Invalid character class name_ error. */
if (!s)
result |= 4;
/* Ensure that [b-a] is diagnosed as invalid, when
using RE_NO_EMPTY_RANGES. */
re_set_syntax (RE_SYNTAX_POSIX_EGREP | RE_NO_EMPTY_RANGES);
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("a[b-a]", 6, &regex);
if (s == 0)
result |= 8;
/* This should succeed, but does not for glibc-2.1.3. */
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("{1", 2, &regex);
if (s)
result |= 8;
/* The following example is derived from a problem report
against gawk from Jorge Stolfi <stolfi@ic.unicamp.br>. */
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("[an\371]*n", 7, &regex);
if (s)
result |= 8;
/* This should match, but does not for glibc-2.2.1. */
else if (re_match (&regex, "an", 2, 0, &regs) != 2)
result |= 8;
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("x", 1, &regex);
if (s)
result |= 8;
/* glibc-2.2.93 does not work with a negative RANGE argument. */
else if (re_search (&regex, "wxy", 3, 2, -2, &regs) != 1)
result |= 8;
/* The version of regex.c in older versions of gnulib
ignored RE_ICASE. Detect that problem too. */
re_set_syntax (RE_SYNTAX_EMACS | RE_ICASE);
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("x", 1, &regex);
if (s)
result |= 16;
else if (re_search (&regex, "WXY", 3, 0, 3, &regs) < 0)
result |= 16;
/* Catch a bug reported by Vin Shelton in
http://lists.gnu.org/archive/html/bug-coreutils/2007-06/msg00089.html
*/
re_set_syntax (RE_SYNTAX_POSIX_BASIC
& ~RE_CONTEXT_INVALID_DUP
& ~RE_NO_EMPTY_RANGES);
memset (&regex, 0, sizeof regex);
s = re_compile_pattern ("[[:alnum:]_-]\\\\+$", 16, &regex);
if (s)
result |= 32;
/* REG_STARTEND was added to glibc on 2004-01-15.
Reject older versions. */
if (! REG_STARTEND)
result |= 64;
#if 0
/* It would be nice to reject hosts whose regoff_t values are too
narrow (including glibc on hosts with 64-bit ptrdiff_t and
32-bit int), but we should wait until glibc implements this
feature. Otherwise, support for equivalence classes and
multibyte collation symbols would always be broken except
when compiling --without-included-regex. */
if (sizeof (regoff_t) < sizeof (ptrdiff_t)
|| sizeof (regoff_t) < sizeof (ssize_t))
result |= 64;
#endif
return result;
]])],
[gl_cv_func_re_compile_pattern_working=yes],
[gl_cv_func_re_compile_pattern_working=no],
dnl When crosscompiling, assume it is not working.
[gl_cv_func_re_compile_pattern_working=no])])
dnl The decision is the inverse of the test result: a working system regex
dnl means the bundled copy is not used.
case $gl_cv_func_re_compile_pattern_working in #(
yes) ac_use_included_regex=no;; #(
no) ac_use_included_regex=yes;;
esac
;;
*) AC_MSG_ERROR([Invalid value for --with-included-regex: $with_included_regex])
;;
esac
dnl With the bundled copy selected, request wide offsets and map every
dnl public regex name to its rpl_ counterpart.
if test $ac_use_included_regex = yes; then
AC_DEFINE([_REGEX_LARGE_OFFSETS], [1],
[Define if you want regoff_t to be at least as wide POSIX requires.])
AC_DEFINE([re_syntax_options], [rpl_re_syntax_options],
[Define to rpl_re_syntax_options if the replacement should be used.])
AC_DEFINE([re_set_syntax], [rpl_re_set_syntax],
[Define to rpl_re_set_syntax if the replacement should be used.])
AC_DEFINE([re_compile_pattern], [rpl_re_compile_pattern],
[Define to rpl_re_compile_pattern if the replacement should be used.])
AC_DEFINE([re_compile_fastmap], [rpl_re_compile_fastmap],
[Define to rpl_re_compile_fastmap if the replacement should be used.])
AC_DEFINE([re_search], [rpl_re_search],
[Define to rpl_re_search if the replacement should be used.])
AC_DEFINE([re_search_2], [rpl_re_search_2],
[Define to rpl_re_search_2 if the replacement should be used.])
AC_DEFINE([re_match], [rpl_re_match],
[Define to rpl_re_match if the replacement should be used.])
AC_DEFINE([re_match_2], [rpl_re_match_2],
[Define to rpl_re_match_2 if the replacement should be used.])
AC_DEFINE([re_set_registers], [rpl_re_set_registers],
[Define to rpl_re_set_registers if the replacement should be used.])
AC_DEFINE([re_comp], [rpl_re_comp],
[Define to rpl_re_comp if the replacement should be used.])
AC_DEFINE([re_exec], [rpl_re_exec],
[Define to rpl_re_exec if the replacement should be used.])
AC_DEFINE([regcomp], [rpl_regcomp],
[Define to rpl_regcomp if the replacement should be used.])
AC_DEFINE([regexec], [rpl_regexec],
[Define to rpl_regexec if the replacement should be used.])
AC_DEFINE([regerror], [rpl_regerror],
[Define to rpl_regerror if the replacement should be used.])
AC_DEFINE([regfree], [rpl_regfree],
[Define to rpl_regfree if the replacement should be used.])
fi
])
# Prerequisites of lib/regex.c and lib/regex_internal.c.
AC_DEFUN([gl_PREREQ_REGEX],
[
  dnl Compiler and type prerequisites, each expanded at most once.
  AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
  AC_REQUIRE([AC_C_INLINE])
  AC_REQUIRE([AC_C_RESTRICT])
  AC_REQUIRE([AC_TYPE_MBSTATE_T])

  dnl Optional header, functions and declaration probed for the sources.
  AC_CHECK_HEADERS([libintl.h])
  AC_CHECK_FUNCS_ONCE([isblank iswctype wcscoll])
  AC_CHECK_DECLS([isblank], [], [], [[#include <ctype.h>]])
])

45
m4/strcase.m4 Normal file
View file

@ -0,0 +1,45 @@
# strcase.m4 serial 11
dnl Copyright (C) 2002, 2005-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_STRCASE],
[
  dnl Umbrella macro: run both case-insensitive comparison checks in turn.
  gl_FUNC_STRCASECMP
  gl_FUNC_STRNCASECMP
])
AC_DEFUN([gl_FUNC_STRCASECMP],
[
  AC_REQUIRE([gl_HEADER_STRINGS_H_DEFAULTS])

  AC_CHECK_FUNCS([strcasecmp])
  dnl Only a definite no changes the flag; otherwise it is left untouched.
  case $ac_cv_func_strcasecmp in
    no) HAVE_STRCASECMP=0 ;;
  esac
])
AC_DEFUN([gl_FUNC_STRNCASECMP],
[
  AC_REQUIRE([gl_HEADER_STRINGS_H_DEFAULTS])

  dnl Record whether the function itself is available.
  AC_CHECK_FUNCS([strncasecmp])
  case $ac_cv_func_strncasecmp in
    yes) HAVE_STRNCASECMP=1 ;;
    *)   HAVE_STRNCASECMP=0 ;;
  esac

  dnl Separately record a missing declaration.
  AC_CHECK_DECLS([strncasecmp])
  case $ac_cv_have_decl_strncasecmp in
    no) HAVE_DECL_STRNCASECMP=0 ;;
  esac
])
# Prerequisites of lib/strcasecmp.c.
# No configure-time checks are performed; the body is just the shell no-op.
AC_DEFUN([gl_PREREQ_STRCASECMP],
[
  :
])
# Prerequisites of lib/strncasecmp.c.
# No configure-time checks are performed; the body is just the shell no-op.
AC_DEFUN([gl_PREREQ_STRNCASECMP],
[
  :
])

52
m4/strings_h.m4 Normal file
View file

@ -0,0 +1,52 @@
# Configure a replacement for <strings.h>.
# serial 6
# Copyright (C) 2007, 2009-2012 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
AC_DEFUN([gl_HEADER_STRINGS_H],
[
  dnl The real work lives in the body macro.  Going through AC_REQUIRE makes
  dnl it expand a single time, ahead of statements from other macros.
  AC_REQUIRE([gl_HEADER_STRINGS_H_BODY])
])
AC_DEFUN([gl_HEADER_STRINGS_H_BODY],
[
  AC_REQUIRE([gl_HEADER_STRINGS_H_DEFAULTS])

  gl_CHECK_NEXT_HEADERS([strings.h])
  dnl Translate the header result into the 1 or 0 value that is substituted.
  case $ac_cv_header_strings_h in
    yes) HAVE_STRINGS_H=1 ;;
    *)   HAVE_STRINGS_H=0 ;;
  esac
  AC_SUBST([HAVE_STRINGS_H])

  dnl Look for declarations of the functions that are to be poisoned when
  dnl the matching gnulib module is not in use.
  gl_WARN_ON_USE_PREPARE([[
/* Minix 3.1.8 has a bug: <sys/types.h> must be included before
   <strings.h>.  */
#include <sys/types.h>
#include <strings.h>
]], [ffs strcasecmp strncasecmp])
])
AC_DEFUN([gl_STRINGS_MODULE_INDICATOR],
[
  dnl Pull in the defaults through AC_REQUIRE so they are emitted just once.
  AC_REQUIRE([gl_HEADER_STRINGS_H_DEFAULTS])
  dnl Forward the caller's argument to the generic indicator macro.
  gl_MODULE_INDICATOR_SET_VARIABLE([$1])
])
AC_DEFUN([gl_HEADER_STRINGS_H_DEFAULTS],
[
  dnl Module indicator, off by default.
  GNULIB_FFS=0
  AC_SUBST([GNULIB_FFS])

  dnl Feature flags start from the assumption of proper GNU behavior; other
  dnl modules lower them when a check says otherwise.
  HAVE_FFS=1
  AC_SUBST([HAVE_FFS])
  HAVE_STRCASECMP=1
  AC_SUBST([HAVE_STRCASECMP])
  HAVE_DECL_STRNCASECMP=1
  AC_SUBST([HAVE_DECL_STRNCASECMP])
])

112
m4/wcrtomb.m4 Normal file
View file

@ -0,0 +1,112 @@
# wcrtomb.m4 serial 11
dnl Copyright (C) 2008-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl gl_FUNC_WCRTOMB
dnl ---------------
dnl Decide whether wcrtomb is missing (HAVE_WCRTOMB=0) or must be overridden
dnl (REPLACE_WCRTOMB=1).  An override is requested when the function is
dnl declared but not found, when REPLACE_MBSTATE_T is 1, or when the
dnl return-value check below yields a result that does not end in yes.
AC_DEFUN([gl_FUNC_WCRTOMB],
[
AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
AC_REQUIRE([AC_TYPE_MBSTATE_T])
gl_MBSTATE_T_BROKEN
AC_CHECK_FUNCS_ONCE([wcrtomb])
if test $ac_cv_func_wcrtomb = no; then
dnl The function was not found; still look for a declaration in <wchar.h>.
HAVE_WCRTOMB=0
AC_CHECK_DECLS([wcrtomb],,, [[
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
]])
if test $ac_cv_have_decl_wcrtomb = yes; then
dnl On Minix 3.1.8, the system's <wchar.h> declares wcrtomb() although
dnl it does not have the function. Avoid a collision with gnulib's
dnl replacement.
REPLACE_WCRTOMB=1
fi
else
if test $REPLACE_MBSTATE_T = 1; then
REPLACE_WCRTOMB=1
else
dnl On AIX 4.3, OSF/1 5.1 and Solaris 10, wcrtomb (NULL, 0, NULL) sometimes
dnl returns 0 instead of 1.
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([gt_LOCALE_FR])
AC_REQUIRE([gt_LOCALE_FR_UTF8])
AC_REQUIRE([gt_LOCALE_JA])
AC_REQUIRE([gt_LOCALE_ZH_CN])
AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
AC_CACHE_CHECK([whether wcrtomb return value is correct],
[gl_cv_func_wcrtomb_retval],
[
dnl Initial guess, used when cross-compiling or when no suitable locale
dnl is present.
changequote(,)dnl
case "$host_os" in
# Guess no on AIX 4, OSF/1 and Solaris.
aix4* | osf* | solaris*) gl_cv_func_wcrtomb_retval="guessing no" ;;
# Guess yes otherwise.
*) gl_cv_func_wcrtomb_retval="guessing yes" ;;
esac
changequote([,])dnl
dnl The run test is attempted only when at least one of the four locale
dnl variables is something other than none.  Its exit status is a bitmask
dnl with one bit per locale in which the call did not return 1.
if test $LOCALE_FR != none || test $LOCALE_FR_UTF8 != none || test $LOCALE_JA != none || test $LOCALE_ZH_CN != none; then
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
#include <locale.h>
#include <string.h>
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
int main ()
{
int result = 0;
if (setlocale (LC_ALL, "$LOCALE_FR") != NULL)
{
if (wcrtomb (NULL, 0, NULL) != 1)
result |= 1;
}
if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
{
if (wcrtomb (NULL, 0, NULL) != 1)
result |= 2;
}
if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
{
if (wcrtomb (NULL, 0, NULL) != 1)
result |= 4;
}
if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
{
if (wcrtomb (NULL, 0, NULL) != 1)
result |= 8;
}
return result;
}]])],
[gl_cv_func_wcrtomb_retval=yes],
[gl_cv_func_wcrtomb_retval=no],
[:])
fi
])
dnl Only a result ending in yes, measured or guessed, keeps the system
dnl function; every other result requests the replacement.
case "$gl_cv_func_wcrtomb_retval" in
*yes) ;;
*) REPLACE_WCRTOMB=1 ;;
esac
fi
fi
])
# Prerequisites of lib/wcrtomb.c.
# No configure-time checks are performed; the body is just the shell no-op.
AC_DEFUN([gl_PREREQ_WCRTOMB],
[
  :
])

211
m4/wctype_h.m4 Normal file
View file

@ -0,0 +1,211 @@
# wctype_h.m4 serial 16
dnl A placeholder for ISO C99 <wctype.h>, for platforms that lack it.
dnl Copyright (C) 2006-2012 Free Software Foundation, Inc.
dnl This file is free software; the Free Software Foundation
dnl gives unlimited permission to copy and/or distribute it,
dnl with or without modifications, as long as this notice is preserved.
dnl Written by Paul Eggert.
dnl gl_WCTYPE_H
dnl -----------
dnl Probe the system's <wctype.h> support.  The macro records whether
dnl iswcntrl, the wint_t type and the header itself are available, whether
dnl the classification functions appear to work, whether towlower needs a
dnl replacement, and whether the types wctype_t and wctrans_t are defined.
dnl The results are stored in the HAVE_ and REPLACE_ shell variables set
dnl below, several of which are also substituted.
AC_DEFUN([gl_WCTYPE_H],
[
AC_REQUIRE([gl_WCTYPE_H_DEFAULTS])
AC_REQUIRE([AC_PROG_CC])
AC_REQUIRE([AC_CANONICAL_HOST])
AC_CHECK_FUNCS_ONCE([iswcntrl])
if test $ac_cv_func_iswcntrl = yes; then
HAVE_ISWCNTRL=1
else
HAVE_ISWCNTRL=0
fi
AC_SUBST([HAVE_ISWCNTRL])
AC_REQUIRE([AC_C_INLINE])
AC_REQUIRE([gt_TYPE_WINT_T])
if test $gt_cv_c_wint_t = yes; then
HAVE_WINT_T=1
else
HAVE_WINT_T=0
fi
AC_SUBST([HAVE_WINT_T])
gl_CHECK_NEXT_HEADERS([wctype.h])
dnl The behavioral test runs only when both the header and iswcntrl exist.
if test $ac_cv_header_wctype_h = yes; then
if test $ac_cv_func_iswcntrl = yes; then
dnl Linux libc5 has an iswprint function that returns 0 for all arguments.
dnl The other functions are likely broken in the same way.
AC_CACHE_CHECK([whether iswcntrl works], [gl_cv_func_iswcntrl_works],
[
AC_RUN_IFELSE(
[AC_LANG_SOURCE([[
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be
included before <wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h>
must be included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>
int main () { return iswprint ('x') == 0; }
]])],
[gl_cv_func_iswcntrl_works=yes], [gl_cv_func_iswcntrl_works=no],
[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <stdlib.h>
#if __GNU_LIBRARY__ == 1
Linux libc5 i18n is broken.
#endif]], [])],
[gl_cv_func_iswcntrl_works=yes], [gl_cv_func_iswcntrl_works=no])
])
])
fi
HAVE_WCTYPE_H=1
else
HAVE_WCTYPE_H=0
fi
AC_SUBST([HAVE_WCTYPE_H])
dnl The variable is quoted here because the check above is skipped when the
dnl header or the function is missing, which can leave it unset.
if test "$gl_cv_func_iswcntrl_works" = no; then
REPLACE_ISWCNTRL=1
else
REPLACE_ISWCNTRL=0
fi
AC_SUBST([REPLACE_ISWCNTRL])
if test $HAVE_ISWCNTRL = 0 || test $REPLACE_ISWCNTRL = 1; then
dnl Redefine all of iswcntrl, ..., iswxdigit in <wctype.h>.
:
fi
dnl When the classification functions are replaced, towlower is replaced as
dnl well; otherwise it is probed on its own.
if test $REPLACE_ISWCNTRL = 1; then
REPLACE_TOWLOWER=1
else
AC_CHECK_FUNCS([towlower])
if test $ac_cv_func_towlower = yes; then
REPLACE_TOWLOWER=0
else
AC_CHECK_DECLS([towlower],,,
[[/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be
included before <wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h>
must be included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
#if HAVE_WCTYPE_H
# include <wctype.h>
#endif
]])
if test $ac_cv_have_decl_towlower = yes; then
dnl On Minix 3.1.8, the system's <wctype.h> declares towlower() and
dnl towupper() although it does not have the functions. Avoid a
dnl collision with gnulib's replacement.
REPLACE_TOWLOWER=1
else
REPLACE_TOWLOWER=0
fi
fi
fi
AC_SUBST([REPLACE_TOWLOWER])
if test $HAVE_ISWCNTRL = 0 || test $REPLACE_TOWLOWER = 1; then
dnl Redefine towlower, towupper in <wctype.h>.
:
fi
dnl We assume that the wctype() and iswctype() functions exist if and only
dnl if the type wctype_t is defined in <wchar.h> or in <wctype.h> if that
dnl exists.
dnl HP-UX 11.00 declares all these in <wchar.h> and lacks <wctype.h>.
AC_CACHE_CHECK([for wctype_t], [gl_cv_type_wctype_t],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(
[[/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be
included before <wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h>
must be included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
#if HAVE_WCTYPE_H
# include <wctype.h>
#endif
wctype_t a;
]],
[[]])],
[gl_cv_type_wctype_t=yes],
[gl_cv_type_wctype_t=no])
])
if test $gl_cv_type_wctype_t = no; then
HAVE_WCTYPE_T=0
fi
dnl We assume that the wctrans() and towctrans() functions exist if and only
dnl if the type wctrans_t is defined in <wctype.h>.
AC_CACHE_CHECK([for wctrans_t], [gl_cv_type_wctrans_t],
[AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM(
[[/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be
included before <wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h>
must be included before <wchar.h>. */
#include <stddef.h>
#include <stdio.h>
#include <time.h>
#include <wchar.h>
#include <wctype.h>
wctrans_t a;
]],
[[]])],
[gl_cv_type_wctrans_t=yes],
[gl_cv_type_wctrans_t=no])
])
if test $gl_cv_type_wctrans_t = no; then
HAVE_WCTRANS_T=0
fi
dnl Check for declarations of anything we want to poison if the
dnl corresponding gnulib module is not in use.
gl_WARN_ON_USE_PREPARE([[
/* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
<wchar.h>.
BSD/OS 4.0.1 has a bug: <stddef.h>, <stdio.h> and <time.h> must be
included before <wchar.h>. */
#if !(defined __GLIBC__ && !defined __UCLIBC__)
# include <stddef.h>
# include <stdio.h>
# include <time.h>
# include <wchar.h>
#endif
#include <wctype.h>
]],
[wctype iswctype wctrans towctrans
])
])
AC_DEFUN([gl_WCTYPE_MODULE_INDICATOR],
[
  dnl Pull in the defaults through AC_REQUIRE so they are emitted just once.
  AC_REQUIRE([gl_WCTYPE_H_DEFAULTS])
  gl_MODULE_INDICATOR_SET_VARIABLE([$1])
  dnl The unit tests need the indicator as a C macro as well.
  gl_MODULE_INDICATOR_FOR_TESTS([$1])
])
AC_DEFUN([gl_WCTYPE_H_DEFAULTS],
[
  dnl Module indicators, all off by default.
  GNULIB_ISWBLANK=0
  AC_SUBST([GNULIB_ISWBLANK])
  GNULIB_WCTYPE=0
  AC_SUBST([GNULIB_WCTYPE])
  GNULIB_ISWCTYPE=0
  AC_SUBST([GNULIB_ISWCTYPE])
  GNULIB_WCTRANS=0
  AC_SUBST([GNULIB_WCTRANS])
  GNULIB_TOWCTRANS=0
  AC_SUBST([GNULIB_TOWCTRANS])

  dnl Feature flags start from the assumption of proper GNU behavior; other
  dnl modules change them when a check says otherwise.
  HAVE_ISWBLANK=1
  AC_SUBST([HAVE_ISWBLANK])
  HAVE_WCTYPE_T=1
  AC_SUBST([HAVE_WCTYPE_T])
  HAVE_WCTRANS_T=1
  AC_SUBST([HAVE_WCTRANS_T])
  REPLACE_ISWBLANK=0
  AC_SUBST([REPLACE_ISWBLANK])
])