1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-01 15:20:34 +02:00

Convert regexps to use statically-allocated tc16

* libguile/scm.h: Allocate tc16.
* libguile/regex-posix.h:
* libguile/regex-posix.c:
* libguile/finalizers.h:
* libguile/finalizers.c: Adapt.
This commit is contained in:
Andy Wingo 2025-06-17 14:35:51 +02:00
parent 197345fa02
commit ace42e191e
5 changed files with 68 additions and 38 deletions

View file

@ -43,6 +43,9 @@
#include "init.h"
#include "numbers.h"
#include "ports.h"
#ifdef ENABLE_REGEX
#include "regex-posix.h"
#endif
#include "smob.h"
#include "struct.h"
#include "symbols.h"
@ -78,6 +81,7 @@ enum builtin_finalizer_kind
FINALIZE_KIND_SMOB,
FINALIZE_KIND_PORT,
FINALIZE_KIND_DIRECTORY,
FINALIZE_KIND_REGEXP,
};
static SCM
@ -133,6 +137,12 @@ scm_i_add_directory_finalizer (struct scm_thread *thread, SCM obj)
return add_builtin_finalizer (thread, obj, FINALIZE_KIND_DIRECTORY);
}
SCM
scm_i_add_regexp_finalizer (struct scm_thread *thread, SCM obj)
{
return add_builtin_finalizer (thread, obj, FINALIZE_KIND_REGEXP);
}
SCM
scm_i_add_pointer_finalizer (struct scm_thread *thread, SCM obj, SCM free)
{
@ -184,6 +194,13 @@ run_finalizer (struct scm_thread *thread, SCM obj, SCM closure)
case FINALIZE_KIND_DIRECTORY:
scm_i_finalize_directory (thread, obj);
break;
case FINALIZE_KIND_REGEXP:
#ifdef ENABLE_REGEX
scm_i_finalize_regexp (thread, obj);
#else
abort ();
#endif
break;
default:
abort ();
}

View file

@ -34,6 +34,8 @@ SCM_INTERNAL SCM scm_i_add_port_finalizer (struct scm_thread *thread,
SCM obj);
SCM_INTERNAL SCM scm_i_add_directory_finalizer (struct scm_thread *thread,
SCM obj);
SCM_INTERNAL SCM scm_i_add_regexp_finalizer (struct scm_thread *thread,
SCM obj);
SCM_INTERNAL SCM scm_i_add_pointer_finalizer (struct scm_thread *thread,
SCM obj, SCM free);
SCM_INTERNAL SCM scm_i_add_finalizer (struct scm_thread *thread, SCM obj,

View file

@ -40,13 +40,13 @@
#include "async.h"
#include "extensions.h"
#include "feature.h"
#include "finalizers.h"
#include "gsubr.h"
#include "list.h"
#include "modules.h"
#include "numbers.h"
#include "pairs.h"
#include "ports.h"
#include "smob.h"
#include "strings.h"
#include "strports.h"
#include "symbols.h"
@ -60,24 +60,41 @@
#define REG_BASIC 0
#endif
scm_t_bits scm_tc16_regex;
struct scm_regexp
{
scm_t_bits tag;
regex_t regex;
};
static inline int
scm_is_regexp (SCM x)
{
return SCM_HAS_TYP16 (x, scm_tc16_regex);
return SCM_HAS_TYP16 (x, scm_tc16_regexp);
}
static inline struct scm_regexp*
scm_to_regexp (SCM x)
{
if (!scm_is_regexp (x))
abort ();
return (struct scm_regexp *) SCM_UNPACK_POINTER (x);
}
static inline SCM
scm_from_regexp (struct scm_regexp *x)
{
return SCM_PACK_POINTER (x);
}
#define SCM_REGEXP_P(x) (scm_is_regexp (x))
#define SCM_RGX(X) ((regex_t *) SCM_SMOB_DATA (X))
#define SCM_VALIDATE_RGXP(pos, a) \
#define SCM_VALIDATE_REGEXP(pos, a) \
SCM_MAKE_VALIDATE_MSG (pos, a, REGEXP_P, "regexp")
static size_t
regex_free (SCM obj)
void
scm_i_finalize_regexp (struct scm_thread *thread, SCM obj)
{
regfree (SCM_RGX (obj));
return 0;
struct scm_regexp *rx = scm_to_regexp (obj);
regfree (&rx->regex);
}
@ -87,18 +104,10 @@ SCM_SYMBOL (scm_regexp_error_key, "regular-expression-syntax");
static SCM
scm_regexp_error_msg (int regerrno, regex_t *rx)
{
char *errmsg;
int l;
errmsg = scm_malloc (80);
l = regerror (regerrno, rx, errmsg, 80);
if (l > 80)
{
free (errmsg);
errmsg = scm_malloc (l);
regerror (regerrno, rx, errmsg, l);
}
return scm_take_locale_string (errmsg);
size_t len = regerror (regerrno, rx, NULL, 0);
char *errmsg = scm_malloc (len);
regerror (regerrno, rx, errmsg, len);
return scm_take_locale_stringn (errmsg, len - 1);
}
SCM_DEFINE_STATIC (regexp_p, "regexp?", 1, 0, 0,
@ -153,7 +162,7 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
#define FUNC_NAME s_make_regexp
{
SCM flag;
regex_t *rx;
struct scm_regexp *rx;
int status, cflags;
char *c_pat;
@ -173,16 +182,17 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
flag = SCM_CDR (flag);
}
rx = scm_gc_malloc_pointerless (sizeof (regex_t), "regex");
rx = scm_gc_malloc_pointerless (sizeof (*rx), "regex");
rx->tag = scm_tc16_regexp;
c_pat = scm_to_locale_string (pat);
status = regcomp (rx, c_pat,
status = regcomp (&rx->regex, c_pat,
/* Make sure they're not passing REG_NOSUB;
regexp-exec assumes we're getting match data. */
cflags & ~REG_NOSUB);
free (c_pat);
if (status != 0)
{
SCM errmsg = scm_regexp_error_msg (status, rx);
SCM errmsg = scm_regexp_error_msg (status, &rx->regex);
scm_error_scm (scm_regexp_error_key,
scm_from_utf8_string (FUNC_NAME),
errmsg,
@ -191,7 +201,9 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
/* never returns */
}
SCM_RETURN_NEWSMOB (scm_tc16_regex, rx);
SCM ret = scm_from_regexp (rx);
scm_i_add_regexp_finalizer (SCM_I_CURRENT_THREAD, ret);
return ret;
}
#undef FUNC_NAME
@ -231,8 +243,8 @@ fixup_multibyte_match (regmatch_t *matches, int nmatches, char *str)
}
SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
(SCM rx, SCM str, SCM start, SCM flags),
"Match the compiled regular expression @var{rx} against\n"
(SCM regexp, SCM str, SCM start, SCM flags),
"Match the compiled regular expression @var{regexp} against\n"
"@code{str}. If the optional integer @var{start} argument is\n"
"provided, begin matching from that position in the string.\n"
"Return a match structure describing the results of the match,\n"
@ -259,9 +271,11 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
SCM mvec = SCM_BOOL_F;
SCM substr;
SCM_VALIDATE_RGXP (1, rx);
SCM_VALIDATE_REGEXP (1, regexp);
SCM_VALIDATE_STRING (2, str);
struct scm_regexp *rx = scm_to_regexp (regexp);
if (SCM_UNBNDP (start))
{
substr = str;
@ -281,9 +295,9 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
c_str = scm_to_locale_string (substr);
nmatches = SCM_RGX(rx)->re_nsub + 1;
nmatches = rx->regex.re_nsub + 1;
matches = scm_malloc (sizeof (regmatch_t) * nmatches);
status = regexec (SCM_RGX (rx), c_str, nmatches, matches,
status = regexec (&rx->regex, c_str, nmatches, matches,
scm_to_int (flags));
if (!status)
@ -312,7 +326,7 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
if (status != 0 && status != REG_NOMATCH)
scm_error_scm (scm_regexp_error_key,
scm_from_utf8_string (FUNC_NAME),
scm_regexp_error_msg (status, SCM_RGX (rx)),
scm_regexp_error_msg (status, &rx->regex),
SCM_BOOL_F, SCM_BOOL_F);
return mvec;
}
@ -321,9 +335,6 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
static void
scm_init_ice_9_regex (void *unused)
{
scm_tc16_regex = scm_make_smob_type ("regexp", sizeof (regex_t));
scm_set_smob_free (scm_tc16_regex, regex_free);
/* Compilation flags. */
scm_c_define ("regexp/basic", scm_from_int (REG_BASIC));
scm_c_define ("regexp/extended", scm_from_int (REG_EXTENDED));

View file

@ -24,6 +24,7 @@
#include <libguile/scm.h>
SCM_INTERNAL void scm_i_finalize_regexp (struct scm_thread*, SCM);
SCM_INTERNAL void scm_init_regex_posix (void);
#endif /* SCM_REGEX_POSIX_H */

View file

@ -515,9 +515,8 @@ typedef uintptr_t scm_t_bits;
#define scm_tc16_directory 0x047f
#define scm_tc16_syntax_transformer 0x057f
#define scm_tc16_random_state 0x067f
/*
#define scm_tc16_regexp 0x107f
*/
#define scm_tc16_regexp 0x077f
/* Definitions for tc16: */
#define SCM_TYP16(x) (0xffff & SCM_CELL_TYPE (x))