1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-02 07:40:30 +02:00

Convert regexps to use statically-allocated tc16

* libguile/scm.h: Allocate tc16.
* libguile/regex-posix.h:
* libguile/regex-posix.c:
* libguile/finalizers.h:
* libguile/finalizers.c: Adapt.
This commit is contained in:
Andy Wingo 2025-06-17 14:35:51 +02:00
parent 197345fa02
commit ace42e191e
5 changed files with 68 additions and 38 deletions

View file

@ -43,6 +43,9 @@
#include "init.h" #include "init.h"
#include "numbers.h" #include "numbers.h"
#include "ports.h" #include "ports.h"
#ifdef ENABLE_REGEX
#include "regex-posix.h"
#endif
#include "smob.h" #include "smob.h"
#include "struct.h" #include "struct.h"
#include "symbols.h" #include "symbols.h"
@ -78,6 +81,7 @@ enum builtin_finalizer_kind
FINALIZE_KIND_SMOB, FINALIZE_KIND_SMOB,
FINALIZE_KIND_PORT, FINALIZE_KIND_PORT,
FINALIZE_KIND_DIRECTORY, FINALIZE_KIND_DIRECTORY,
FINALIZE_KIND_REGEXP,
}; };
static SCM static SCM
@ -133,6 +137,12 @@ scm_i_add_directory_finalizer (struct scm_thread *thread, SCM obj)
return add_builtin_finalizer (thread, obj, FINALIZE_KIND_DIRECTORY); return add_builtin_finalizer (thread, obj, FINALIZE_KIND_DIRECTORY);
} }
SCM
scm_i_add_regexp_finalizer (struct scm_thread *thread, SCM obj)
{
return add_builtin_finalizer (thread, obj, FINALIZE_KIND_REGEXP);
}
SCM SCM
scm_i_add_pointer_finalizer (struct scm_thread *thread, SCM obj, SCM free) scm_i_add_pointer_finalizer (struct scm_thread *thread, SCM obj, SCM free)
{ {
@ -184,6 +194,13 @@ run_finalizer (struct scm_thread *thread, SCM obj, SCM closure)
case FINALIZE_KIND_DIRECTORY: case FINALIZE_KIND_DIRECTORY:
scm_i_finalize_directory (thread, obj); scm_i_finalize_directory (thread, obj);
break; break;
case FINALIZE_KIND_REGEXP:
#ifdef ENABLE_REGEX
scm_i_finalize_regexp (thread, obj);
#else
abort ();
#endif
break;
default: default:
abort (); abort ();
} }

View file

@ -34,6 +34,8 @@ SCM_INTERNAL SCM scm_i_add_port_finalizer (struct scm_thread *thread,
SCM obj); SCM obj);
SCM_INTERNAL SCM scm_i_add_directory_finalizer (struct scm_thread *thread, SCM_INTERNAL SCM scm_i_add_directory_finalizer (struct scm_thread *thread,
SCM obj); SCM obj);
SCM_INTERNAL SCM scm_i_add_regexp_finalizer (struct scm_thread *thread,
SCM obj);
SCM_INTERNAL SCM scm_i_add_pointer_finalizer (struct scm_thread *thread, SCM_INTERNAL SCM scm_i_add_pointer_finalizer (struct scm_thread *thread,
SCM obj, SCM free); SCM obj, SCM free);
SCM_INTERNAL SCM scm_i_add_finalizer (struct scm_thread *thread, SCM obj, SCM_INTERNAL SCM scm_i_add_finalizer (struct scm_thread *thread, SCM obj,

View file

@ -40,13 +40,13 @@
#include "async.h" #include "async.h"
#include "extensions.h" #include "extensions.h"
#include "feature.h" #include "feature.h"
#include "finalizers.h"
#include "gsubr.h" #include "gsubr.h"
#include "list.h" #include "list.h"
#include "modules.h" #include "modules.h"
#include "numbers.h" #include "numbers.h"
#include "pairs.h" #include "pairs.h"
#include "ports.h" #include "ports.h"
#include "smob.h"
#include "strings.h" #include "strings.h"
#include "strports.h" #include "strports.h"
#include "symbols.h" #include "symbols.h"
@ -60,24 +60,41 @@
#define REG_BASIC 0 #define REG_BASIC 0
#endif #endif
scm_t_bits scm_tc16_regex; struct scm_regexp
{
scm_t_bits tag;
regex_t regex;
};
static inline int static inline int
scm_is_regexp (SCM x) scm_is_regexp (SCM x)
{ {
return SCM_HAS_TYP16 (x, scm_tc16_regex); return SCM_HAS_TYP16 (x, scm_tc16_regexp);
}
static inline struct scm_regexp*
scm_to_regexp (SCM x)
{
if (!scm_is_regexp (x))
abort ();
return (struct scm_regexp *) SCM_UNPACK_POINTER (x);
}
static inline SCM
scm_from_regexp (struct scm_regexp *x)
{
return SCM_PACK_POINTER (x);
} }
#define SCM_REGEXP_P(x) (scm_is_regexp (x)) #define SCM_REGEXP_P(x) (scm_is_regexp (x))
#define SCM_RGX(X) ((regex_t *) SCM_SMOB_DATA (X)) #define SCM_VALIDATE_REGEXP(pos, a) \
#define SCM_VALIDATE_RGXP(pos, a) \
SCM_MAKE_VALIDATE_MSG (pos, a, REGEXP_P, "regexp") SCM_MAKE_VALIDATE_MSG (pos, a, REGEXP_P, "regexp")
static size_t void
regex_free (SCM obj) scm_i_finalize_regexp (struct scm_thread *thread, SCM obj)
{ {
regfree (SCM_RGX (obj)); struct scm_regexp *rx = scm_to_regexp (obj);
return 0; regfree (&rx->regex);
} }
@ -87,18 +104,10 @@ SCM_SYMBOL (scm_regexp_error_key, "regular-expression-syntax");
static SCM static SCM
scm_regexp_error_msg (int regerrno, regex_t *rx) scm_regexp_error_msg (int regerrno, regex_t *rx)
{ {
char *errmsg; size_t len = regerror (regerrno, rx, NULL, 0);
int l; char *errmsg = scm_malloc (len);
regerror (regerrno, rx, errmsg, len);
errmsg = scm_malloc (80); return scm_take_locale_stringn (errmsg, len - 1);
l = regerror (regerrno, rx, errmsg, 80);
if (l > 80)
{
free (errmsg);
errmsg = scm_malloc (l);
regerror (regerrno, rx, errmsg, l);
}
return scm_take_locale_string (errmsg);
} }
SCM_DEFINE_STATIC (regexp_p, "regexp?", 1, 0, 0, SCM_DEFINE_STATIC (regexp_p, "regexp?", 1, 0, 0,
@ -153,7 +162,7 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
#define FUNC_NAME s_make_regexp #define FUNC_NAME s_make_regexp
{ {
SCM flag; SCM flag;
regex_t *rx; struct scm_regexp *rx;
int status, cflags; int status, cflags;
char *c_pat; char *c_pat;
@ -173,16 +182,17 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
flag = SCM_CDR (flag); flag = SCM_CDR (flag);
} }
rx = scm_gc_malloc_pointerless (sizeof (regex_t), "regex"); rx = scm_gc_malloc_pointerless (sizeof (*rx), "regex");
rx->tag = scm_tc16_regexp;
c_pat = scm_to_locale_string (pat); c_pat = scm_to_locale_string (pat);
status = regcomp (rx, c_pat, status = regcomp (&rx->regex, c_pat,
/* Make sure they're not passing REG_NOSUB; /* Make sure they're not passing REG_NOSUB;
regexp-exec assumes we're getting match data. */ regexp-exec assumes we're getting match data. */
cflags & ~REG_NOSUB); cflags & ~REG_NOSUB);
free (c_pat); free (c_pat);
if (status != 0) if (status != 0)
{ {
SCM errmsg = scm_regexp_error_msg (status, rx); SCM errmsg = scm_regexp_error_msg (status, &rx->regex);
scm_error_scm (scm_regexp_error_key, scm_error_scm (scm_regexp_error_key,
scm_from_utf8_string (FUNC_NAME), scm_from_utf8_string (FUNC_NAME),
errmsg, errmsg,
@ -191,7 +201,9 @@ SCM_DEFINE_STATIC (make_regexp, "make-regexp", 1, 0, 1,
/* never returns */ /* never returns */
} }
SCM_RETURN_NEWSMOB (scm_tc16_regex, rx); SCM ret = scm_from_regexp (rx);
scm_i_add_regexp_finalizer (SCM_I_CURRENT_THREAD, ret);
return ret;
} }
#undef FUNC_NAME #undef FUNC_NAME
@ -231,8 +243,8 @@ fixup_multibyte_match (regmatch_t *matches, int nmatches, char *str)
} }
SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0, SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
(SCM rx, SCM str, SCM start, SCM flags), (SCM regexp, SCM str, SCM start, SCM flags),
"Match the compiled regular expression @var{rx} against\n" "Match the compiled regular expression @var{regexp} against\n"
"@code{str}. If the optional integer @var{start} argument is\n" "@code{str}. If the optional integer @var{start} argument is\n"
"provided, begin matching from that position in the string.\n" "provided, begin matching from that position in the string.\n"
"Return a match structure describing the results of the match,\n" "Return a match structure describing the results of the match,\n"
@ -259,9 +271,11 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
SCM mvec = SCM_BOOL_F; SCM mvec = SCM_BOOL_F;
SCM substr; SCM substr;
SCM_VALIDATE_RGXP (1, rx); SCM_VALIDATE_REGEXP (1, regexp);
SCM_VALIDATE_STRING (2, str); SCM_VALIDATE_STRING (2, str);
struct scm_regexp *rx = scm_to_regexp (regexp);
if (SCM_UNBNDP (start)) if (SCM_UNBNDP (start))
{ {
substr = str; substr = str;
@ -281,9 +295,9 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
c_str = scm_to_locale_string (substr); c_str = scm_to_locale_string (substr);
nmatches = SCM_RGX(rx)->re_nsub + 1; nmatches = rx->regex.re_nsub + 1;
matches = scm_malloc (sizeof (regmatch_t) * nmatches); matches = scm_malloc (sizeof (regmatch_t) * nmatches);
status = regexec (SCM_RGX (rx), c_str, nmatches, matches, status = regexec (&rx->regex, c_str, nmatches, matches,
scm_to_int (flags)); scm_to_int (flags));
if (!status) if (!status)
@ -312,7 +326,7 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
if (status != 0 && status != REG_NOMATCH) if (status != 0 && status != REG_NOMATCH)
scm_error_scm (scm_regexp_error_key, scm_error_scm (scm_regexp_error_key,
scm_from_utf8_string (FUNC_NAME), scm_from_utf8_string (FUNC_NAME),
scm_regexp_error_msg (status, SCM_RGX (rx)), scm_regexp_error_msg (status, &rx->regex),
SCM_BOOL_F, SCM_BOOL_F); SCM_BOOL_F, SCM_BOOL_F);
return mvec; return mvec;
} }
@ -321,9 +335,6 @@ SCM_DEFINE_STATIC (regexp_exec, "regexp-exec", 2, 2, 0,
static void static void
scm_init_ice_9_regex (void *unused) scm_init_ice_9_regex (void *unused)
{ {
scm_tc16_regex = scm_make_smob_type ("regexp", sizeof (regex_t));
scm_set_smob_free (scm_tc16_regex, regex_free);
/* Compilation flags. */ /* Compilation flags. */
scm_c_define ("regexp/basic", scm_from_int (REG_BASIC)); scm_c_define ("regexp/basic", scm_from_int (REG_BASIC));
scm_c_define ("regexp/extended", scm_from_int (REG_EXTENDED)); scm_c_define ("regexp/extended", scm_from_int (REG_EXTENDED));

View file

@ -24,6 +24,7 @@
#include <libguile/scm.h> #include <libguile/scm.h>
SCM_INTERNAL void scm_i_finalize_regexp (struct scm_thread*, SCM);
SCM_INTERNAL void scm_init_regex_posix (void); SCM_INTERNAL void scm_init_regex_posix (void);
#endif /* SCM_REGEX_POSIX_H */ #endif /* SCM_REGEX_POSIX_H */

View file

@ -515,9 +515,8 @@ typedef uintptr_t scm_t_bits;
#define scm_tc16_directory 0x047f #define scm_tc16_directory 0x047f
#define scm_tc16_syntax_transformer 0x057f #define scm_tc16_syntax_transformer 0x057f
#define scm_tc16_random_state 0x067f #define scm_tc16_random_state 0x067f
/* #define scm_tc16_regexp 0x077f
#define scm_tc16_regexp 0x107f
*/
/* Definitions for tc16: */ /* Definitions for tc16: */
#define SCM_TYP16(x) (0xffff & SCM_CELL_TYPE (x)) #define SCM_TYP16(x) (0xffff & SCM_CELL_TYPE (x))