1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-07-02 23:50:47 +02:00

Move char-set-cursor implementation to Scheme

Also deprecate the C interface.

* libguile/deprecated.h:
* libguile/deprecated.c (scm_char_set_cursor):
(scm_char_set_ref):
(scm_char_set_cursor_next):
(scm_end_of_char_set_p): Deprecate.
* libguile/srfi-14.c (charset-mutable?, char-set-ranges)
(charset-set-ranges!): New accessors, exposed internally to srfi-14.scm.
* libguile/srfi-14.c (scm_boot_srfi_14): Remove scm_tc16_charset_cursor.
* module/srfi/srfi-14.scm (<char-set-cursor>): Implement as a record.
This commit is contained in:
Andy Wingo 2025-06-13 11:10:22 +02:00
parent 4516119dd1
commit 7a1406891f
5 changed files with 182 additions and 158 deletions

View file

@ -415,6 +415,61 @@ scm_array_cell_set_x (SCM array, SCM val, SCM indices)
}
static SCM char_set_cursor_var;
static SCM char_set_ref_var;
static SCM char_set_cursor_next_var;
static SCM end_of_char_set_p_var;
static void
init_char_set_cursor_vars (void)
{
char_set_cursor_var = scm_c_public_lookup ("srfi srfi-14", "char-set-cursor");
char_set_ref_var = scm_c_public_lookup ("srfi srfi-14", "char-set-ref");
char_set_cursor_next_var = scm_c_public_lookup ("srfi srfi-14", "char-set-cursor-next");
end_of_char_set_p_var = scm_c_public_lookup ("srfi srfi-14", "end-of-char-set?");
}
static void
init_char_set_cursor_functions (void)
{
static scm_i_pthread_once_t once = SCM_I_PTHREAD_ONCE_INIT;
scm_c_issue_deprecation_warning
("Using the char set cursor functions from C is deprecated. Invoke"
"char-set-cursor, etc. from (srfi srfi-14) instead.");
scm_i_pthread_once (&once, init_char_set_cursor_vars);
}
SCM
scm_char_set_cursor (SCM cs)
{
init_char_set_cursor_functions ();
return scm_call_1 (scm_variable_ref (char_set_cursor_var), cs);
}
SCM
scm_char_set_ref (SCM cs, SCM cursor)
{
init_char_set_cursor_functions ();
return scm_call_2 (scm_variable_ref (char_set_ref_var), cs, cursor);
}
SCM
scm_char_set_cursor_next (SCM cs, SCM cursor)
{
init_char_set_cursor_functions ();
return scm_call_2 (scm_variable_ref (char_set_cursor_next_var), cs, cursor);
}
SCM
scm_end_of_char_set_p (SCM cursor)
{
init_char_set_cursor_functions ();
return scm_call_1 (scm_variable_ref (end_of_char_set_p_var), cursor);
}
void

View file

@ -75,6 +75,11 @@ SCM_DEPRECATED SCM scm_array_slice_for_each_in_order (SCM frank, SCM op, SCM arg
SCM_DEPRECATED SCM scm_array_cell_ref (SCM array, SCM indices);
SCM_DEPRECATED SCM scm_array_cell_set_x (SCM array, SCM val, SCM indices);
SCM_DEPRECATED SCM scm_char_set_cursor (SCM cs);
SCM_DEPRECATED SCM scm_char_set_ref (SCM cs, SCM cursor);
SCM_DEPRECATED SCM scm_char_set_cursor_next (SCM cs, SCM cursor);
SCM_DEPRECATED SCM scm_end_of_char_set_p (SCM cursor);
/* Deprecated declarations go here. */
void scm_i_init_deprecated (void);

View file

@ -63,12 +63,6 @@ struct scm_charset
struct scm_bytevector *ranges;
};
typedef struct
{
size_t range;
scm_t_wchar n;
} scm_t_char_set_cursor;
static inline struct scm_charset*
scm_to_charset (SCM scm)
{
@ -680,29 +674,6 @@ scm_i_print_char_set (SCM charset, SCM port, scm_print_state *pstate SCM_UNUSED)
return 1;
}
/* Smob print hook for character sets cursors. */
int scm_tc16_charset_cursor = 0;
static int
charset_cursor_print (SCM cursor, SCM port,
scm_print_state *pstate SCM_UNUSED)
{
scm_t_char_set_cursor *cur;
cur = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
scm_puts ("#<charset-cursor ", port);
if (cur->range == (size_t) (-1))
scm_puts ("(empty)", port);
else
{
scm_write (scm_from_size_t (cur->range), port);
scm_puts (":", port);
scm_write (scm_from_int32 (cur->n), port);
}
scm_puts (">", port);
return 1;
}
SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
(SCM obj),
"Return @code{#t} if @var{obj} is a character set, @code{#f}\n"
@ -713,6 +684,46 @@ SCM_DEFINE (scm_char_set_p, "char-set?", 1, 0, 0,
}
#undef FUNC_NAME
SCM_DEFINE_STATIC (scm_charset_mutable_p, "charset-mutable?", 1, 0, 0,
(SCM cs),
"Return @code{#t} if the character set @var{cs} is mutable,\n"
"@code{#f} otherwise.")
#define FUNC_NAME s_scm_char_set_p
{
SCM_VALIDATE_CHARSET (1, cs);
return scm_from_bool (!charset_is_immutable (cs));
}
#undef FUNC_NAME
SCM_DEFINE_STATIC (scm_charset_ranges, "charset-ranges", 1, 0, 0,
(SCM cs),
"Return the {lo, hi} packed sorted array of inclusive\n"
"ranges of the character set @var{cs}, as a u32vector.")
#define FUNC_NAME s_scm_char_set_p
{
SCM_VALIDATE_CHARSET (1, cs);
return scm_from_bytevector (scm_to_charset (cs)->ranges);
}
#undef FUNC_NAME
SCM_DEFINE_STATIC (scm_charset_set_ranges_x, "charset-set-ranges!", 2, 0, 0,
(SCM cs, SCM ranges),
"Replace the {lo, hi} packed sorted array of inclusive\n"
"ranges of the character set @var{cs} with the given\n"
"u32vector.")
#define FUNC_NAME s_scm_char_set_p
{
SCM_VALIDATE_MUTABLE_CHARSET (1, cs);
SCM_VALIDATE_BYTEVECTOR (2, ranges);
struct scm_bytevector *bv = scm_to_bytevector (ranges);
SCM_ASSERT_TYPE
((bv->length % sizeof (scm_t_char_range)) == 0
&& scm_bytevector_element_type (bv) == SCM_ARRAY_ELEMENT_TYPE_U32,
ranges, 2, FUNC_NAME, "char ranges");
scm_to_charset (cs)->ranges = bv;
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
int
scm_i_char_sets_equal (SCM a, SCM b)
@ -817,128 +828,6 @@ SCM_DEFINE (scm_char_set_hash, "char-set-hash", 1, 1, 0,
#undef FUNC_NAME
SCM_DEFINE (scm_char_set_cursor, "char-set-cursor", 1, 0, 0,
(SCM cs), "Return a cursor into the character set @var{cs}.")
#define FUNC_NAME s_scm_char_set_cursor
{
struct scm_charset *cs_data;
scm_t_char_set_cursor *cur_data;
SCM_VALIDATE_CHARSET (1, cs);
cs_data = scm_to_charset (cs);
cur_data =
(scm_t_char_set_cursor *) scm_gc_malloc (sizeof (scm_t_char_set_cursor),
"charset-cursor");
if (charset_len (cs_data) == 0)
{
cur_data->range = (size_t) (-1);
cur_data->n = 0;
}
else
{
cur_data->range = 0;
cur_data->n = charset_range_lo (cs_data, 0);
}
SCM_RETURN_NEWSMOB (scm_tc16_charset_cursor, cur_data);
}
#undef FUNC_NAME
SCM_DEFINE (scm_char_set_ref, "char-set-ref", 2, 0, 0,
(SCM cs, SCM cursor),
"Return the character at the current cursor position\n"
"@var{cursor} in the character set @var{cs}. It is an error to\n"
"pass a cursor for which @code{end-of-char-set?} returns true.")
#define FUNC_NAME s_scm_char_set_ref
{
struct scm_charset *cs_data;
scm_t_char_set_cursor *cur_data;
size_t i;
SCM_VALIDATE_CHARSET (1, cs);
SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
cs_data = scm_to_charset (cs);
cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
/* Validate that this cursor is still true. */
i = cur_data->range;
if (i == (size_t) (-1)
|| i >= charset_len (cs_data)
|| cur_data->n < charset_range_lo (cs_data, i)
|| cur_data->n > charset_range_hi (cs_data, i))
SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
return SCM_MAKE_CHAR (cur_data->n);
}
#undef FUNC_NAME
SCM_DEFINE (scm_char_set_cursor_next, "char-set-cursor-next", 2, 0, 0,
(SCM cs, SCM cursor),
"Advance the character set cursor @var{cursor} to the next\n"
"character in the character set @var{cs}. It is an error if the\n"
"cursor given satisfies @code{end-of-char-set?}.")
#define FUNC_NAME s_scm_char_set_cursor_next
{
struct scm_charset *cs_data;
scm_t_char_set_cursor *cur_data;
size_t i;
SCM_VALIDATE_CHARSET (1, cs);
SCM_VALIDATE_SMOB (2, cursor, charset_cursor);
cs_data = scm_to_charset (cs);
cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
/* Validate that this cursor is still true. */
i = cur_data->range;
if (i == (size_t) (-1)
|| i >= charset_len (cs_data)
|| !charset_range_contains (cs_data, i, cur_data->n))
SCM_MISC_ERROR ("invalid character set cursor: ~A", scm_list_1 (cursor));
/* Increment the cursor. */
if (cur_data->n == charset_range_hi (cs_data, i))
{
if (i + 1 < charset_len (cs_data))
{
cur_data->range = i + 1;
cur_data->n = charset_range_lo (cs_data, i + 1);
}
else
{
/* This is the end of the road. */
cur_data->range = (size_t) (-1);
cur_data->n = 0;
}
}
else
{
cur_data->n = cur_data->n + 1;
}
return cursor;
}
#undef FUNC_NAME
SCM_DEFINE (scm_end_of_char_set_p, "end-of-char-set?", 1, 0, 0,
(SCM cursor),
"Return @code{#t} if @var{cursor} has reached the end of a\n"
"character set, @code{#f} otherwise.")
#define FUNC_NAME s_scm_end_of_char_set_p
{
scm_t_char_set_cursor *cur_data;
SCM_VALIDATE_SMOB (1, cursor, charset_cursor);
cur_data = (scm_t_char_set_cursor *) SCM_SMOB_DATA (cursor);
if (cur_data->range == (size_t) (-1))
return SCM_BOOL_T;
return SCM_BOOL_F;
}
#undef FUNC_NAME
SCM_DEFINE (scm_char_set_fold, "char-set-fold", 3, 0, 0,
(SCM kons, SCM knil, SCM cs),
"Fold the procedure @var{kons} over the character set @var{cs},\n"
@ -2079,9 +1968,6 @@ scm_boot_srfi_14 (void)
empty_charset_ranges =
scm_i_make_typed_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_U32);
scm_tc16_charset_cursor = scm_make_smob_type ("char-set-cursor", 0);
scm_set_smob_print (scm_tc16_charset_cursor, charset_cursor_print);
FOR_EACH_STANDARD_CHARSET (DEFINE_C_CHARSET);
scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,

View file

@ -35,10 +35,6 @@ SCM_API SCM scm_char_set_p (SCM obj);
SCM_API SCM scm_char_set_eq (SCM char_sets);
SCM_API SCM scm_char_set_leq (SCM char_sets);
SCM_API SCM scm_char_set_hash (SCM cs, SCM bound);
SCM_API SCM scm_char_set_cursor (SCM cs);
SCM_API SCM scm_char_set_ref (SCM cs, SCM cursor);
SCM_API SCM scm_char_set_cursor_next (SCM cs, SCM cursor);
SCM_API SCM scm_end_of_char_set_p (SCM cursor);
SCM_API SCM scm_char_set_fold (SCM kons, SCM knil, SCM cs);
SCM_API SCM scm_char_set_unfold (SCM p, SCM f, SCM g, SCM seed, SCM base_cs);
SCM_API SCM scm_char_set_unfold_x (SCM p, SCM f, SCM g, SCM seed, SCM base_cs);

View file

@ -25,6 +25,9 @@
(define-module (srfi srfi-14)
;; FIXME: Use #:export instead of #:replace once deprecated bindings
;; are removed.
#:use-module (ice-9 match)
#:use-module (srfi srfi-9)
#:use-module (rnrs bytevectors)
#:replace (;; General procedures
char-set?
char-set=
@ -98,6 +101,85 @@
(load-extension (string-append "libguile-" (effective-version))
"scm_init_srfi_14"))
(define-record-type <char-set-cursor>
(make-char-set-cursor cs range n limit)
char-set-cursor?
(cs char-set-cursor-charset)
(range char-set-cursor-range set-char-set-cursor-range!)
(n char-set-cursor-n set-char-set-cursor-n!)
(limit char-set-cursor-limit set-char-set-cursor-limit!))
(define (charset-ranges-len ranges)
(ash (bytevector-length ranges) -3))
(define (charset-range-lo ranges idx)
(bytevector-u32-native-ref ranges (ash idx 3)))
(define (charset-range-hi ranges idx)
(bytevector-u32-native-ref ranges (+ (ash idx 3) 4)))
(define (char-set-cursor cs)
"Return a cursor into the character set @var{cs}."
(let* ((ranges (charset-ranges cs))
(len (charset-ranges-len ranges)))
(if (zero? len)
(make-char-set-cursor cs #f #f #f)
(make-char-set-cursor cs 0 (charset-range-lo ranges 0)
(charset-range-hi ranges 0)))))
(define (char-set-ref cs cursor)
"Return the character at the current cursor position @var{cursor} in the
character set @var{cs}. It is an error to pass a cursor for which
@code{end-of-char-set?} returns true."
(match cursor
(($ <char-set-cursor> cs* range n limit)
(unless (eq? cs cs*)
(error "charset cursors can only be used with their original charsets"
cursor))
(unless n
(error "char-set-ref on cursor that is end-of-char-set?" cursor))
(integer->char n))
(_
(scm-error 'wrong-type-arg "char-set-ref" "Wrong type argument: ~S"
(list cursor)
'()))))
(define (char-set-cursor-next cs cursor)
"Advance the character set cursor @var{cursor} to the next character in
the character set @var{cs}. It is an error if the cursor given
satisfies @code{end-of-char-set?}."
(match cursor
(($ <char-set-cursor> cs* range n limit)
(unless (eq? cs cs*)
(error "charset cursors can only be used with their original charsets"
cursor))
(unless n
(error "char-set-next on cursor that is end-of-char-set?" cursor))
(cond
((< n limit)
(set-char-set-cursor-n! cursor (1+ n)))
(else
(let* ((ranges (charset-ranges cs))
(len (charset-ranges-len ranges)))
(cond
((< (1+ range) len)
(set-char-set-cursor-range! cursor (1+ range))
(set-char-set-cursor-n! cursor (charset-range-lo cs range))
(set-char-set-cursor-limit! cursor (charset-range-hi cs range)))
(else
(set-char-set-cursor-range! cursor #f)
(set-char-set-cursor-n! cursor #f)
(set-char-set-cursor-limit! cursor #f))))))
cursor)
(_
(scm-error 'wrong-type-arg "char-set-ref" "Wrong type argument: ~S"
(list cursor)
'()))))
(define (end-of-char-set? cursor)
"Return @code{#t} if @var{cursor} has reached the end of a character set,
@code{#f} otherwise."
(not (char-set-cursor-range cursor)))
(cond-expand-provide (current-module) '(srfi-14))
;;; srfi-14.scm ends here