1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00

srfi-1 delete-duplicates: move from C to Scheme

* libguile/srfi-1.c (scm_srfi1_delete-duplicates): delete.
* libguile/srfi-1.h (scm_srfi1_delete-duplicates): delete.
* module/srfi/srfi-1.scm: add delete-duplicates.
This commit is contained in:
Rob Browning 2024-07-20 10:07:59 -05:00
parent a94b4406b7
commit 51b7021de1
3 changed files with 46 additions and 140 deletions

View file

@ -51,144 +51,6 @@
* optimize them, and have the VM execute them efficiently.
*/
static SCM
equal_trampoline (SCM proc, SCM arg1, SCM arg2)
{
return scm_equal_p (arg1, arg2);
}
/* list_copy_part() copies the first COUNT cells of LST, puts the result at
*dst, and returns the SCM_CDRLOC of the last cell in that new list.
This function is designed to be careful about LST possibly having changed
in between the caller deciding what to copy, and the copy actually being
done here. The COUNT ensures we terminate if LST has become circular,
SCM_VALIDATE_CONS guards against a cdr in the list changed to some
non-pair object. */
#include <stdio.h>
static SCM *
list_copy_part (SCM lst, int count, SCM *dst)
#define FUNC_NAME "list_copy_part"
{
SCM c;
for ( ; count > 0; count--)
{
SCM_VALIDATE_CONS (SCM_ARGn, lst);
c = scm_cons (SCM_CAR (lst), SCM_EOL);
*dst = c;
dst = SCM_CDRLOC (c);
lst = SCM_CDR (lst);
}
return dst;
}
#undef FUNC_NAME
SCM_DEFINE (scm_srfi1_delete_duplicates, "delete-duplicates", 1, 1, 0,
(SCM lst, SCM pred),
"Return a list containing the elements of @var{lst} but without\n"
"duplicates.\n"
"\n"
"When elements are equal, only the first in @var{lst} is\n"
"retained. Equal elements can be anywhere in @var{lst}, they\n"
"don't have to be adjacent. The returned list will have the\n"
"retained elements in the same order as they were in @var{lst}.\n"
"\n"
"Equality is determined by @var{pred}, or @code{equal?} if not\n"
"given. Calls @code{(pred x y)} are made with element @var{x}\n"
"being before @var{y} in @var{lst}. A call is made at most once\n"
"for each combination, but the sequence of the calls across the\n"
"elements is unspecified.\n"
"\n"
"@var{lst} is not modified, but the return might share a common\n"
"tail with @var{lst}.\n"
"\n"
"In the worst case, this is an @math{O(N^2)} algorithm because\n"
"it must check each element against all those preceding it. For\n"
"long lists it is more efficient to sort and then compare only\n"
"adjacent elements.")
#define FUNC_NAME s_scm_srfi1_delete_duplicates
{
scm_t_trampoline_2 equal_p;
SCM ret, *p, keeplst, item, l;
int count, i;
/* ret is the new list constructed. p is where to append, initially &ret
then SCM_CDRLOC of the last pair. lst is advanced as each element is
considered.
Elements retained are not immediately appended to ret, instead keeplst
is the last pair in lst which is to be kept but is not yet copied.
Initially this is the first pair of lst, since the first element is
always retained.
*p is kept set to keeplst, so ret (inclusive) to lst (exclusive) is all
the elements retained, making the equality search loop easy.
If an item must be deleted, elements from keeplst (inclusive) to lst
(exclusive) must be copied and appended to ret. When there's no more
deletions, *p is left set to keeplst, so ret shares structure with the
original lst. (ret will be the entire original lst if there are no
deletions.) */
/* skip to end if an empty list (or something invalid) */
ret = SCM_EOL;
if (SCM_UNBNDP (pred))
equal_p = equal_trampoline;
else
{
SCM_VALIDATE_PROC (SCM_ARG2, pred);
equal_p = scm_call_2;
}
keeplst = lst;
count = 0;
p = &ret;
for ( ; scm_is_pair (lst); lst = SCM_CDR (lst))
{
item = SCM_CAR (lst);
/* look for item in "ret" list */
for (l = ret; scm_is_pair (l); l = SCM_CDR (l))
{
if (scm_is_true (equal_p (pred, SCM_CAR (l), item)))
{
/* "item" is a duplicate, so copy keeplst onto ret */
duplicate:
p = list_copy_part (keeplst, count, p);
keeplst = SCM_CDR (lst); /* elem after the one deleted */
count = 0;
goto next_elem;
}
}
/* look for item in "keeplst" list
be careful traversing, in case nasty code changed the cdrs */
for (i = 0, l = keeplst;
i < count && scm_is_pair (l);
i++, l = SCM_CDR (l))
if (scm_is_true (equal_p (pred, SCM_CAR (l), item)))
goto duplicate;
/* keep this element */
count++;
next_elem:
;
}
SCM_ASSERT_TYPE (SCM_NULL_OR_NIL_P (lst), lst, SCM_ARG1, FUNC_NAME, "list");
/* share tail of keeplst items */
*p = keeplst;
return ret;
}
#undef FUNC_NAME
void
scm_register_srfi_1 (void)

View file

@ -24,8 +24,6 @@
#include "libguile/scm.h"
SCM_INTERNAL SCM scm_srfi1_delete_duplicates (SCM lst, SCM pred);
SCM_INTERNAL void scm_register_srfi_1 (void);
SCM_INTERNAL void scm_init_srfi_1 (void);

View file

@ -1170,6 +1170,52 @@ be deleted with @code{(delete 5 lst <)}.
@var{lst} may be modified to construct the returned list."
(remove! (lambda (elem) (pred x elem)) lst))
(define* (delete-duplicates lst #:optional (= equal?))
"Return a list containing the elements of @var{lst} but without
duplicates.
When elements are equal, only the first in @var{lst} is retained. Equal
elements can be anywhere in @var{lst}, they don't have to be adjacent.
The returned list will have the retained elements in the same order as
they were in @var{lst}.
Equality is determined by @var{pred}, or @code{equal?} if not given.
Calls @code{(pred x y)} are made with element @var{x} being before
@var{y} in @var{lst}. A call is made at most once for each combination,
but the sequence of the calls across the elements is unspecified.
@var{lst} is not modified, but the return might share a common tail with
@var{lst}.
In the worst case, this is an @math{O(N^2)} algorithm because it must
check each element against all those preceding it. For long lists it is
more efficient to sort and then compare only adjacent elements."
;; Same implementation as remove (see comments there), except that the
;; predicate checks for duplicates in both last-seen and the pending
;; result.
(if (null? lst)
lst
(let ((result (list #f)))
(let lp ((lst lst)
(last-kept lst)
(tail result))
(if (null? lst)
(begin
(set-cdr! tail last-kept)
(cdr result))
(let ((item (car lst)))
(if (or (member item (cdr result) (lambda (x y) (= y x)))
(member-before item last-kept lst =))
(if (eq? last-kept lst)
(lp (cdr lst) (cdr lst) tail)
(call-with-values
(lambda () (list-prefix-and-tail last-kept lst))
(lambda (prefix new-tail)
(set-cdr! tail prefix)
(lp (cdr lst) (cdr lst) new-tail))))
;; unique, keep
(lp (cdr lst) last-kept tail))))))))
(define* (delete-duplicates! lst #:optional (= equal?))
"Return a list containing the elements of @var{lst} but without
duplicates.