mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-18 09:40:25 +02:00
In string-split, add support for character sets and predicates.
* libguile/srfi-13.c (string-split): Add support for splitting on character sets and predicates, like string-index and others. * test-suite/tests/strings.test (string-split): Add tests covering the new argument types. * doc/ref/api-data.texi (string-split): Update. Signed-off-by: Mark H Weaver <mhw@netris.org>
This commit is contained in:
parent
8b22ced1c9
commit
5f085775ab
4 changed files with 146 additions and 37 deletions
|
@ -3152,12 +3152,24 @@ These procedures are useful for similar tasks.
|
||||||
Convert the string @var{str} into a list of characters.
|
Convert the string @var{str} into a list of characters.
|
||||||
@end deffn
|
@end deffn
|
||||||
|
|
||||||
@deffn {Scheme Procedure} string-split str chr
|
@deffn {Scheme Procedure} string-split str char_pred
|
||||||
@deffnx {C Function} scm_string_split (str, chr)
|
@deffnx {C Function} scm_string_split (str, char_pred)
|
||||||
Split the string @var{str} into a list of substrings delimited
|
Split the string @var{str} into a list of substrings delimited
|
||||||
by appearances of the character @var{chr}. Note that an empty substring
|
by appearances of characters that
|
||||||
between separator characters will result in an empty string in the
|
|
||||||
result list.
|
@itemize @bullet
|
||||||
|
@item
|
||||||
|
equal @var{char_pred}, if it is a character,
|
||||||
|
|
||||||
|
@item
|
||||||
|
satisfy the predicate @var{char_pred}, if it is a procedure,
|
||||||
|
|
||||||
|
@item
|
||||||
|
are in the set @var{char_pred}, if it is a character set.
|
||||||
|
@end itemize
|
||||||
|
|
||||||
|
Note that an empty substring between separator characters will result in
|
||||||
|
an empty string in the result list.
|
||||||
|
|
||||||
@lisp
|
@lisp
|
||||||
(string-split "root:x:0:0:root:/root:/bin/bash" #\:)
|
(string-split "root:x:0:0:root:/root:/bin/bash" #\:)
|
||||||
|
|
|
@ -2993,11 +2993,22 @@ SCM_DEFINE (scm_string_tokenize, "string-tokenize", 1, 3, 0,
|
||||||
#undef FUNC_NAME
|
#undef FUNC_NAME
|
||||||
|
|
||||||
SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
|
SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
|
||||||
(SCM str, SCM chr),
|
(SCM str, SCM char_pred),
|
||||||
"Split the string @var{str} into a list of the substrings delimited\n"
|
"Split the string @var{str} into a list of the substrings delimited\n"
|
||||||
"by appearances of the character @var{chr}. Note that an empty substring\n"
|
"by appearances of characters that\n"
|
||||||
"between separator characters will result in an empty string in the\n"
|
"\n"
|
||||||
"result list.\n"
|
"@itemize @bullet\n"
|
||||||
|
"@item\n"
|
||||||
|
"equal @var{char_pred}, if it is a character,\n"
|
||||||
|
"\n"
|
||||||
|
"@item\n"
|
||||||
|
"satisfy the predicate @var{char_pred}, if it is a procedure,\n"
|
||||||
|
"\n"
|
||||||
|
"@item\n"
|
||||||
|
"are in the set @var{char_pred}, if it is a character set.\n"
|
||||||
|
"@end itemize\n\n"
|
||||||
|
"Note that an empty substring between separator characters\n"
|
||||||
|
"will result in an empty string in the result list.\n"
|
||||||
"\n"
|
"\n"
|
||||||
"@lisp\n"
|
"@lisp\n"
|
||||||
"(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n"
|
"(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n"
|
||||||
|
@ -3014,12 +3025,14 @@ SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
|
||||||
"@end lisp")
|
"@end lisp")
|
||||||
#define FUNC_NAME s_scm_string_split
|
#define FUNC_NAME s_scm_string_split
|
||||||
{
|
{
|
||||||
long idx, last_idx;
|
|
||||||
int narrow;
|
|
||||||
SCM res = SCM_EOL;
|
SCM res = SCM_EOL;
|
||||||
|
|
||||||
SCM_VALIDATE_STRING (1, str);
|
SCM_VALIDATE_STRING (1, str);
|
||||||
SCM_VALIDATE_CHAR (2, chr);
|
|
||||||
|
if (SCM_CHARP (char_pred))
|
||||||
|
{
|
||||||
|
long idx, last_idx;
|
||||||
|
int narrow;
|
||||||
|
|
||||||
/* This is explicit wide/narrow logic (instead of using
|
/* This is explicit wide/narrow logic (instead of using
|
||||||
scm_i_string_ref) is a speed optimization. */
|
scm_i_string_ref) is a speed optimization. */
|
||||||
|
@ -3031,7 +3044,7 @@ SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
|
||||||
while (idx >= 0)
|
while (idx >= 0)
|
||||||
{
|
{
|
||||||
last_idx = idx;
|
last_idx = idx;
|
||||||
while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(chr))
|
while (idx > 0 && buf[idx-1] != (char) SCM_CHAR(char_pred))
|
||||||
idx--;
|
idx--;
|
||||||
if (idx >= 0)
|
if (idx >= 0)
|
||||||
{
|
{
|
||||||
|
@ -3046,7 +3059,7 @@ SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
|
||||||
while (idx >= 0)
|
while (idx >= 0)
|
||||||
{
|
{
|
||||||
last_idx = idx;
|
last_idx = idx;
|
||||||
while (idx > 0 && buf[idx-1] != SCM_CHAR(chr))
|
while (idx > 0 && buf[idx-1] != SCM_CHAR(char_pred))
|
||||||
idx--;
|
idx--;
|
||||||
if (idx >= 0)
|
if (idx >= 0)
|
||||||
{
|
{
|
||||||
|
@ -3055,6 +3068,30 @@ SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SCM sidx, slast_idx;
|
||||||
|
|
||||||
|
if (!SCM_CHARSETP (char_pred))
|
||||||
|
SCM_ASSERT (scm_is_true (scm_procedure_p (char_pred)),
|
||||||
|
char_pred, SCM_ARG2, FUNC_NAME);
|
||||||
|
|
||||||
|
/* Supporting predicates and character sets involves handling SCM
|
||||||
|
values so there is less chance to optimize. */
|
||||||
|
slast_idx = scm_string_length (str);
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
sidx = scm_string_index_right (str, char_pred, SCM_INUM0, slast_idx);
|
||||||
|
if (scm_is_false (sidx))
|
||||||
|
break;
|
||||||
|
res = scm_cons (scm_substring (str, scm_oneplus (sidx), slast_idx), res);
|
||||||
|
slast_idx = sidx;
|
||||||
|
}
|
||||||
|
|
||||||
|
res = scm_cons (scm_substring (str, SCM_INUM0, slast_idx), res);
|
||||||
|
}
|
||||||
|
|
||||||
scm_remember_upto_here_1 (str);
|
scm_remember_upto_here_1 (str);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,7 +110,7 @@ SCM_API SCM scm_xsubstring (SCM s, SCM from, SCM to, SCM start, SCM end);
|
||||||
SCM_API SCM scm_string_xcopy_x (SCM target, SCM tstart, SCM s, SCM sfrom, SCM sto, SCM start, SCM end);
|
SCM_API SCM scm_string_xcopy_x (SCM target, SCM tstart, SCM s, SCM sfrom, SCM sto, SCM start, SCM end);
|
||||||
SCM_API SCM scm_string_replace (SCM s1, SCM s2, SCM start1, SCM end1, SCM start2, SCM end2);
|
SCM_API SCM scm_string_replace (SCM s1, SCM s2, SCM start1, SCM end1, SCM start2, SCM end2);
|
||||||
SCM_API SCM scm_string_tokenize (SCM s, SCM token_char, SCM start, SCM end);
|
SCM_API SCM scm_string_tokenize (SCM s, SCM token_char, SCM start, SCM end);
|
||||||
SCM_API SCM scm_string_split (SCM s, SCM chr);
|
SCM_API SCM scm_string_split (SCM s, SCM char_pred);
|
||||||
SCM_API SCM scm_string_filter (SCM char_pred, SCM s, SCM start, SCM end);
|
SCM_API SCM scm_string_filter (SCM char_pred, SCM s, SCM start, SCM end);
|
||||||
SCM_API SCM scm_string_delete (SCM char_pred, SCM s, SCM start, SCM end);
|
SCM_API SCM scm_string_delete (SCM char_pred, SCM s, SCM start, SCM end);
|
||||||
|
|
||||||
|
|
|
@ -557,7 +557,67 @@
|
||||||
(pass-if "char 255"
|
(pass-if "char 255"
|
||||||
(equal? '("a" "b")
|
(equal? '("a" "b")
|
||||||
(string-split (string #\a (integer->char 255) #\b)
|
(string-split (string #\a (integer->char 255) #\b)
|
||||||
(integer->char 255)))))
|
(integer->char 255))))
|
||||||
|
|
||||||
|
(pass-if "empty string - char"
|
||||||
|
(equal? '("")
|
||||||
|
(string-split "" #\:)))
|
||||||
|
|
||||||
|
(pass-if "non-empty - char - no delimiters"
|
||||||
|
(equal? '("foobarfrob")
|
||||||
|
(string-split "foobarfrob" #\:)))
|
||||||
|
|
||||||
|
(pass-if "non-empty - char - delimiters"
|
||||||
|
(equal? '("foo" "bar" "frob")
|
||||||
|
(string-split "foo:bar:frob" #\:)))
|
||||||
|
|
||||||
|
(pass-if "non-empty - char - leading delimiters"
|
||||||
|
(equal? '("" "" "foo" "bar" "frob")
|
||||||
|
(string-split "::foo:bar:frob" #\:)))
|
||||||
|
|
||||||
|
(pass-if "non-empty - char - trailing delimiters"
|
||||||
|
(equal? '("foo" "bar" "frob" "" "")
|
||||||
|
(string-split "foo:bar:frob::" #\:)))
|
||||||
|
|
||||||
|
(pass-if "empty string - charset"
|
||||||
|
(equal? '("")
|
||||||
|
(string-split "" (char-set #\:))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - charset - no delimiters"
|
||||||
|
(equal? '("foobarfrob")
|
||||||
|
(string-split "foobarfrob" (char-set #\:))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - charset - delimiters"
|
||||||
|
(equal? '("foo" "bar" "frob")
|
||||||
|
(string-split "foo:bar:frob" (char-set #\:))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - charset - leading delimiters"
|
||||||
|
(equal? '("" "" "foo" "bar" "frob")
|
||||||
|
(string-split "::foo:bar:frob" (char-set #\:))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - charset - trailing delimiters"
|
||||||
|
(equal? '("foo" "bar" "frob" "" "")
|
||||||
|
(string-split "foo:bar:frob::" (char-set #\:))))
|
||||||
|
|
||||||
|
(pass-if "empty string - pred"
|
||||||
|
(equal? '("")
|
||||||
|
(string-split "" (negate char-alphabetic?))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - pred - no delimiters"
|
||||||
|
(equal? '("foobarfrob")
|
||||||
|
(string-split "foobarfrob" (negate char-alphabetic?))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - pred - delimiters"
|
||||||
|
(equal? '("foo" "bar" "frob")
|
||||||
|
(string-split "foo:bar:frob" (negate char-alphabetic?))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - pred - leading delimiters"
|
||||||
|
(equal? '("" "" "foo" "bar" "frob")
|
||||||
|
(string-split "::foo:bar:frob" (negate char-alphabetic?))))
|
||||||
|
|
||||||
|
(pass-if "non-empty - pred - trailing delimiters"
|
||||||
|
(equal? '("foo" "bar" "frob" "" "")
|
||||||
|
(string-split "foo:bar:frob::" (negate char-alphabetic?)))))
|
||||||
|
|
||||||
(with-test-prefix "substring-move!"
|
(with-test-prefix "substring-move!"
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue