1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-10 14:00:21 +02:00

* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,

scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.
This commit is contained in:
Marius Vollmer 2004-08-19 16:49:42 +00:00
parent eb01cb6494
commit 3ee86942a7
8 changed files with 964 additions and 341 deletions

View file

@ -179,7 +179,7 @@ SCM_DEFINE (scm_registered_modules, "c-registered-modules", 0, 0, 0,
res = SCM_EOL;
for (md = registered_mods; md; md = md->link)
res = scm_cons (scm_cons (scm_makfrom0str (md->module_name),
res = scm_cons (scm_cons (scm_from_locale_string (md->module_name),
scm_from_ulong ((unsigned long) md->init_func)),
res);
return res;
@ -379,17 +379,17 @@ SCM
scm_makstr (size_t len, int dummy)
{
scm_c_issue_deprecation_warning
("'scm_makstr' is deprecated. Use 'scm_allocate_string' instead.");
return scm_allocate_string (len);
("'scm_makstr' is deprecated. Use 'scm_c_make_string' instead.");
return scm_c_make_string (len, SCM_UNDEFINED);
}
SCM
scm_makfromstr (const char *src, size_t len, int dummy SCM_UNUSED)
{
scm_c_issue_deprecation_warning ("`scm_makfromstr' is deprecated. "
"Use `scm_mem2string' instead.");
"Use `scm_from_locale_stringn' instead.");
return scm_mem2string (src, len);
return scm_from_locale_stringn (src, len);
}
SCM
@ -653,7 +653,7 @@ SCM
scm_sym2ovcell_soft (SCM sym, SCM obarray)
{
SCM lsym, z;
size_t hash = SCM_SYMBOL_HASH (sym) % SCM_VECTOR_LENGTH (obarray);
size_t hash = scm_i_symbol_hash (sym) % SCM_VECTOR_LENGTH (obarray);
scm_c_issue_deprecation_warning ("`scm_sym2ovcell_soft' is deprecated. "
"Use hashtables instead.");
@ -716,8 +716,8 @@ scm_sym2ovcell (SCM sym, SCM obarray)
SCM
scm_intern_obarray_soft (const char *name,size_t len,SCM obarray,unsigned int softness)
{
SCM symbol = scm_mem2symbol (name, len);
size_t raw_hash = SCM_SYMBOL_HASH (symbol);
SCM symbol = scm_from_locale_symboln (name, len);
size_t raw_hash = scm_i_symbol_hash (symbol);
size_t hash;
SCM lsym;
@ -814,8 +814,8 @@ SCM_DEFINE (scm_string_to_obarray_symbol, "string->obarray-symbol", 2, 1, 0,
else if (scm_is_eq (o, SCM_BOOL_T))
o = SCM_BOOL_F;
vcell = scm_intern_obarray_soft (SCM_I_STRING_CHARS(s),
SCM_I_STRING_LENGTH (s),
vcell = scm_intern_obarray_soft (scm_i_string_chars (s),
scm_i_string_length (s),
o,
softness);
if (scm_is_false (vcell))
@ -841,7 +841,7 @@ SCM_DEFINE (scm_intern_symbol, "intern-symbol", 2, 0, 0,
"Use hashtables instead.");
SCM_VALIDATE_VECTOR (1,o);
hval = SCM_SYMBOL_HASH (s) % SCM_VECTOR_LENGTH (o);
hval = scm_i_symbol_hash (s) % SCM_VECTOR_LENGTH (o);
/* If the symbol is already interned, simply return. */
SCM_REDEFER_INTS;
{
@ -883,7 +883,7 @@ SCM_DEFINE (scm_unintern_symbol, "unintern-symbol", 2, 0, 0,
if (scm_is_false (o))
return SCM_BOOL_F;
SCM_VALIDATE_VECTOR (1,o);
hval = SCM_SYMBOL_HASH (s) % SCM_VECTOR_LENGTH (o);
hval = scm_i_symbol_hash (s) % SCM_VECTOR_LENGTH (o);
SCM_DEFER_INTS;
{
SCM lsym_follow;
@ -1047,10 +1047,10 @@ SCM_DEFINE (scm_gentemp, "gentemp", 0, 2, 0,
else
{
SCM_VALIDATE_STRING (1, prefix);
len = SCM_I_STRING_LENGTH (prefix);
len = scm_i_string_length (prefix);
if (len > MAX_PREFIX_LENGTH)
name = SCM_MUST_MALLOC (MAX_PREFIX_LENGTH + SCM_INTBUFLEN);
strncpy (name, SCM_I_STRING_CHARS (prefix), len);
strncpy (name, scm_i_string_chars (prefix), len);
}
if (SCM_UNBNDP (obarray))
@ -1112,7 +1112,7 @@ scm_c_string2str (SCM obj, char *str, size_t *lenp)
{
char *result = scm_to_locale_string (obj);
if (lenp)
*lenp = SCM_I_STRING_LENGTH (obj);
*lenp = scm_i_string_length (obj);
return result;
}
else
@ -1140,6 +1140,25 @@ scm_c_substring2str (SCM obj, char *str, size_t start, size_t len)
return str;
}
/* Converts the given Scheme symbol OBJ into a C string, containing a copy
of OBJ's content with a trailing null byte. If LENP is non-NULL, set
*LENP to the string's length.
When STR is non-NULL it receives the copy and is returned by the function,
otherwise new memory is allocated and the caller is responsible for
freeing it via free(). If out of memory, NULL is returned.
Note that Scheme symbols may contain arbitrary data, including null
characters. This means that null termination is not a reliable way to
determine the length of the returned value. However, the function always
copies the complete contents of OBJ, and sets *LENP to the length of the
scheme symbol (if LENP is non-null). */
char *
scm_c_symbol2str (SCM obj, char *str, size_t *lenp)
{
return scm_c_string2str (scm_symbol_to_string (obj), str, lenp);
}
double
scm_truncate (double x)
{
@ -1156,6 +1175,23 @@ scm_round (double x)
return scm_c_round (x);
}
char *
SCM_SYMBOL_CHARS (SCM sym)
{
scm_c_issue_deprecation_warning
("SCM_SYMBOL_CHARS is deprecated. Use scm_symbol_to_string.");
return scm_i_symbol_chars (sym);
}
size_t
SCM_SYMBOL_LENGTH (SCM sym)
{
scm_c_issue_deprecation_warning
("SCM_SYMBOL_LENGTH is deprecated. Use scm_symbol_to_string.");
return scm_c_symbol_length (sym);
}
void
scm_i_init_deprecated ()
{

View file

@ -404,6 +404,8 @@ SCM_API char *scm_c_string2str (SCM obj, char *str, size_t *lenp);
*/
SCM_API char *scm_c_substring2str (SCM obj, char *str, size_t start, size_t len);
SCM_API char *scm_c_symbol2str (SCM obj, char *str, size_t *lenp);
/* Deprecated because the names belong to what is now
scm_truncate_number and scm_round_number.
*/
@ -438,6 +440,12 @@ SCM_API double scm_round (double x);
&& (size_t) c_end <= SCM_STRING_LENGTH (str));\
} while (0)
/* Deprecated because we don't want people to access the internals of
symbols directly.
*/
SCM_API char *SCM_SYMBOL_CHARS (SCM sym);
SCM_API size_t SCM_SYMBOL_LENGTH (SCM sym);
void scm_i_init_deprecated (void);

View file

@ -2,7 +2,7 @@
discourage something, move it here when that is feasible.
*/
/* Copyright (C) 2003 Free Software Foundation, Inc.
/* Copyright (C) 2003, 2004 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -82,6 +82,80 @@ scm_make_complex (double x, double y)
return scm_c_make_rectangular (x, y);
}
SCM
scm_mem2symbol (const char *mem, size_t len)
{
return scm_from_locale_symboln (mem, len);
}
SCM
scm_mem2uninterned_symbol (const char *mem, size_t len)
{
return scm_make_symbol (scm_from_locale_stringn (mem, len));
}
SCM
scm_str2symbol (const char *str)
{
return scm_from_locale_symbol (str);
}
/* This function must only be applied to memory obtained via malloc,
since the GC is going to apply `free' to it when the string is
dropped.
Also, s[len] must be `\0', since we promise that strings are
null-terminated. Perhaps we could handle non-null-terminated
strings by claiming they're shared substrings of a string we just
made up. */
SCM
scm_take_str (char *s, size_t len)
{
SCM answer = scm_from_locale_stringn (s, len);
free (s);
return answer;
}
/* `s' must be a malloc'd string. See scm_take_str. */
SCM
scm_take0str (char *s)
{
return scm_take_locale_string (s);
}
SCM
scm_mem2string (const char *src, size_t len)
{
return scm_from_locale_stringn (src, len);
}
SCM
scm_str2string (const char *src)
{
return scm_from_locale_string (src);
}
SCM
scm_makfrom0str (const char *src)
{
if (!src) return SCM_BOOL_F;
return scm_from_locale_string (src);
}
SCM
scm_makfrom0str_opt (const char *src)
{
return scm_makfrom0str (src);
}
SCM
scm_allocate_string (size_t len)
{
return scm_i_make_string (len, NULL);
}
void
scm_i_init_discouraged (void)
{

View file

@ -113,6 +113,41 @@ SCM_API double scm_num2double (SCM num, unsigned long int pos,
SCM_API SCM scm_make_complex (double x, double y);
/* Discouraged because they don't make the encoding explicit.
*/
SCM_API SCM scm_mem2symbol (const char *mem, size_t len);
SCM_API SCM scm_mem2uninterned_symbol (const char *mem, size_t len);
SCM_API SCM scm_str2symbol (const char *str);
SCM_API SCM scm_take_str (char *s, size_t len);
SCM_API SCM scm_take0str (char *s);
SCM_API SCM scm_mem2string (const char *src, size_t len);
SCM_API SCM scm_str2string (const char *src);
SCM_API SCM scm_makfrom0str (const char *src);
SCM_API SCM scm_makfrom0str_opt (const char *src);
/* Discouraged because scm_c_make_string has a better name and is more
consistent with make-string.
*/
SCM_API SCM scm_allocate_string (size_t len);
/* Discouraged because scm_is_symbol has a better name,
*/
#define SCM_SYMBOLP scm_is_symbol
/* Discouraged because the alternatives have the better names.
*/
#define SCM_SYMBOL_FUNC scm_symbol_fref
#define SCM_SET_SYMBOL_FUNC scm_symbol_fset_x
#define SCM_SYMBOL_PROPS scm_symbol_pref
#define SCM_SET_SYMBOL_PROPS scm_symbol_pset_x
/* Discouraged because there are better ways.
*/
#define SCM_SYMBOL_HASH scm_i_symbol_hash
#define SCM_SYMBOL_INTERNED_P(X) scm_i_symbol_is_interned
void scm_i_init_discouraged (void);
#endif /* SCM_ENABLE_DISCOURAGED == 1 */

View file

@ -1,4 +1,4 @@
/* Copyright (C) 1995,1996,1998,2000,2001 Free Software Foundation, Inc.
/* Copyright (C) 1995,1996,1998,2000,2001, 2004 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -19,6 +19,7 @@
#include <string.h>
#include <stdio.h>
#include "libguile/_scm.h"
#include "libguile/chars.h"
@ -33,12 +34,454 @@
/* {Strings}
*/
/* Stringbufs
*
* XXX - keeping an accurate refcount during GC seems to be quite
* tricky, so we just keep score of whether a stringbuf might be
* shared, not wether it definitely is.
*
* The scheme I (mvo) tried to keep an accurate reference count would
* recount all strings that point to a stringbuf during the mark-phase
* of the GC. This was done since one cannot access the stringbuf of
* a string when that string is freed (in order to decrease the
* reference count). The memory of the stringbuf might have been
* reused already for something completely different.
*
* This recounted worked for a small number of threads beating on
* cow-strings, but it failed randomly with more than 10 threads, say.
* I couldn't figure out what went wrong, so I used the conservative
* approach implemented below.
*
* A stringbuf needs to know its length, but only so that it can be
* reported when the stringbuf is freed.
*
* Stringbufs (and strings) are not stored very compactly: a stringbuf
* has room for about 2*sizeof(scm_t_bits)-1 bytes additional
* information. As a compensation, the code below is made more
* complicated by storing small strings inline in the double cell of a
* stringbuf. So we have fixstrings and bigstrings...
*/
#define STRINGBUF_F_SHARED 0x100
#define STRINGBUF_F_INLINE 0x200
#define STRINGBUF_TAG scm_tc7_stringbuf
#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
#define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
? STRINGBUF_INLINE_CHARS (buf) \
: STRINGBUF_OUTLINE_CHARS (buf))
#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
? STRINGBUF_INLINE_LENGTH (buf) \
: STRINGBUF_OUTLINE_LENGTH (buf))
#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
#define SET_STRINGBUF_SHARED(buf) \
(SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
#if SCM_DEBUG
static size_t lenhist[1001];
#endif
static SCM
make_stringbuf (size_t len)
{
/* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
scm_i_symbol_chars, all stringbufs are null-terminated. Once
SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
has been changed for scm_i_symbol_chars, this null-termination
can be dropped.
*/
#if SCM_DEBUG
if (len < 1000)
lenhist[len]++;
else
lenhist[1000]++;
#endif
if (len <= STRINGBUF_MAX_INLINE_LEN-1)
{
return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
0, 0, 0);
}
else
{
char *mem = scm_gc_malloc (len+1, "string");
mem[len] = '\0';
return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
(scm_t_bits) len, (scm_t_bits) 0);
}
}
SCM
scm_i_stringbuf_mark (SCM buf)
{
return SCM_BOOL_F;
}
void
scm_i_stringbuf_free (SCM buf)
{
if (!STRINGBUF_INLINE (buf))
scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
}
SCM_MUTEX (stringbuf_write_mutex);
/* Copy-on-write strings.
*/
#define STRING_TAG scm_tc7_string
#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
SCM
scm_i_make_string (size_t len, char **charsp)
{
SCM buf = make_stringbuf (len);
SCM res;
if (charsp)
*charsp = STRINGBUF_CHARS (buf);
res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
(scm_t_bits)0, (scm_t_bits) len);
return res;
}
static void
validate_substring_args (SCM str, size_t start, size_t end)
{
if (!IS_STRING (str))
scm_wrong_type_arg_msg (NULL, 0, str, "string");
if (start > STRING_LENGTH (str))
scm_out_of_range (NULL, scm_from_size_t (start));
if (end > STRING_LENGTH (str) || end < start)
scm_out_of_range (NULL, scm_from_size_t (end));
}
SCM
scm_i_substring (SCM str, size_t start, size_t end)
{
SCM buf = STRING_STRINGBUF (str);
scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
(scm_t_bits)start, (scm_t_bits) end - start);
}
SCM
scm_i_substring_copy (SCM str, size_t start, size_t end)
{
size_t len = end - start;
SCM buf = STRING_STRINGBUF (str);
SCM my_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (my_buf), STRINGBUF_CHARS (buf) + start, len);
scm_remember_upto_here_1 (buf);
return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
(scm_t_bits)0, (scm_t_bits) len);
}
SCM
scm_c_substring (SCM str, size_t start, size_t end)
{
validate_substring_args (str, start, end);
return scm_i_substring (str, start, end);
}
SCM
scm_c_substring_copy (SCM str, size_t start, size_t end)
{
validate_substring_args (str, start, end);
return scm_i_substring_copy (str, start, end);
}
/* Mutation-sharing substrings
*/
#define SH_STRING_TAG (scm_tc7_string + 0x100)
#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
/* START and LENGTH as for STRINGs. */
#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
SCM
scm_i_substring_shared (SCM str, size_t start, size_t end)
{
if (start == 0 && end == STRING_LENGTH (str))
return str;
else
{
SCM res = scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
(scm_t_bits)start, (scm_t_bits) end - start);
return res;
}
}
SCM
scm_c_substring_shared (SCM str, size_t start, size_t end)
{
validate_substring_args (str, start, end);
return scm_i_substring_shared (str, start, end);
}
SCM
scm_i_string_mark (SCM str)
{
if (IS_SH_STRING (str))
return SH_STRING_STRING (str);
else
return STRING_STRINGBUF (str);
}
void
scm_i_string_free (SCM str)
{
}
/* Internal accessors
*/
size_t
scm_i_string_length (SCM str)
{
return STRING_LENGTH (str);
}
const char *
scm_i_string_chars (SCM str)
{
SCM buf;
size_t start = STRING_START(str);
if (IS_SH_STRING (str))
{
str = SH_STRING_STRING (str);
start += STRING_START (str);
}
buf = STRING_STRINGBUF (str);
return STRINGBUF_CHARS (buf) + start;
}
char *
scm_i_string_writable_chars (SCM str)
{
SCM buf;
size_t start = STRING_START(str);
if (IS_SH_STRING (str))
{
str = SH_STRING_STRING (str);
start += STRING_START (str);
}
buf = STRING_STRINGBUF (str);
scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
if (STRINGBUF_SHARED (buf))
{
/* Clone stringbuf. For this, we put all threads to sleep.
*/
size_t len = STRING_LENGTH (str);
SCM new_buf;
scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
new_buf = make_stringbuf (len);
memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + STRING_START (str), len);
scm_i_thread_put_to_sleep ();
SET_STRING_STRINGBUF (str, new_buf);
start -= STRING_START (str);
SET_STRING_START (str, 0);
scm_i_thread_wake_up ();
buf = new_buf;
scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
}
return STRINGBUF_CHARS (buf) + start;
}
void
scm_i_string_stop_writing (void)
{
scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
}
/* Symbols.
Basic symbol creation and accessing is done here, the rest is in
symbols.[hc]. This has been done to keep stringbufs and the
internals of strings and string-like objects confined to this file.
*/
#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
SCM
scm_i_make_symbol (SCM name, unsigned long hash, SCM props)
{
SCM buf;
size_t start = STRING_START (name);
size_t length = STRING_LENGTH (name);
if (IS_SH_STRING (name))
{
name = SH_STRING_STRING (name);
start += STRING_START (name);
}
buf = SYMBOL_STRINGBUF (name);
if (start == 0 && length == STRINGBUF_LENGTH (buf))
{
/* reuse buf. */
scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
}
else
{
/* make new buf. */
SCM new_buf = make_stringbuf (length);
memcpy (STRINGBUF_CHARS (new_buf),
STRINGBUF_CHARS (buf) + start, length);
buf = new_buf;
}
return scm_double_cell (scm_tc7_symbol, SCM_UNPACK (buf),
(scm_t_bits) hash, SCM_UNPACK (props));
}
size_t
scm_i_symbol_length (SCM sym)
{
return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
}
const char *
scm_i_symbol_chars (SCM sym)
{
SCM buf = SYMBOL_STRINGBUF (sym);
return STRINGBUF_CHARS (buf);
}
SCM
scm_i_symbol_mark (SCM sym)
{
scm_gc_mark (SYMBOL_STRINGBUF (sym));
return SCM_CELL_OBJECT_3 (sym);
}
void
scm_i_symbol_free (SCM sym)
{
}
SCM
scm_i_symbol_substring (SCM sym, size_t start, size_t end)
{
SCM buf = SYMBOL_STRINGBUF (sym);
scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
SET_STRINGBUF_SHARED (buf);
scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
(scm_t_bits)start, (scm_t_bits) end - start);
}
/* Debugging
*/
#if SCM_DEBUG
SCM scm_sys_string_dump (SCM);
SCM scm_sys_symbol_dump (SCM);
SCM scm_sys_stringbuf_hist (void);
SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
(SCM str),
"")
#define FUNC_NAME s_scm_sys_string_dump
{
SCM_VALIDATE_STRING (1, str);
fprintf (stderr, "%p:\n", str);
fprintf (stderr, " start: %u\n", STRING_START (str));
fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
if (IS_SH_STRING (str))
{
fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
fprintf (stderr, "\n");
scm_sys_string_dump (SH_STRING_STRING (str));
}
else
{
SCM buf = STRING_STRINGBUF (str);
fprintf (stderr, " buf: %p\n", buf);
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
}
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
(SCM sym),
"")
#define FUNC_NAME s_scm_sys_symbol_dump
{
SCM_VALIDATE_SYMBOL (1, sym);
fprintf (stderr, "%p:\n", sym);
fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
{
SCM buf = SYMBOL_STRINGBUF (sym);
fprintf (stderr, " buf: %p\n", buf);
fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
}
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
(void),
"")
#define FUNC_NAME s_scm_sys_string_dump
{
int i;
for (i = 0; i < 1000; i++)
if (lenhist[i])
fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
fprintf (stderr, ">999: %u\n", lenhist[1000]);
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
#endif
SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
(SCM obj),
"Return @code{#t} if @var{obj} is a string, else @code{#f}.")
#define FUNC_NAME s_scm_string_p
{
return scm_from_bool (SCM_I_STRINGP (obj));
return scm_from_bool (IS_STRING (obj));
}
#undef FUNC_NAME
@ -53,126 +496,35 @@ SCM_DEFINE (scm_string, "string", 0, 0, 1,
#define FUNC_NAME s_scm_string
{
SCM result;
size_t len;
char *data;
{
long i = scm_ilength (chrs);
SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
result = scm_allocate_string (i);
len = i;
}
{
unsigned char *data = SCM_I_STRING_UCHARS (result);
result = scm_i_make_string (len, &data);
while (len > 0 && SCM_CONSP (chrs))
{
SCM elt = SCM_CAR (chrs);
while (!SCM_NULLP (chrs))
{
SCM elt = SCM_CAR (chrs);
SCM_VALIDATE_CHAR (SCM_ARGn, elt);
*data++ = SCM_CHAR (elt);
chrs = SCM_CDR (chrs);
len--;
}
if (len > 0)
scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
if (!SCM_NULLP (chrs))
scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
SCM_VALIDATE_CHAR (SCM_ARGn, elt);
*data++ = SCM_CHAR (elt);
chrs = SCM_CDR (chrs);
}
}
return result;
}
#undef FUNC_NAME
/* converts C scm_array of strings to SCM scm_list of strings. */
/* If argc < 0, a null terminated scm_array is assumed. */
SCM
scm_makfromstrs (int argc, char **argv)
{
int i = argc;
SCM lst = SCM_EOL;
if (0 > i)
for (i = 0; argv[i]; i++);
while (i--)
lst = scm_cons (scm_mem2string (argv[i], strlen (argv[i])), lst);
return lst;
}
/* This function must only be applied to memory obtained via malloc,
since the GC is going to apply `free' to it when the string is
dropped.
Also, s[len] must be `\0', since we promise that strings are
null-terminated. Perhaps we could handle non-null-terminated
strings by claiming they're shared substrings of a string we just
made up. */
SCM
scm_take_str (char *s, size_t len)
#define FUNC_NAME "scm_take_str"
{
SCM answer;
SCM_ASSERT_RANGE (2, scm_from_ulong (len), len <= SCM_STRING_MAX_LENGTH);
answer = scm_cell (SCM_I_MAKE_STRING_TAG (len), (scm_t_bits) s);
scm_gc_register_collectable_memory (s, len+1, "string");
return answer;
}
#undef FUNC_NAME
/* `s' must be a malloc'd string. See scm_take_str. */
SCM
scm_take0str (char *s)
{
return scm_take_locale_string (s);
}
SCM
scm_mem2string (const char *src, size_t len)
{
return scm_from_locale_stringn (src, len);
}
SCM
scm_str2string (const char *src)
{
return scm_from_locale_string (src);
}
SCM
scm_makfrom0str (const char *src)
{
if (!src) return SCM_BOOL_F;
return scm_from_locale_string (src);
}
SCM
scm_makfrom0str_opt (const char *src)
{
return scm_makfrom0str (src);
}
SCM
scm_allocate_string (size_t len)
#define FUNC_NAME "scm_allocate_string"
{
char *mem;
SCM s;
SCM_ASSERT_RANGE (1, scm_from_size_t (len), len <= SCM_STRING_MAX_LENGTH);
mem = (char *) scm_gc_malloc (len + 1, "string");
mem[len] = 0;
s = scm_cell (SCM_I_MAKE_STRING_TAG (len), (scm_t_bits) mem);
return s;
}
#undef FUNC_NAME
SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
(SCM k, SCM chr),
"Return a newly allocated string of\n"
@ -181,34 +533,45 @@ SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
"of the @var{string} are unspecified.")
#define FUNC_NAME s_scm_make_string
{
size_t i = scm_to_unsigned_integer (k, 0, SCM_STRING_MAX_LENGTH);
SCM res = scm_allocate_string (i);
return scm_c_make_string (scm_to_size_t (k), chr);
}
#undef FUNC_NAME
SCM
scm_c_make_string (size_t len, SCM chr)
#define FUNC_NAME NULL
{
char *dst;
SCM res = scm_i_make_string (len, &dst);
if (!SCM_UNBNDP (chr))
{
unsigned char *dst;
SCM_VALIDATE_CHAR (2, chr);
dst = SCM_I_STRING_UCHARS (res);
memset (dst, SCM_CHAR (chr), i);
SCM_VALIDATE_CHAR (0, chr);
memset (dst, SCM_CHAR (chr), len);
}
return res;
}
#undef FUNC_NAME
SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
(SCM string),
"Return the number of characters in @var{string}.")
#define FUNC_NAME s_scm_string_length
{
SCM_VALIDATE_STRING (1, string);
return scm_from_size_t (SCM_I_STRING_LENGTH (string));
return scm_from_size_t (STRING_LENGTH (string));
}
#undef FUNC_NAME
size_t
scm_c_string_length (SCM string)
{
if (!IS_STRING (string))
scm_wrong_type_arg_msg (NULL, 0, string, "string");
return STRING_LENGTH (string);
}
SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
(SCM str, SCM k),
"Return character @var{k} of @var{str} using zero-origin\n"
@ -218,11 +581,18 @@ SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
unsigned long idx;
SCM_VALIDATE_STRING (1, str);
idx = scm_to_unsigned_integer (k, 0, SCM_I_STRING_LENGTH(str)-1);
return SCM_MAKE_CHAR (SCM_I_STRING_UCHARS (str)[idx]);
idx = scm_to_unsigned_integer (k, 0, scm_i_string_length (str)-1);
return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
}
#undef FUNC_NAME
SCM
scm_c_string_ref (SCM str, size_t p)
{
if (p >= scm_i_string_length (str))
scm_out_of_range (NULL, scm_from_size_t (p));
return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
}
SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
(SCM str, SCM k, SCM chr),
@ -234,13 +604,28 @@ SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
unsigned long idx;
SCM_VALIDATE_STRING (1, str);
idx = scm_to_unsigned_integer (k, 0, SCM_I_STRING_LENGTH(str)-1);
idx = scm_to_unsigned_integer (k, 0, scm_i_string_length(str)-1);
SCM_VALIDATE_CHAR (3, chr);
SCM_I_STRING_UCHARS (str)[idx] = SCM_CHAR (chr);
{
char *dst = scm_i_string_writable_chars (str);
dst[idx] = SCM_CHAR (chr);
scm_i_string_stop_writing ();
}
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
void
scm_c_string_set_x (SCM str, size_t p, SCM chr)
{
if (p >= scm_i_string_length (str))
scm_out_of_range (NULL, scm_from_size_t (p));
{
char *dst = scm_i_string_writable_chars (str);
dst[p] = SCM_CHAR (chr);
scm_i_string_stop_writing ();
}
}
SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
(SCM str, SCM start, SCM end),
@ -252,24 +637,64 @@ SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
"0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
#define FUNC_NAME s_scm_substring
{
unsigned long int from;
unsigned long int to;
SCM substr;
size_t len, from, to;
SCM_VALIDATE_STRING (1, str);
from = scm_to_unsigned_integer (start, 0, SCM_I_STRING_LENGTH(str));
len = scm_i_string_length (str);
from = scm_to_unsigned_integer (start, 0, len);
if (SCM_UNBNDP (end))
to = SCM_I_STRING_LENGTH(str);
to = len;
else
to = scm_to_unsigned_integer (end, from, SCM_I_STRING_LENGTH(str));
substr = scm_allocate_string (to - from);
memcpy (SCM_I_STRING_CHARS (substr), SCM_I_STRING_CHARS (str) + from,
to - from);
scm_remember_upto_here_1 (str);
return substr;
to = scm_to_unsigned_integer (end, from, len);
return scm_i_substring (str, from, to);
}
#undef FUNC_NAME
SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
(SCM str, SCM start, SCM end),
"Return a newly allocated string formed from the characters\n"
"of @var{str} beginning with index @var{start} (inclusive) and\n"
"ending with index @var{end} (exclusive).\n"
"@var{str} must be a string, @var{start} and @var{end} must be\n"
"exact integers satisfying:\n\n"
"0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
#define FUNC_NAME s_scm_substring_copy
{
size_t len, from, to;
SCM_VALIDATE_STRING (1, str);
len = scm_i_string_length (str);
from = scm_to_unsigned_integer (start, 0, len);
if (SCM_UNBNDP (end))
to = len;
else
to = scm_to_unsigned_integer (end, from, len);
return scm_i_substring_copy (str, from, to);
}
#undef FUNC_NAME
SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
(SCM str, SCM start, SCM end),
"Return string that indirectly refers to the characters\n"
"of @var{str} beginning with index @var{start} (inclusive) and\n"
"ending with index @var{end} (exclusive).\n"
"@var{str} must be a string, @var{start} and @var{end} must be\n"
"exact integers satisfying:\n\n"
"0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
#define FUNC_NAME s_scm_substring_shared
{
size_t len, from, to;
SCM_VALIDATE_STRING (1, str);
len = scm_i_string_length (str);
from = scm_to_unsigned_integer (start, 0, len);
if (SCM_UNBNDP (end))
to = len;
else
to = scm_to_unsigned_integer (end, from, len);
return scm_i_substring_shared (str, from, to);
}
#undef FUNC_NAME
SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
(SCM args),
@ -287,15 +712,16 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
{
s = SCM_CAR (l);
SCM_VALIDATE_STRING (SCM_ARGn, s);
i += SCM_I_STRING_LENGTH (s);
i += scm_i_string_length (s);
}
res = scm_allocate_string (i);
data = SCM_I_STRING_CHARS (res);
res = scm_i_make_string (i, &data);
for (l = args; !SCM_NULLP (l); l = SCM_CDR (l))
{
s = SCM_CAR (l);
memcpy (data, SCM_I_STRING_CHARS (s), SCM_I_STRING_LENGTH (s));
data += SCM_I_STRING_LENGTH (s);
SCM_VALIDATE_STRING (SCM_ARGn, s);
size_t len = scm_i_string_length (s);
memcpy (data, scm_i_string_chars (s), len);
data += len;
scm_remember_upto_here_1 (s);
}
return res;
@ -305,7 +731,7 @@ SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
int
scm_is_string (SCM obj)
{
return SCM_I_STRINGP (obj);
return IS_STRING (obj);
}
SCM
@ -316,8 +742,7 @@ scm_from_locale_stringn (const char *str, size_t len)
if (len == (size_t)-1)
len = strlen (str);
res = scm_allocate_string (len);
dst = SCM_I_STRING_CHARS (res);
res = scm_i_make_string (len, &dst);
memcpy (dst, str, len);
return res;
}
@ -348,17 +773,14 @@ SCM
scm_take_locale_string (char *str)
{
size_t len = strlen (str);
SCM res;
SCM buf, res;
if (len > SCM_STRING_MAX_LENGTH)
{
free (str);
scm_out_of_range (NULL, scm_from_size_t (len));
}
res = scm_cell (SCM_I_MAKE_STRING_TAG (len), (scm_t_bits) str);
buf = scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
(scm_t_bits) len, (scm_t_bits) 0);
res = scm_double_cell (STRING_TAG,
SCM_UNPACK (buf),
(scm_t_bits) 0, (scm_t_bits) len);
scm_gc_register_collectable_memory (str, len+1, "string");
return res;
}
@ -368,11 +790,11 @@ scm_to_locale_stringn (SCM str, size_t *lenp)
char *res;
size_t len;
if (!SCM_I_STRINGP (str))
if (!scm_is_string (str))
scm_wrong_type_arg_msg (NULL, 0, str, "string");
len = SCM_I_STRING_LENGTH (str);
len = scm_i_string_length (str);
res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
memcpy (res, SCM_I_STRING_CHARS (str), len);
memcpy (res, scm_i_string_chars (str), len);
if (lenp == NULL)
{
res[len] = '\0';
@ -402,14 +824,28 @@ scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
{
size_t len;
if (!SCM_I_STRINGP (str))
if (!scm_is_string (str))
scm_wrong_type_arg_msg (NULL, 0, str, "string");
len = SCM_I_STRING_LENGTH (str);
memcpy (buf, SCM_I_STRING_CHARS (str), (len > max_len)? max_len : len);
len = scm_i_string_length (str);
memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
scm_remember_upto_here_1 (str);
return len;
}
/* converts C scm_array of strings to SCM scm_list of strings. */
/* If argc < 0, a null terminated scm_array is assumed. */
SCM
scm_makfromstrs (int argc, char **argv)
{
int i = argc;
SCM lst = SCM_EOL;
if (0 > i)
for (i = 0; argv[i]; i++);
while (i--)
lst = scm_cons (scm_from_locale_string (argv[i]), lst);
return lst;
}
/* Return a newly allocated array of char pointers to each of the strings
in args, with a terminating NULL pointer. */
@ -468,10 +904,50 @@ scm_i_get_substring_spec (size_t len,
*cend = scm_to_unsigned_integer (end, *cstart, len);
}
#if SCM_ENABLE_DEPRECATED
int
SCM_STRINGP (SCM str)
{
scm_c_issue_deprecation_warning
("SCM_STRINGP is deprecated. Use scm_is_string instead.");
/* We don't accept shared substrings here since they are not
null-terminated.
*/
return IS_STRING (str) && !IS_SH_STRING (str);
}
char *
SCM_STRING_CHARS (SCM str)
{
char *chars;
scm_c_issue_deprecation_warning
("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
/* The following is wrong, of course...
*/
chars = scm_i_string_writable_chars (str);
scm_i_string_stop_writing ();
return chars;
}
size_t
SCM_STRING_LENGTH (SCM str)
{
scm_c_issue_deprecation_warning
("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
return scm_c_string_length (str);
}
#endif
void
scm_init_strings ()
{
scm_nullstr = scm_allocate_string (0);
scm_nullstr = scm_i_make_string (0, NULL);
#include "libguile/strings.x"
}

View file

@ -3,7 +3,7 @@
#ifndef SCM_STRINGS_H
#define SCM_STRINGS_H
/* Copyright (C) 1995,1996,1997,1998,2000,2001 Free Software Foundation, Inc.
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2004 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -26,20 +26,56 @@
#define SCM_STRING_MAX_LENGTH ((SCM_T_BITS_MAX-255)/256)
/* String representation.
#define SCM_I_MAKE_STRING_TAG(l) ((((scm_t_bits) (l)) << 8) + scm_tc7_string)
#define SCM_I_STRINGP(x) (!SCM_IMP (x) && (SCM_TYP7 (x) == scm_tc7_string))
#define SCM_I_STRING_UCHARS(x) ((unsigned char *) (SCM_CELL_WORD_1 (x)))
#define SCM_I_STRING_CHARS(x) ((char *) (SCM_CELL_WORD_1 (x)))
#define SCM_I_STRING_LENGTH(x) ((size_t) (SCM_CELL_WORD_0 (x) >> 8))
A string is a piece of a stringbuf. A stringbuf can be used by
more than one string. When a string is written to and the
stringbuf of that string is used by more than one string, a new
stringbuf is created. That is, strings are copy-on-write. This
behavior can be used to make the substring operation quite
efficient.
#define SCM_STRINGP SCM_I_STRINGP
#define SCM_STRING_CHARS SCM_I_STRING_CHARS
#define SCM_STRING_UCHARS SCM_I_STRING_UCHARS
#define SCM_STRING_LENGTH SCM_I_STRING_LENGTH
The implementation is tuned so that mutating a string is costly,
but just reading it is cheap and lock-free.
There are also mutation-sharing strings. They refer to a part of
an ordinary string. Writing to a mutation-sharing string just
writes to the ordinary string.
Internal, low level interface to the character arrays
- Use scm_i_string_chars to get a pointer to the byte array of a
string for reading. Use scm_i_string_length to get the number of
bytes in that array. The array is not null-terminated.
- The array is valid as long as the corresponding SCM object is
protected but only until the next SCM_TICK. During such a 'safe
point', strings might change their representation.
- Use scm_i_string_writable_chars to get the same pointer as with
scm_i_string_chars, but for reading and writing. This is a
potentially costly operation since it implements the
copy-on-write behavior. When done with the writing, call
scm_i_string_stop_writing. You must do this before the next
SCM_TICK. (This means, before calling almost any other scm_
function and you can't allow throws, of course.)
- New strings can be created with scm_i_make_string. This gives
access to a writable pointer that remains valid as long as nobody
else makes a copy-on-write substring of the string. Do not call
scm_i_string_stop_writing for this pointer.
Legacy interface
- SCM_STRINGP returns false for sh-strings.
- SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately
calls scm_i_stop_writing, hoping for the best. SCM_STRING_LENGTH
is the same as scm_i_string_length. SCM_STRINGP will only return
true for strings that are null-terminated when accessed with
SCM_STRING_CHARS.
*/
SCM_API SCM scm_string_p (SCM x);
SCM_API SCM scm_string (SCM chrs);
@ -48,16 +84,17 @@ SCM_API SCM scm_string_length (SCM str);
SCM_API SCM scm_string_ref (SCM str, SCM k);
SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);
SCM_API SCM scm_substring (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
SCM_API SCM scm_makfromstrs (int argc, char **argv);
SCM_API SCM scm_take_str (char *s, size_t len);
SCM_API SCM scm_take0str (char *s);
SCM_API SCM scm_mem2string (const char *src, size_t len);
SCM_API SCM scm_str2string (const char *src);
SCM_API SCM scm_makfrom0str (const char *src);
SCM_API SCM scm_makfrom0str_opt (const char *src);
SCM_API SCM scm_allocate_string (size_t len);
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str);
SCM_API SCM scm_c_string_ref (SCM str, size_t pos);
SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr);
SCM_API SCM scm_c_substring (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
SCM_API int scm_is_string (SCM x);
SCM_API SCM scm_from_locale_string (const char *str);
@ -68,6 +105,35 @@ SCM_API char *scm_to_locale_string (SCM str);
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
SCM_API SCM scm_makfromstrs (int argc, char **argv);
/* internal accessor functions. Arguments must be valid. */
SCM_API SCM scm_i_make_string (size_t len, char **datap);
SCM_API SCM scm_i_substring (SCM str, size_t start, size_t end);
SCM_API SCM scm_i_substring_shared (SCM str, size_t start, size_t end);
SCM_API SCM scm_i_substring_copy (SCM str, size_t start, size_t end);
SCM_API size_t scm_i_string_length (SCM str);
SCM_API const char *scm_i_string_chars (SCM str);
SCM_API char *scm_i_string_writable_chars (SCM str);
SCM_API void scm_i_string_stop_writing (void);
/* internal functions related to symbols. */
SCM_API SCM scm_i_make_symbol (SCM name, unsigned long hash, SCM props);
SCM_API const char *scm_i_symbol_chars (SCM sym);
SCM_API size_t scm_i_symbol_length (SCM sym);
SCM_API SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
/* internal GC functions. */
SCM_API SCM scm_i_string_mark (SCM str);
SCM_API SCM scm_i_stringbuf_mark (SCM buf);
SCM_API SCM scm_i_symbol_mark (SCM buf);
SCM_API void scm_i_string_free (SCM str);
SCM_API void scm_i_stringbuf_free (SCM buf);
SCM_API void scm_i_symbol_free (SCM sym);
/* internal utility functions. */
SCM_API char **scm_i_allocate_string_pointers (SCM list);
@ -76,6 +142,17 @@ SCM_API void scm_i_get_substring_spec (size_t len,
SCM start, size_t *cstart,
SCM end, size_t *cend);
/* deprecated stuff */
#if SCM_ENABLE_DEPRECATED
SCM_API int SCM_STRINGP (SCM obj);
SCM_API char *SCM_STRING_CHARS (SCM str);
SCM_API size_t SCM_STRING_LENGTH (SCM str);
#define SCM_STRING_UCHARS ((unsigned char *)SCM_STRING_CHARS (str))
#endif
SCM_API void scm_init_strings (void);
#endif /* SCM_STRINGS_H */

View file

@ -69,7 +69,7 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
* without first creating an SCM string object. (This would have
* been necessary if we had used the hashtable API in hashtab.h.)
*
* 2. We can use the raw hash value stored in SCM_SYMBOL_HASH (sym)
* 2. We can use the raw hash value stored in scm_i_symbol_hash (sym)
* to speed up lookup.
*
* Both optimizations might be possible without breaking the
@ -79,12 +79,15 @@ SCM_DEFINE (scm_sys_symbols, "%symbols", 0, 0, 0,
unsigned long
scm_i_hash_symbol (SCM obj, unsigned long n, void *closure)
{
return SCM_SYMBOL_HASH (obj) % n;
return scm_i_symbol_hash (obj) % n;
}
SCM
scm_mem2symbol (const char *name, size_t len)
static SCM
scm_i_mem2symbol (SCM str)
{
const char *name = scm_i_string_chars (str);
size_t len = scm_i_string_length (str);
size_t raw_hash = scm_string_hash ((const unsigned char *) name, len) / 2;
size_t hash = raw_hash % SCM_HASHTABLE_N_BUCKETS (symbols);
@ -98,10 +101,10 @@ scm_mem2symbol (const char *name, size_t len)
l = SCM_CDR (l))
{
SCM sym = SCM_CAAR (l);
if (SCM_SYMBOL_HASH (sym) == raw_hash
&& SCM_SYMBOL_LENGTH (sym) == len)
if (scm_i_symbol_hash (sym) == raw_hash
&& scm_i_symbol_length (sym) == len)
{
char *chrs = SCM_SYMBOL_CHARS (sym);
const char *chrs = scm_i_symbol_chars (sym);
size_t i = len;
while (i != 0)
@ -120,11 +123,8 @@ scm_mem2symbol (const char *name, size_t len)
{
/* The symbol was not found - create it. */
SCM symbol = scm_double_cell (SCM_MAKE_SYMBOL_TAG (len),
(scm_t_bits) scm_gc_strndup (name, len,
"symbol"),
raw_hash,
SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL)));
SCM symbol = scm_i_make_symbol (str, raw_hash,
scm_cons (SCM_BOOL_F, SCM_EOL));
SCM slot = SCM_HASHTABLE_BUCKETS (symbols) [hash];
SCM cell = scm_cons (symbol, SCM_UNDEFINED);
@ -137,23 +137,17 @@ scm_mem2symbol (const char *name, size_t len)
}
}
SCM
scm_mem2uninterned_symbol (const char *name, size_t len)
static SCM
scm_i_mem2uninterned_symbol (SCM str)
{
const char *name = scm_i_string_chars (str);
size_t len = scm_i_string_length (str);
size_t raw_hash = (scm_string_hash ((const unsigned char *) name, len)/2
+ SCM_T_BITS_MAX/2 + 1);
return scm_double_cell (SCM_MAKE_SYMBOL_TAG (len),
(scm_t_bits) scm_gc_strndup (name, len,
"symbol"),
raw_hash,
SCM_UNPACK (scm_cons (SCM_BOOL_F, SCM_EOL)));
}
SCM
scm_str2symbol (const char *str)
{
return scm_mem2symbol (str, strlen (str));
return scm_i_make_symbol (str, raw_hash,
scm_cons (SCM_BOOL_F, SCM_EOL));
}
SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
@ -162,7 +156,7 @@ SCM_DEFINE (scm_symbol_p, "symbol?", 1, 0, 0,
"@code{#f}.")
#define FUNC_NAME s_scm_symbol_p
{
return scm_from_bool (SCM_SYMBOLP (obj));
return scm_from_bool (scm_is_symbol (obj));
}
#undef FUNC_NAME
@ -173,7 +167,7 @@ SCM_DEFINE (scm_symbol_interned_p, "symbol-interned?", 1, 0, 0,
#define FUNC_NAME s_scm_symbol_interned_p
{
SCM_VALIDATE_SYMBOL (1, symbol);
return scm_from_bool (SCM_SYMBOL_INTERNED_P (symbol));
return scm_from_bool (scm_i_symbol_is_interned (symbol));
}
#undef FUNC_NAME
@ -184,12 +178,8 @@ SCM_DEFINE (scm_make_symbol, "make-symbol", 1, 0, 0,
"calls to @code{string->symbol} will not return it.")
#define FUNC_NAME s_scm_make_symbol
{
SCM sym;
SCM_VALIDATE_STRING (1, name);
sym = scm_mem2uninterned_symbol (SCM_I_STRING_CHARS (name),
SCM_I_STRING_LENGTH (name));
scm_remember_upto_here_1 (name);
return sym;
return scm_i_mem2uninterned_symbol (name);
}
#undef FUNC_NAME
@ -220,11 +210,8 @@ SCM_DEFINE (scm_symbol_to_string, "symbol->string", 1, 0, 0,
"@end lisp")
#define FUNC_NAME s_scm_symbol_to_string
{
SCM str;
SCM_VALIDATE_SYMBOL (1, s);
str = scm_mem2string (SCM_SYMBOL_CHARS (s), SCM_SYMBOL_LENGTH (s));
scm_remember_upto_here_1 (s);
return str;
return scm_i_symbol_substring (s, 0, scm_i_symbol_length (s));
}
#undef FUNC_NAME
@ -253,12 +240,8 @@ SCM_DEFINE (scm_string_to_symbol, "string->symbol", 1, 0, 0,
"@end lisp")
#define FUNC_NAME s_scm_string_to_symbol
{
SCM sym;
SCM_VALIDATE_STRING (1, string);
sym = scm_mem2symbol (SCM_I_STRING_CHARS (string),
SCM_I_STRING_LENGTH (string));
scm_remember_upto_here_1 (string);
return sym;
return scm_i_mem2symbol (string);
}
#undef FUNC_NAME
@ -274,39 +257,23 @@ SCM_DEFINE (scm_gensym, "gensym", 0, 1, 0,
#define FUNC_NAME s_scm_gensym
{
static int gensym_counter = 0;
SCM suffix, name;
int n, n_digits;
char buf[SCM_INTBUFLEN];
char buf[MAX_PREFIX_LENGTH + SCM_INTBUFLEN];
char *name = buf;
size_t len;
if (SCM_UNBNDP (prefix))
{
name[0] = ' ';
name[1] = 'g';
len = 2;
}
else
{
SCM_VALIDATE_STRING (1, prefix);
len = SCM_I_STRING_LENGTH (prefix);
if (len > MAX_PREFIX_LENGTH)
name = scm_malloc (len + SCM_INTBUFLEN);
memcpy (name, SCM_I_STRING_CHARS (prefix), len);
scm_remember_upto_here_1 (prefix);
}
{
int n, n_digits;
prefix = scm_from_locale_string (" g");
/* mutex in case another thread looks and incs at the exact same moment */
scm_mutex_lock (&scm_i_misc_mutex);
n = gensym_counter++;
scm_mutex_unlock (&scm_i_misc_mutex);
/* mutex in case another thread looks and incs at the exact same moment */
scm_mutex_lock (&scm_i_misc_mutex);
n = gensym_counter++;
scm_mutex_unlock (&scm_i_misc_mutex);
n_digits = scm_iint2str (n, 10, &name[len]);
SCM res = scm_mem2symbol (name, len + n_digits);
if (name != buf)
free (name);
return res;
}
n_digits = scm_iint2str (n, 10, buf);
suffix = scm_from_locale_stringn (buf, n_digits);
name = scm_string_append (scm_list_2 (prefix, suffix));
return scm_string_to_symbol (name);
}
#undef FUNC_NAME
@ -316,7 +283,7 @@ SCM_DEFINE (scm_symbol_hash, "symbol-hash", 1, 0, 0,
#define FUNC_NAME s_scm_symbol_hash
{
SCM_VALIDATE_SYMBOL (1, symbol);
return scm_from_ulong (SCM_SYMBOL_HASH (symbol));
return scm_from_ulong (scm_i_symbol_hash (symbol));
}
#undef FUNC_NAME
@ -326,7 +293,7 @@ SCM_DEFINE (scm_symbol_fref, "symbol-fref", 1, 0, 0,
#define FUNC_NAME s_scm_symbol_fref
{
SCM_VALIDATE_SYMBOL (1, s);
return SCM_SYMBOL_FUNC (s);
return SCM_CAR (SCM_CELL_OBJECT_3 (s));
}
#undef FUNC_NAME
@ -337,7 +304,7 @@ SCM_DEFINE (scm_symbol_pref, "symbol-pref", 1, 0, 0,
#define FUNC_NAME s_scm_symbol_pref
{
SCM_VALIDATE_SYMBOL (1, s);
return SCM_SYMBOL_PROPS (s);
return SCM_CDR (SCM_CELL_OBJECT_3 (s));
}
#undef FUNC_NAME
@ -348,7 +315,7 @@ SCM_DEFINE (scm_symbol_fset_x, "symbol-fset!", 2, 0, 0,
#define FUNC_NAME s_scm_symbol_fset_x
{
SCM_VALIDATE_SYMBOL (1, s);
SCM_SET_SYMBOL_FUNC (s, val);
SCM_SETCAR (SCM_CELL_OBJECT_3 (s), val);
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
@ -360,56 +327,22 @@ SCM_DEFINE (scm_symbol_pset_x, "symbol-pset!", 2, 0, 0,
#define FUNC_NAME s_scm_symbol_pset_x
{
SCM_VALIDATE_SYMBOL (1, s);
SCM_DEFER_INTS;
SCM_SET_SYMBOL_PROPS (s, val);
SCM_ALLOW_INTS;
SCM_SETCDR (SCM_CELL_OBJECT_3 (s), val);
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
/* Converts the given Scheme symbol OBJ into a C string, containing a copy
of OBJ's content with a trailing null byte. If LENP is non-NULL, set
*LENP to the string's length.
When STR is non-NULL it receives the copy and is returned by the function,
otherwise new memory is allocated and the caller is responsible for
freeing it via free(). If out of memory, NULL is returned.
Note that Scheme symbols may contain arbitrary data, including null
characters. This means that null termination is not a reliable way to
determine the length of the returned value. However, the function always
copies the complete contents of OBJ, and sets *LENP to the length of the
scheme symbol (if LENP is non-null). */
#define FUNC_NAME "scm_c_symbol2str"
char *
scm_c_symbol2str (SCM obj, char *str, size_t *lenp)
SCM
scm_from_locale_symbol (const char *sym)
{
size_t len;
SCM_ASSERT (SCM_SYMBOLP (obj), obj, SCM_ARG1, FUNC_NAME);
len = SCM_SYMBOL_LENGTH (obj);
if (str == NULL)
{
/* FIXME: Should we use exported wrappers for malloc (and free), which
* allow windows DLLs to call the correct freeing function? */
str = (char *) scm_malloc ((len + 1) * sizeof (char));
if (str == NULL)
return NULL;
}
memcpy (str, SCM_SYMBOL_CHARS (obj), len);
scm_remember_upto_here_1 (obj);
str[len] = '\0';
if (lenp != NULL)
*lenp = len;
return str;
return scm_string_to_symbol (scm_from_locale_string (sym));
}
#undef FUNC_NAME
SCM
scm_from_locale_symboln (const char *sym, size_t len)
{
return scm_string_to_symbol (scm_from_locale_stringn (sym, len));
}
void
scm_symbols_prehistory ()

View file

@ -3,7 +3,7 @@
#ifndef SCM_SYMBOLS_H
#define SCM_SYMBOLS_H
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003 Free Software Foundation, Inc.
/* Copyright (C) 1995,1996,1997,1998,2000,2001, 2003, 2004 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -25,39 +25,16 @@
#include "libguile/__scm.h"
/* SCM_SYMBOL_LENGTH(SYM) is the length of SYM's name in characters, and
* SCM_SYMBOL_CHARS(SYM) is the address of the first character of SYM's name.
*
* SCM_SYMBOL_HASH is a hash value for the symbol. It is also used to
* encode whether the symbol is interned or not. See
* SCM_SYMBOL_INTERNED_P.
*/
#define SCM_SYMBOLP(x) (!SCM_IMP (x) && (SCM_TYP7 (x) == scm_tc7_symbol))
#define SCM_SYMBOL_LENGTH(x) (((unsigned long) SCM_CELL_WORD_0 (x)) >> 8)
#define SCM_MAKE_SYMBOL_TAG(l) (((l) << 8) + scm_tc7_symbol)
#define SCM_SET_SYMBOL_LENGTH(s, l) (SCM_SET_CELL_WORD_0 ((s), SCM_MAKE_SYMBOL_TAG(l)))
#define SCM_SYMBOL_CHARS(x) ((char *) (SCM_CELL_WORD_1 (x)))
#define SCM_SET_SYMBOL_CHARS(s, c) (SCM_SET_CELL_WORD_1 ((s), (c)))
#define SCM_SYMBOL_HASH(X) ((unsigned long) SCM_CELL_WORD_2 (X))
#define SCM_SYMBOL_INTERNED_P(X) (SCM_SYMBOL_HASH(X) <= (SCM_T_BITS_MAX/2))
#define SCM_PROP_SLOTS(X) (SCM_CELL_OBJECT_3 (X))
#define SCM_SET_PROP_SLOTS(X, v) (SCM_SET_CELL_OBJECT_3 ((X), (v)))
#define SCM_SYMBOL_FUNC(X) (SCM_CAR (SCM_CELL_OBJECT_3 (X)))
#define SCM_SET_SYMBOL_FUNC(X, v) (SCM_SETCAR (SCM_CELL_OBJECT_3 (X), (v)))
#define SCM_SYMBOL_PROPS(X) (SCM_CDR (SCM_CELL_OBJECT_3 (X)))
#define SCM_SET_SYMBOL_PROPS(X, v) (SCM_SETCDR (SCM_CELL_OBJECT_3 (X), (v)))
#define scm_is_symbol(x) (!SCM_IMP (x) \
&& (SCM_TYP7 (x) == scm_tc7_symbol))
#define scm_i_symbol_hash(x) ((unsigned long) SCM_CELL_WORD_2 (x))
#define scm_i_symbol_is_interned(x) (scm_i_symbol_hash(x)<=(SCM_T_BITS_MAX/2))
#ifdef GUILE_DEBUG
SCM_API SCM scm_sys_symbols (void);
#endif
SCM_API unsigned long scm_i_hash_symbol (SCM obj, unsigned long n, void *closure);
SCM_API SCM scm_mem2symbol (const char*, size_t);
SCM_API SCM scm_mem2uninterned_symbol (const char *name, size_t len);
SCM_API SCM scm_str2symbol (const char*);
SCM_API SCM scm_symbol_p (SCM x);
SCM_API SCM scm_symbol_interned_p (SCM sym);
@ -73,7 +50,14 @@ SCM_API SCM scm_symbol_pset_x (SCM s, SCM val);
SCM_API SCM scm_symbol_hash (SCM s);
SCM_API SCM scm_gensym (SCM prefix);
SCM_API char *scm_c_symbol2str (SCM obj, char *str, size_t *lenp);
SCM_API SCM scm_from_locale_symbol (const char *str);
SCM_API SCM scm_from_locale_symboln (const char *str, size_t len);
/* internal functions. */
SCM_API unsigned long scm_i_hash_symbol (SCM obj, unsigned long n,
void *closure);
SCM_API void scm_symbols_prehistory (void);
SCM_API void scm_init_symbols (void);