1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00
guile/libguile/strings.h
Jan (janneke) Nieuwenhuizen 76950b4281 Support for x86_64-w64-mingw32.
On x86-64-MinGW the size of long is 4.  As long is used for
SCM_FIXNUM_BIT, that would mean incompatible .go files, and waste of
cell space.  So we would like to use long long, but the GMP interface
uses long.

To get around this, the x86-64-MinGW port now requires the use of
mini-GMP.  Mini-GMP has been changed to use intptr_t and uintptr_t.

Likewise, "integers.{h,c}" and "numbers.{h,c}" now use intptr_t instead
of scm_t_inum or long, and uintptr_t instead of unsigned long.

* configure.ac: When x86_64-w64-mingw32, require mini-GMP.
* libguile/mini-gmp.h: Use intptr_t instead of long, uintptr_t instead
of unsigned long throughout.
* libguile/mini-gmp.c: Likewise.
* libguile/scm.h (SCM_INTPTR_T_BIT): New define.
* libguile/numbers.h (SCM_FIXNUM_BIT): Use it.
* libguile/numbers.c (L1, UL1): New macros.  Use them thoughout instead
of 1L, 1UL.
(verify): Use SCM_INTPTR_T_BIT.
(verify): Use SCM_INTPTR_T_MAX and SCM_INTPTR_T_MIN.
(scm_from_inum): Remove macro.
Use intptr_t and uintptr_t instead of scm_t_inum or long, and unsigned
long.
* libguile/numbers.h (scm_from_intptr, scm_from_uintptr, scm_to_intptr,
scm_to_uintptr): New defines.
* libguile/integers.h: Use intptr_t and uintptr_t instead of scm_t_inum
and unsigned long.
* libguile/integers.c (L1) : New macro.  Use it thoughout instead of 1L.
Use intptr_t and uintptr_t instead of long and unsigned long.
(long_magnitude): Rename to...
(intptr_t_magnitude): ...this.  Use intptr_t, uintptr_t.
(negative_long): Rename to...
(negative_t_intptr): ...this.  Use uintptr_t, INTPTR_MIN.
(inum_magnitude): Use intptr_t.
(ulong_to_bignum): Rename to...
(uintptr_t_to_bignum): ...this.  Use uintptr_t.
(long_to_bignum): Rename to...
(intptr_t_to_bignum): ...this.  Use intptr_t.
(long_to_scm): Rename to...
(intptr_t_to_scm): ...this.  Use intptr_to_bignum.
(ulong_to_scm): Rename to...
(uintptr_t_to_scm): ...this.  Use uintptr_to_bignum.
(long_sign): Rename to..
(intptr_t_sign): ...this.  Use SCM_SIZEOF_INTPTR_T.
(bignum_cmp_long): Rename to...
(bignum_cmp_intptr_t): ...this.  Use uintptr_t.
* libguile/array-map.c (array_compare): Use uintptr_t instead of
unsigned long and intptr_t instead of long.
* libguile/arrays.c (make-shared-array): Use ssize_t instead of long.
* libguile/bytevectors.c (is_signed_int32, is_unsigned_int32)
[MINGW32 && __x86_64__]: Use ULL.
(twos_complement): Use uintptr_t instead of unsigned long.
* libguile/hash.c (JENKINS_LOOKUP3_HASHWORD2): Likewise.
(narrow_string_hash, wide_string_hash, scm_i_string_hash,
scm_i_locale_string_hash, scm_i_latin1_string_hash,
scm_i_utf8_string_hash, scm_i_struct_hash, scm_raw_ihashq,
scm_raw_ihash): Use and return uintptr_t instead of unsigned long.
(scm_hashv, scm_hash): Use SCM_UINTPTR_T_MAX.
* libguile/hash.h (scm_i_locale_string_hash, scm_i_latin1_string_hash,
scm_i_utf8_string_hash): update prototypes.
* libguile/scmsigs.c (sigaction): Use intptr_t instead of long.
* libguile/strings.c (scm_i_make_symbol, (scm_i_c_make_symbol): Use
uintptr_t instead of unsigned long.
* libguile/strings.h (scm_i_make_symbol, (scm_i_c_make_symbol): Update
declacations.
* libguile/srfi-60.c: Use scm_to_uintptr, scm_from_intptr and variants
throughout.
* libguile/symbols.c (symbol-hash): Use scm_from_uintptr.

Co-authored-by: Mike Gran <spk121@yahoo.com>
Co-authored-by: Andy Wingo <wingo@pobox.com>
2022-11-10 10:33:49 -08:00

309 lines
12 KiB
C
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#ifndef SCM_STRINGS_H
#define SCM_STRINGS_H
/* Copyright 1995-1998,2000-2001,2004-2006,2008-2011,2013,2015-2019,2021,2022
Free Software Foundation, Inc.
This file is part of Guile.
Guile is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Guile is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with Guile. If not, see
<https://www.gnu.org/licenses/>. */
#include <libguile/gc.h>
#include <libguile/error.h>
#include "libguile/inline.h"
#include <libguile/snarf.h>
#include "libguile/numbers.h"
/* String representation.
A string is a piece of a stringbuf. A stringbuf can be used by
more than one string. When a string is written to and the
stringbuf of that string is used by more than one string, a new
stringbuf is created. That is, strings are copy-on-write. This
behavior can be used to make the substring operation quite
efficient.
The implementation is tuned so that mutating a string is costly,
but just reading it is cheap and lock-free.
There are also mutation-sharing strings. They refer to a part of
an ordinary string. Writing to a mutation-sharing string just
writes to the ordinary string.
Internal, low level interface to the character arrays
- Use scm_i_is_narrow_string to determine is the string is narrow or
wide.
- Use scm_i_string_chars or scm_i_string_wide_chars to get a
pointer to the byte or scm_t_wchar array of a string for reading.
Use scm_i_string_length to get the number of characters in that
array. The array is not null-terminated.
- The array is valid as long as the corresponding SCM object is
protected but only until the next SCM_TICK. During such a 'safe
point', strings might change their representation.
- Use scm_i_string_start_writing to get a version of the string
ready for reading and writing. This is a potentially costly
operation since it implements the copy-on-write behavior. When
done with the writing, call scm_i_string_stop_writing. You must
do this before the next SCM_TICK. (This means, before calling
almost any other scm_ function and you can't allow throws, of
course.)
- New strings can be created with scm_i_make_string or
scm_i_make_wide_string. This gives access to a writable pointer
that remains valid as long as nobody else makes a copy-on-write
substring of the string. Do not call scm_i_string_stop_writing
for this pointer.
- Alternately, scm_i_string_ref and scm_i_string_set_x can be used
to read and write strings without worrying about whether the
string is narrow or wide. scm_i_string_set_x still needs to be
bracketed by scm_i_string_start_writing and
scm_i_string_stop_writing.
Legacy interface
- SCM_STRINGP is just scm_is_string.
- SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately
calls scm_i_stop_writing, hoping for the best. SCM_STRING_LENGTH
is the same as scm_i_string_length. SCM_STRING_CHARS will throw
an error for strings that are not null-terminated. There is
no wide version of this interface.
*/
/* A type indicating what strategy to take when string locale
conversion is unsuccessful. */
typedef enum
{
SCM_FAILED_CONVERSION_ERROR = SCM_ICONVEH_ERROR,
SCM_FAILED_CONVERSION_QUESTION_MARK = SCM_ICONVEH_QUESTION_MARK,
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE = SCM_ICONVEH_ESCAPE_SEQUENCE
} scm_t_string_failed_conversion_handler;
SCM_INTERNAL SCM scm_nullstr;
SCM_INTERNAL scm_t_string_failed_conversion_handler
scm_i_default_string_failed_conversion_handler (void);
SCM_INLINE int scm_is_string (SCM x);
SCM_API SCM scm_string_p (SCM x);
SCM_API SCM scm_string (SCM chrs);
SCM_API SCM scm_make_string (SCM k, SCM chr);
SCM_API SCM scm_string_length (SCM str);
SCM_API SCM scm_string_utf8_length (SCM str);
SCM_API SCM scm_string_bytes_per_char (SCM str);
SCM_API SCM scm_string_ref (SCM str, SCM k);
SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);
SCM_API SCM scm_substring (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_read_only (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler);
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_string_utf8_length (SCM str);
SCM_API size_t scm_c_symbol_length (SCM sym);
SCM_API SCM scm_c_string_ref (SCM str, size_t pos);
SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr);
SCM_API SCM scm_c_substring (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
/* Use locale encoding for user input, user output, or interacting with
the C library. Use latin1 for ASCII, and for literals in source
code. Use utf8 for interaction with modern libraries which deal in
UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a
specific encoding. */
SCM_API SCM scm_from_locale_string (const char *str);
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
SCM_API SCM scm_take_locale_string (char *str);
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
SCM_API char *scm_to_locale_string (SCM str);
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_latin1_string (const char *str);
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
SCM_API char *scm_to_latin1_string (SCM str);
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);
SCM_API char *scm_to_utf8_string (SCM str);
SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf8_string (const char *str);
SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len);
SCM_API scm_t_wchar *scm_to_utf32_string (SCM str);
SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str);
SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len);
SCM_API char *scm_to_port_string (SCM str, SCM port);
SCM_API char *scm_to_port_stringn (SCM str, size_t *lenp, SCM port);
SCM_API SCM scm_from_port_string (const char *str, SCM port);
SCM_API SCM scm_from_port_stringn (const char *str, size_t len, SCM port);
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
scm_t_string_failed_conversion_handler handler);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
SCM_API SCM scm_string_normalize_nfd (SCM str);
SCM_API SCM scm_string_normalize_nfkd (SCM str);
SCM_API SCM scm_string_normalize_nfc (SCM str);
SCM_API SCM scm_string_normalize_nfkc (SCM str);
SCM_API SCM scm_makfromstrs (int argc, char **argv);
/* Snarfing support. See snarf.h. */
#ifdef SCM_SUPPORT_STATIC_ALLOCATION
#define SCM_IMMUTABLE_STRINGBUF(c_name, contents) \
static SCM_UNUSED const \
struct \
{ \
scm_t_bits word_0; \
scm_t_bits word_1; \
const char buffer[sizeof (contents)]; \
} \
c_name = \
{ \
scm_tc7_stringbuf, \
sizeof (contents) - 1, \
contents \
}
#define SCM_IMMUTABLE_STRING(c_name, contents) \
SCM_IMMUTABLE_STRINGBUF (scm_i_paste (c_name, _stringbuf), contents); \
SCM_IMMUTABLE_DOUBLE_CELL (c_name, \
scm_tc7_ro_string, \
(scm_t_bits) &scm_i_paste (c_name, \
_stringbuf), \
(scm_t_bits) 0, \
(scm_t_bits) (sizeof (contents) - 1))
#endif /* SCM_SUPPORT_STATIC_ALLOCATION */
/* internal constants */
/* Type tag for read-only strings. */
#define scm_tc7_ro_string (scm_tc7_string + 0x200)
/* Flags for shared and wide strings. */
#define SCM_I_STRINGBUF_F_WIDE 0x400
#define SCM_I_STRINGBUF_F_MUTABLE 0x800
SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port,
scm_print_state *pstate);
/* internal accessor functions. Arguments must be valid. */
SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap,
int read_only_p);
SCM_INTERNAL SCM scm_i_make_wide_string (size_t len, scm_t_wchar **datap,
int read_only_p);
SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end);
SCM_INTERNAL size_t scm_i_string_length (SCM str);
SCM_INTERNAL int scm_i_string_is_mutable (SCM str);
SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str);
SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str);
SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str);
SCM_INTERNAL const void *scm_i_string_data (SCM str);
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void);
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
/* internal functions related to symbols. */
SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
uintptr_t hash);
SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
SCM_INTERNAL void scm_encoding_error (const char *subr, int err,
const char *message, SCM port, SCM chr);
SCM_INTERNAL void scm_decoding_error (const char *subr, int err,
const char *message, SCM port);
/* internal utility functions. */
SCM_INTERNAL char **scm_i_allocate_string_pointers (SCM list);
SCM_INTERNAL void scm_i_get_substring_spec (size_t len,
SCM start, size_t *cstart,
SCM end, size_t *cend);
/* Debugging functions */
SCM_API SCM scm_sys_string_dump (SCM);
SCM_API SCM scm_sys_symbol_dump (SCM);
#ifdef SCM_STRING_LENGTH_HISTOGRAM
SCM_API SCM scm_sys_stringbuf_hist (void);
#endif
#if SCM_CAN_INLINE || defined SCM_INLINE_C_IMPLEMENTING_INLINES
/* Either inlining, or being included from inline.c. */
SCM_INLINE_IMPLEMENTATION int
scm_is_string (SCM x)
{
return SCM_HAS_TYP7 (x, scm_tc7_string);
}
#endif
#define SCM_VALIDATE_STRING(pos, str) \
do { \
SCM_ASSERT_TYPE (scm_is_string (str), str, pos, FUNC_NAME, "string"); \
} while (0)
SCM_INTERNAL void scm_init_strings (void);
#endif /* SCM_STRINGS_H */