1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-01 12:20:26 +02:00

Use string accessors for string->number conversion

* libguile/numbers.c (scm_i_print_fraction): use string accessors
  (XDIGIT2UINT): use libunistring function
  (mem2uinteger, mem2integer, mem2decimal_from_point, mem2ureal)
  (mem2complex): take scheme string instead of c string; use accessors
  (scm_i_string_to_number): new function
  (scm_c_locale_string_to_number): use scm_i_string_to_number

* libguile/numbers.h: declaration for scm_i_string_to_number

* libguile/strings.c (scm_i_string_strcmp): new function

* libguile/strings.h: declaration for scm_i_string_strcmp
This commit is contained in:
Michael Gran 2009-08-21 09:18:30 -07:00
parent e23106d53e
commit 3f47e52621
4 changed files with 93 additions and 55 deletions

View file

@ -46,8 +46,9 @@
#endif #endif
#include <math.h> #include <math.h>
#include <ctype.h>
#include <string.h> #include <string.h>
#include <unicase.h>
#include <unictype.h>
#if HAVE_COMPLEX_H #if HAVE_COMPLEX_H
#include <complex.h> #include <complex.h>
@ -2437,7 +2438,7 @@ scm_i_print_fraction (SCM sexp, SCM port, scm_print_state *pstate SCM_UNUSED)
{ {
SCM str; SCM str;
str = scm_number_to_string (sexp, SCM_UNDEFINED); str = scm_number_to_string (sexp, SCM_UNDEFINED);
scm_lfwrite (scm_i_string_chars (str), scm_i_string_length (str), port); scm_lfwrite_str (str, port);
scm_remember_upto_here_1 (str); scm_remember_upto_here_1 (str);
return !0; return !0;
} }
@ -2485,12 +2486,12 @@ enum t_exactness {NO_EXACTNESS, INEXACT, EXACT};
/* In non ASCII-style encodings the following macro might not work. */ /* In non ASCII-style encodings the following macro might not work. */
#define XDIGIT2UINT(d) \ #define XDIGIT2UINT(d) \
(isdigit ((int) (unsigned char) d) \ (uc_is_property_decimal_digit ((int) (unsigned char) d) \
? (d) - '0' \ ? (d) - '0' \
: tolower ((int) (unsigned char) d) - 'a' + 10) : tolower ((int) (unsigned char) d) - 'a' + 10)
static SCM static SCM
mem2uinteger (const char* mem, size_t len, unsigned int *p_idx, mem2uinteger (SCM mem, unsigned int *p_idx,
unsigned int radix, enum t_exactness *p_exactness) unsigned int radix, enum t_exactness *p_exactness)
{ {
unsigned int idx = *p_idx; unsigned int idx = *p_idx;
@ -2500,12 +2501,13 @@ mem2uinteger (const char* mem, size_t len, unsigned int *p_idx,
unsigned int digit_value; unsigned int digit_value;
SCM result; SCM result;
char c; char c;
size_t len = scm_i_string_length (mem);
if (idx == len) if (idx == len)
return SCM_BOOL_F; return SCM_BOOL_F;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
if (!isxdigit ((int) (unsigned char) c)) if (!uc_is_property_ascii_hex_digit ((scm_t_uint32) c))
return SCM_BOOL_F; return SCM_BOOL_F;
digit_value = XDIGIT2UINT (c); digit_value = XDIGIT2UINT (c);
if (digit_value >= radix) if (digit_value >= radix)
@ -2515,8 +2517,8 @@ mem2uinteger (const char* mem, size_t len, unsigned int *p_idx,
result = SCM_I_MAKINUM (digit_value); result = SCM_I_MAKINUM (digit_value);
while (idx != len) while (idx != len)
{ {
char c = mem[idx]; scm_t_wchar c = scm_i_string_ref (mem, idx);
if (isxdigit ((int) (unsigned char) c)) if (uc_is_property_ascii_hex_digit ((scm_t_uint32) c))
{ {
if (hash_seen) if (hash_seen)
break; break;
@ -2569,20 +2571,20 @@ mem2uinteger (const char* mem, size_t len, unsigned int *p_idx,
* has already been seen in the digits before the point. * has already been seen in the digits before the point.
*/ */
/* In non ASCII-style encodings the following macro might not work. */ #define DIGIT2UINT(d) (uc_numeric_value(d).numerator)
#define DIGIT2UINT(d) ((d) - '0')
static SCM static SCM
mem2decimal_from_point (SCM result, const char* mem, size_t len, mem2decimal_from_point (SCM result, SCM mem,
unsigned int *p_idx, enum t_exactness *p_exactness) unsigned int *p_idx, enum t_exactness *p_exactness)
{ {
unsigned int idx = *p_idx; unsigned int idx = *p_idx;
enum t_exactness x = *p_exactness; enum t_exactness x = *p_exactness;
size_t len = scm_i_string_length (mem);
if (idx == len) if (idx == len)
return result; return result;
if (mem[idx] == '.') if (scm_i_string_ref (mem, idx) == '.')
{ {
scm_t_bits shift = 1; scm_t_bits shift = 1;
scm_t_bits add = 0; scm_t_bits add = 0;
@ -2592,8 +2594,8 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
idx++; idx++;
while (idx != len) while (idx != len)
{ {
char c = mem[idx]; scm_t_wchar c = scm_i_string_ref (mem, idx);
if (isdigit ((int) (unsigned char) c)) if (uc_is_property_decimal_digit ((scm_t_uint32) c))
{ {
if (x == INEXACT) if (x == INEXACT)
return SCM_BOOL_F; return SCM_BOOL_F;
@ -2643,13 +2645,13 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
{ {
int sign = 1; int sign = 1;
unsigned int start; unsigned int start;
char c; scm_t_wchar c;
int exponent; int exponent;
SCM e; SCM e;
/* R5RS, section 7.1.1, lexical structure of numbers: <suffix> */ /* R5RS, section 7.1.1, lexical structure of numbers: <suffix> */
switch (mem[idx]) switch (scm_i_string_ref (mem, idx))
{ {
case 'd': case 'D': case 'd': case 'D':
case 'e': case 'E': case 'e': case 'E':
@ -2661,7 +2663,7 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
return SCM_BOOL_F; return SCM_BOOL_F;
start = idx; start = idx;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
if (c == '-') if (c == '-')
{ {
idx++; idx++;
@ -2669,7 +2671,7 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
return SCM_BOOL_F; return SCM_BOOL_F;
sign = -1; sign = -1;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
} }
else if (c == '+') else if (c == '+')
{ {
@ -2678,20 +2680,20 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
return SCM_BOOL_F; return SCM_BOOL_F;
sign = 1; sign = 1;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
} }
else else
sign = 1; sign = 1;
if (!isdigit ((int) (unsigned char) c)) if (!uc_is_property_decimal_digit ((scm_t_uint32) c))
return SCM_BOOL_F; return SCM_BOOL_F;
idx++; idx++;
exponent = DIGIT2UINT (c); exponent = DIGIT2UINT (c);
while (idx != len) while (idx != len)
{ {
char c = mem[idx]; scm_t_wchar c = scm_i_string_ref (mem, idx);
if (isdigit ((int) (unsigned char) c)) if (uc_is_property_decimal_digit ((scm_t_uint32) c))
{ {
idx++; idx++;
if (exponent <= SCM_MAXEXP) if (exponent <= SCM_MAXEXP)
@ -2704,7 +2706,7 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
if (exponent > SCM_MAXEXP) if (exponent > SCM_MAXEXP)
{ {
size_t exp_len = idx - start; size_t exp_len = idx - start;
SCM exp_string = scm_from_locale_stringn (&mem[start], exp_len); SCM exp_string = scm_i_substring_copy (mem, start, start + exp_len);
SCM exp_num = scm_string_to_number (exp_string, SCM_UNDEFINED); SCM exp_num = scm_string_to_number (exp_string, SCM_UNDEFINED);
scm_out_of_range ("string->number", exp_num); scm_out_of_range ("string->number", exp_num);
} }
@ -2736,11 +2738,12 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len,
/* R5RS, section 7.1.1, lexical structure of numbers: <ureal R> */ /* R5RS, section 7.1.1, lexical structure of numbers: <ureal R> */
static SCM static SCM
mem2ureal (const char* mem, size_t len, unsigned int *p_idx, mem2ureal (SCM mem, unsigned int *p_idx,
unsigned int radix, enum t_exactness *p_exactness) unsigned int radix, enum t_exactness *p_exactness)
{ {
unsigned int idx = *p_idx; unsigned int idx = *p_idx;
SCM result; SCM result;
size_t len = scm_i_string_length (mem);
/* Start off believing that the number will be exact. This changes /* Start off believing that the number will be exact. This changes
to INEXACT if we see a decimal point or a hash. */ to INEXACT if we see a decimal point or a hash. */
@ -2749,45 +2752,45 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx,
if (idx == len) if (idx == len)
return SCM_BOOL_F; return SCM_BOOL_F;
if (idx+5 <= len && !strncmp (mem+idx, "inf.0", 5)) if (idx+5 <= len && !scm_i_string_strcmp (mem, idx, "inf.0"))
{ {
*p_idx = idx+5; *p_idx = idx+5;
return scm_inf (); return scm_inf ();
} }
if (idx+4 < len && !strncmp (mem+idx, "nan.", 4)) if (idx+4 < len && !scm_i_string_strcmp (mem, idx, "nan."))
{ {
/* Cobble up the fractional part. We might want to set the /* Cobble up the fractional part. We might want to set the
NaN's mantissa from it. */ NaN's mantissa from it. */
idx += 4; idx += 4;
mem2uinteger (mem, len, &idx, 10, &x); mem2uinteger (mem, &idx, 10, &x);
*p_idx = idx; *p_idx = idx;
return scm_nan (); return scm_nan ();
} }
if (mem[idx] == '.') if (scm_i_string_ref (mem, idx) == '.')
{ {
if (radix != 10) if (radix != 10)
return SCM_BOOL_F; return SCM_BOOL_F;
else if (idx + 1 == len) else if (idx + 1 == len)
return SCM_BOOL_F; return SCM_BOOL_F;
else if (!isdigit ((int) (unsigned char) mem[idx + 1])) else if (!uc_is_property_decimal_digit ((scm_t_uint32) scm_i_string_ref (mem, idx+1)))
return SCM_BOOL_F; return SCM_BOOL_F;
else else
result = mem2decimal_from_point (SCM_I_MAKINUM (0), mem, len, result = mem2decimal_from_point (SCM_I_MAKINUM (0), mem,
p_idx, &x); p_idx, &x);
} }
else else
{ {
SCM uinteger; SCM uinteger;
uinteger = mem2uinteger (mem, len, &idx, radix, &x); uinteger = mem2uinteger (mem, &idx, radix, &x);
if (scm_is_false (uinteger)) if (scm_is_false (uinteger))
return SCM_BOOL_F; return SCM_BOOL_F;
if (idx == len) if (idx == len)
result = uinteger; result = uinteger;
else if (mem[idx] == '/') else if (scm_i_string_ref (mem, idx) == '/')
{ {
SCM divisor; SCM divisor;
@ -2795,7 +2798,7 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx,
if (idx == len) if (idx == len)
return SCM_BOOL_F; return SCM_BOOL_F;
divisor = mem2uinteger (mem, len, &idx, radix, &x); divisor = mem2uinteger (mem, &idx, radix, &x);
if (scm_is_false (divisor)) if (scm_is_false (divisor))
return SCM_BOOL_F; return SCM_BOOL_F;
@ -2804,7 +2807,7 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx,
} }
else if (radix == 10) else if (radix == 10)
{ {
result = mem2decimal_from_point (uinteger, mem, len, &idx, &x); result = mem2decimal_from_point (uinteger, mem, &idx, &x);
if (scm_is_false (result)) if (scm_is_false (result))
return SCM_BOOL_F; return SCM_BOOL_F;
} }
@ -2835,17 +2838,18 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx,
/* R5RS, section 7.1.1, lexical structure of numbers: <complex R> */ /* R5RS, section 7.1.1, lexical structure of numbers: <complex R> */
static SCM static SCM
mem2complex (const char* mem, size_t len, unsigned int idx, mem2complex (SCM mem, unsigned int idx,
unsigned int radix, enum t_exactness *p_exactness) unsigned int radix, enum t_exactness *p_exactness)
{ {
char c; scm_t_wchar c;
int sign = 0; int sign = 0;
SCM ureal; SCM ureal;
size_t len = scm_i_string_length (mem);
if (idx == len) if (idx == len)
return SCM_BOOL_F; return SCM_BOOL_F;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
if (c == '+') if (c == '+')
{ {
idx++; idx++;
@ -2860,7 +2864,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
if (idx == len) if (idx == len)
return SCM_BOOL_F; return SCM_BOOL_F;
ureal = mem2ureal (mem, len, &idx, radix, p_exactness); ureal = mem2ureal (mem, &idx, radix, p_exactness);
if (scm_is_false (ureal)) if (scm_is_false (ureal))
{ {
/* input must be either +i or -i */ /* input must be either +i or -i */
@ -2868,7 +2872,8 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
if (sign == 0) if (sign == 0)
return SCM_BOOL_F; return SCM_BOOL_F;
if (mem[idx] == 'i' || mem[idx] == 'I') if (scm_i_string_ref (mem, idx) == 'i'
|| scm_i_string_ref (mem, idx) == 'I')
{ {
idx++; idx++;
if (idx != len) if (idx != len)
@ -2887,7 +2892,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
if (idx == len) if (idx == len)
return ureal; return ureal;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
switch (c) switch (c)
{ {
case 'i': case 'I': case 'i': case 'I':
@ -2912,7 +2917,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
SCM angle; SCM angle;
SCM result; SCM result;
c = mem[idx]; c = scm_i_string_ref (mem, idx);
if (c == '+') if (c == '+')
{ {
idx++; idx++;
@ -2930,7 +2935,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
else else
sign = 1; sign = 1;
angle = mem2ureal (mem, len, &idx, radix, p_exactness); angle = mem2ureal (mem, &idx, radix, p_exactness);
if (scm_is_false (angle)) if (scm_is_false (angle))
return SCM_BOOL_F; return SCM_BOOL_F;
if (idx != len) if (idx != len)
@ -2952,7 +2957,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
else else
{ {
int sign = (c == '+') ? 1 : -1; int sign = (c == '+') ? 1 : -1;
SCM imag = mem2ureal (mem, len, &idx, radix, p_exactness); SCM imag = mem2ureal (mem, &idx, radix, p_exactness);
if (scm_is_false (imag)) if (scm_is_false (imag))
imag = SCM_I_MAKINUM (sign); imag = SCM_I_MAKINUM (sign);
@ -2961,7 +2966,8 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
if (idx == len) if (idx == len)
return SCM_BOOL_F; return SCM_BOOL_F;
if (mem[idx] != 'i' && mem[idx] != 'I') if (scm_i_string_ref (mem, idx) != 'i'
&& scm_i_string_ref (mem, idx) != 'I')
return SCM_BOOL_F; return SCM_BOOL_F;
idx++; idx++;
@ -2982,19 +2988,19 @@ mem2complex (const char* mem, size_t len, unsigned int idx,
enum t_radix {NO_RADIX=0, DUAL=2, OCT=8, DEC=10, HEX=16}; enum t_radix {NO_RADIX=0, DUAL=2, OCT=8, DEC=10, HEX=16};
SCM SCM
scm_c_locale_stringn_to_number (const char* mem, size_t len, scm_i_string_to_number (SCM mem, unsigned int default_radix)
unsigned int default_radix)
{ {
unsigned int idx = 0; unsigned int idx = 0;
unsigned int radix = NO_RADIX; unsigned int radix = NO_RADIX;
enum t_exactness forced_x = NO_EXACTNESS; enum t_exactness forced_x = NO_EXACTNESS;
enum t_exactness implicit_x = EXACT; enum t_exactness implicit_x = EXACT;
SCM result; SCM result;
size_t len = scm_i_string_length (mem);
/* R5RS, section 7.1.1, lexical structure of numbers: <prefix R> */ /* R5RS, section 7.1.1, lexical structure of numbers: <prefix R> */
while (idx + 2 < len && mem[idx] == '#') while (idx + 2 < len && scm_i_string_ref (mem, idx) == '#')
{ {
switch (mem[idx + 1]) switch (scm_i_string_ref (mem, idx + 1))
{ {
case 'b': case 'B': case 'b': case 'B':
if (radix != NO_RADIX) if (radix != NO_RADIX)
@ -3034,9 +3040,9 @@ scm_c_locale_stringn_to_number (const char* mem, size_t len,
/* R5RS, section 7.1.1, lexical structure of numbers: <complex R> */ /* R5RS, section 7.1.1, lexical structure of numbers: <complex R> */
if (radix == NO_RADIX) if (radix == NO_RADIX)
result = mem2complex (mem, len, idx, default_radix, &implicit_x); result = mem2complex (mem, idx, default_radix, &implicit_x);
else else
result = mem2complex (mem, len, idx, (unsigned int) radix, &implicit_x); result = mem2complex (mem, idx, (unsigned int) radix, &implicit_x);
if (scm_is_false (result)) if (scm_is_false (result))
return SCM_BOOL_F; return SCM_BOOL_F;
@ -3067,6 +3073,15 @@ scm_c_locale_stringn_to_number (const char* mem, size_t len,
} }
} }
SCM
scm_c_locale_stringn_to_number (const char* mem, size_t len,
unsigned int default_radix)
{
SCM str = scm_from_locale_stringn (mem, len);
return scm_i_string_to_number (str, default_radix);
}
SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0, SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0,
(SCM string, SCM radix), (SCM string, SCM radix),
@ -3089,9 +3104,7 @@ SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0,
else else
base = scm_to_unsigned_integer (radix, 2, INT_MAX); base = scm_to_unsigned_integer (radix, 2, INT_MAX);
answer = scm_c_locale_stringn_to_number (scm_i_string_chars (string), answer = scm_i_string_to_number (string, base);
scm_i_string_length (string),
base);
scm_remember_upto_here_1 (string); scm_remember_upto_here_1 (string);
return answer; return answer;
} }

View file

@ -212,6 +212,7 @@ SCM_API int scm_print_complex (SCM sexp, SCM port, scm_print_state *pstate);
SCM_API int scm_bigprint (SCM exp, SCM port, scm_print_state *pstate); SCM_API int scm_bigprint (SCM exp, SCM port, scm_print_state *pstate);
SCM_API SCM scm_c_locale_stringn_to_number (const char *mem, size_t len, SCM_API SCM scm_c_locale_stringn_to_number (const char *mem, size_t len,
unsigned int radix); unsigned int radix);
SCM_INTERNAL SCM scm_i_string_to_number (SCM str, unsigned int radix);
SCM_API SCM scm_string_to_number (SCM str, SCM radix); SCM_API SCM scm_string_to_number (SCM str, SCM radix);
SCM_API SCM scm_bigequal (SCM x, SCM y); SCM_API SCM scm_bigequal (SCM x, SCM y);
SCM_API SCM scm_real_equalp (SCM x, SCM y); SCM_API SCM scm_real_equalp (SCM x, SCM y);

View file

@ -590,6 +590,29 @@ scm_i_string_ref (SCM str, size_t x)
return scm_i_string_wide_chars (str)[x]; return scm_i_string_wide_chars (str)[x];
} }
int
scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
{
if (scm_i_is_narrow_string (sstr))
{
const char *a = scm_i_string_chars (sstr) + start_x;
const char *b = cstr;
return strncmp (a, b, strlen(b));
}
else
{
size_t i;
const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
const char *b = cstr;
for (i = 0; i < strlen (b); i++)
{
if (a[i] != (unsigned char) b[i])
return 1;
}
}
return 0;
}
/* Set the Pth character of STR to UCS-4 codepoint CHR. */ /* Set the Pth character of STR to UCS-4 codepoint CHR. */
void void
scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr) scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)

View file

@ -152,6 +152,7 @@ SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void); SCM_INTERNAL void scm_i_string_stop_writing (void);
SCM_INTERNAL int scm_i_is_narrow_string (SCM str); SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x); SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
/* internal functions related to symbols. */ /* internal functions related to symbols. */