diff --git a/libguile/numbers.c b/libguile/numbers.c index b4bff8142..ff963dbfa 100644 --- a/libguile/numbers.c +++ b/libguile/numbers.c @@ -46,8 +46,9 @@ #endif #include -#include #include +#include +#include #if HAVE_COMPLEX_H #include @@ -2437,7 +2438,7 @@ scm_i_print_fraction (SCM sexp, SCM port, scm_print_state *pstate SCM_UNUSED) { SCM str; str = scm_number_to_string (sexp, SCM_UNDEFINED); - scm_lfwrite (scm_i_string_chars (str), scm_i_string_length (str), port); + scm_lfwrite_str (str, port); scm_remember_upto_here_1 (str); return !0; } @@ -2484,13 +2485,13 @@ enum t_exactness {NO_EXACTNESS, INEXACT, EXACT}; /* R5RS, section 7.1.1, lexical structure of numbers: . */ /* In non ASCII-style encodings the following macro might not work. */ -#define XDIGIT2UINT(d) \ - (isdigit ((int) (unsigned char) d) \ - ? (d) - '0' \ +#define XDIGIT2UINT(d) \ + (uc_is_property_decimal_digit ((int) (unsigned char) d) \ + ? (d) - '0' \ : tolower ((int) (unsigned char) d) - 'a' + 10) static SCM -mem2uinteger (const char* mem, size_t len, unsigned int *p_idx, +mem2uinteger (SCM mem, unsigned int *p_idx, unsigned int radix, enum t_exactness *p_exactness) { unsigned int idx = *p_idx; @@ -2500,12 +2501,13 @@ mem2uinteger (const char* mem, size_t len, unsigned int *p_idx, unsigned int digit_value; SCM result; char c; + size_t len = scm_i_string_length (mem); if (idx == len) return SCM_BOOL_F; - c = mem[idx]; - if (!isxdigit ((int) (unsigned char) c)) + c = scm_i_string_ref (mem, idx); + if (!uc_is_property_ascii_hex_digit ((scm_t_uint32) c)) return SCM_BOOL_F; digit_value = XDIGIT2UINT (c); if (digit_value >= radix) @@ -2515,8 +2517,8 @@ mem2uinteger (const char* mem, size_t len, unsigned int *p_idx, result = SCM_I_MAKINUM (digit_value); while (idx != len) { - char c = mem[idx]; - if (isxdigit ((int) (unsigned char) c)) + scm_t_wchar c = scm_i_string_ref (mem, idx); + if (uc_is_property_ascii_hex_digit ((scm_t_uint32) c)) { if (hash_seen) break; @@ -2569,20 +2571,20 @@ mem2uinteger (const char* mem, size_t len, unsigned int *p_idx, * has already been seen in the digits before the point. */ -/* In non ASCII-style encodings the following macro might not work. */ -#define DIGIT2UINT(d) ((d) - '0') +#define DIGIT2UINT(d) (uc_numeric_value(d).numerator) static SCM -mem2decimal_from_point (SCM result, const char* mem, size_t len, +mem2decimal_from_point (SCM result, SCM mem, unsigned int *p_idx, enum t_exactness *p_exactness) { unsigned int idx = *p_idx; enum t_exactness x = *p_exactness; + size_t len = scm_i_string_length (mem); if (idx == len) return result; - if (mem[idx] == '.') + if (scm_i_string_ref (mem, idx) == '.') { scm_t_bits shift = 1; scm_t_bits add = 0; @@ -2592,8 +2594,8 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, idx++; while (idx != len) { - char c = mem[idx]; - if (isdigit ((int) (unsigned char) c)) + scm_t_wchar c = scm_i_string_ref (mem, idx); + if (uc_is_property_decimal_digit ((scm_t_uint32) c)) { if (x == INEXACT) return SCM_BOOL_F; @@ -2643,13 +2645,13 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, { int sign = 1; unsigned int start; - char c; + scm_t_wchar c; int exponent; SCM e; /* R5RS, section 7.1.1, lexical structure of numbers: */ - switch (mem[idx]) + switch (scm_i_string_ref (mem, idx)) { case 'd': case 'D': case 'e': case 'E': @@ -2661,7 +2663,7 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, return SCM_BOOL_F; start = idx; - c = mem[idx]; + c = scm_i_string_ref (mem, idx); if (c == '-') { idx++; @@ -2669,7 +2671,7 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, return SCM_BOOL_F; sign = -1; - c = mem[idx]; + c = scm_i_string_ref (mem, idx); } else if (c == '+') { @@ -2678,20 +2680,20 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, return SCM_BOOL_F; sign = 1; - c = mem[idx]; + c = scm_i_string_ref (mem, idx); } else sign = 1; - if (!isdigit ((int) (unsigned char) c)) + if (!uc_is_property_decimal_digit ((scm_t_uint32) c)) return SCM_BOOL_F; idx++; exponent = DIGIT2UINT (c); while (idx != len) { - char c = mem[idx]; - if (isdigit ((int) (unsigned char) c)) + scm_t_wchar c = scm_i_string_ref (mem, idx); + if (uc_is_property_decimal_digit ((scm_t_uint32) c)) { idx++; if (exponent <= SCM_MAXEXP) @@ -2704,7 +2706,7 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, if (exponent > SCM_MAXEXP) { size_t exp_len = idx - start; - SCM exp_string = scm_from_locale_stringn (&mem[start], exp_len); + SCM exp_string = scm_i_substring_copy (mem, start, start + exp_len); SCM exp_num = scm_string_to_number (exp_string, SCM_UNDEFINED); scm_out_of_range ("string->number", exp_num); } @@ -2736,11 +2738,12 @@ mem2decimal_from_point (SCM result, const char* mem, size_t len, /* R5RS, section 7.1.1, lexical structure of numbers: */ static SCM -mem2ureal (const char* mem, size_t len, unsigned int *p_idx, +mem2ureal (SCM mem, unsigned int *p_idx, unsigned int radix, enum t_exactness *p_exactness) { unsigned int idx = *p_idx; SCM result; + size_t len = scm_i_string_length (mem); /* Start off believing that the number will be exact. This changes to INEXACT if we see a decimal point or a hash. */ @@ -2749,45 +2752,45 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx, if (idx == len) return SCM_BOOL_F; - if (idx+5 <= len && !strncmp (mem+idx, "inf.0", 5)) + if (idx+5 <= len && !scm_i_string_strcmp (mem, idx, "inf.0")) { *p_idx = idx+5; return scm_inf (); } - if (idx+4 < len && !strncmp (mem+idx, "nan.", 4)) + if (idx+4 < len && !scm_i_string_strcmp (mem, idx, "nan.")) { /* Cobble up the fractional part. We might want to set the NaN's mantissa from it. */ idx += 4; - mem2uinteger (mem, len, &idx, 10, &x); + mem2uinteger (mem, &idx, 10, &x); *p_idx = idx; return scm_nan (); } - if (mem[idx] == '.') + if (scm_i_string_ref (mem, idx) == '.') { if (radix != 10) return SCM_BOOL_F; else if (idx + 1 == len) return SCM_BOOL_F; - else if (!isdigit ((int) (unsigned char) mem[idx + 1])) + else if (!uc_is_property_decimal_digit ((scm_t_uint32) scm_i_string_ref (mem, idx+1))) return SCM_BOOL_F; else - result = mem2decimal_from_point (SCM_I_MAKINUM (0), mem, len, + result = mem2decimal_from_point (SCM_I_MAKINUM (0), mem, p_idx, &x); } else { SCM uinteger; - uinteger = mem2uinteger (mem, len, &idx, radix, &x); + uinteger = mem2uinteger (mem, &idx, radix, &x); if (scm_is_false (uinteger)) return SCM_BOOL_F; if (idx == len) result = uinteger; - else if (mem[idx] == '/') + else if (scm_i_string_ref (mem, idx) == '/') { SCM divisor; @@ -2795,7 +2798,7 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx, if (idx == len) return SCM_BOOL_F; - divisor = mem2uinteger (mem, len, &idx, radix, &x); + divisor = mem2uinteger (mem, &idx, radix, &x); if (scm_is_false (divisor)) return SCM_BOOL_F; @@ -2804,7 +2807,7 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx, } else if (radix == 10) { - result = mem2decimal_from_point (uinteger, mem, len, &idx, &x); + result = mem2decimal_from_point (uinteger, mem, &idx, &x); if (scm_is_false (result)) return SCM_BOOL_F; } @@ -2835,17 +2838,18 @@ mem2ureal (const char* mem, size_t len, unsigned int *p_idx, /* R5RS, section 7.1.1, lexical structure of numbers: */ static SCM -mem2complex (const char* mem, size_t len, unsigned int idx, +mem2complex (SCM mem, unsigned int idx, unsigned int radix, enum t_exactness *p_exactness) { - char c; + scm_t_wchar c; int sign = 0; SCM ureal; + size_t len = scm_i_string_length (mem); if (idx == len) return SCM_BOOL_F; - c = mem[idx]; + c = scm_i_string_ref (mem, idx); if (c == '+') { idx++; @@ -2860,7 +2864,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx, if (idx == len) return SCM_BOOL_F; - ureal = mem2ureal (mem, len, &idx, radix, p_exactness); + ureal = mem2ureal (mem, &idx, radix, p_exactness); if (scm_is_false (ureal)) { /* input must be either +i or -i */ @@ -2868,7 +2872,8 @@ mem2complex (const char* mem, size_t len, unsigned int idx, if (sign == 0) return SCM_BOOL_F; - if (mem[idx] == 'i' || mem[idx] == 'I') + if (scm_i_string_ref (mem, idx) == 'i' + || scm_i_string_ref (mem, idx) == 'I') { idx++; if (idx != len) @@ -2887,7 +2892,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx, if (idx == len) return ureal; - c = mem[idx]; + c = scm_i_string_ref (mem, idx); switch (c) { case 'i': case 'I': @@ -2912,7 +2917,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx, SCM angle; SCM result; - c = mem[idx]; + c = scm_i_string_ref (mem, idx); if (c == '+') { idx++; @@ -2930,7 +2935,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx, else sign = 1; - angle = mem2ureal (mem, len, &idx, radix, p_exactness); + angle = mem2ureal (mem, &idx, radix, p_exactness); if (scm_is_false (angle)) return SCM_BOOL_F; if (idx != len) @@ -2952,7 +2957,7 @@ mem2complex (const char* mem, size_t len, unsigned int idx, else { int sign = (c == '+') ? 1 : -1; - SCM imag = mem2ureal (mem, len, &idx, radix, p_exactness); + SCM imag = mem2ureal (mem, &idx, radix, p_exactness); if (scm_is_false (imag)) imag = SCM_I_MAKINUM (sign); @@ -2961,7 +2966,8 @@ mem2complex (const char* mem, size_t len, unsigned int idx, if (idx == len) return SCM_BOOL_F; - if (mem[idx] != 'i' && mem[idx] != 'I') + if (scm_i_string_ref (mem, idx) != 'i' + && scm_i_string_ref (mem, idx) != 'I') return SCM_BOOL_F; idx++; @@ -2982,19 +2988,19 @@ mem2complex (const char* mem, size_t len, unsigned int idx, enum t_radix {NO_RADIX=0, DUAL=2, OCT=8, DEC=10, HEX=16}; SCM -scm_c_locale_stringn_to_number (const char* mem, size_t len, - unsigned int default_radix) +scm_i_string_to_number (SCM mem, unsigned int default_radix) { unsigned int idx = 0; unsigned int radix = NO_RADIX; enum t_exactness forced_x = NO_EXACTNESS; enum t_exactness implicit_x = EXACT; SCM result; + size_t len = scm_i_string_length (mem); /* R5RS, section 7.1.1, lexical structure of numbers: */ - while (idx + 2 < len && mem[idx] == '#') + while (idx + 2 < len && scm_i_string_ref (mem, idx) == '#') { - switch (mem[idx + 1]) + switch (scm_i_string_ref (mem, idx + 1)) { case 'b': case 'B': if (radix != NO_RADIX) @@ -3034,9 +3040,9 @@ scm_c_locale_stringn_to_number (const char* mem, size_t len, /* R5RS, section 7.1.1, lexical structure of numbers: */ if (radix == NO_RADIX) - result = mem2complex (mem, len, idx, default_radix, &implicit_x); + result = mem2complex (mem, idx, default_radix, &implicit_x); else - result = mem2complex (mem, len, idx, (unsigned int) radix, &implicit_x); + result = mem2complex (mem, idx, (unsigned int) radix, &implicit_x); if (scm_is_false (result)) return SCM_BOOL_F; @@ -3067,6 +3073,15 @@ scm_c_locale_stringn_to_number (const char* mem, size_t len, } } +SCM +scm_c_locale_stringn_to_number (const char* mem, size_t len, + unsigned int default_radix) +{ + SCM str = scm_from_locale_stringn (mem, len); + + return scm_i_string_to_number (str, default_radix); +} + SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0, (SCM string, SCM radix), @@ -3089,9 +3104,7 @@ SCM_DEFINE (scm_string_to_number, "string->number", 1, 1, 0, else base = scm_to_unsigned_integer (radix, 2, INT_MAX); - answer = scm_c_locale_stringn_to_number (scm_i_string_chars (string), - scm_i_string_length (string), - base); + answer = scm_i_string_to_number (string, base); scm_remember_upto_here_1 (string); return answer; } diff --git a/libguile/numbers.h b/libguile/numbers.h index bb72d7ac8..eaa57287a 100644 --- a/libguile/numbers.h +++ b/libguile/numbers.h @@ -212,6 +212,7 @@ SCM_API int scm_print_complex (SCM sexp, SCM port, scm_print_state *pstate); SCM_API int scm_bigprint (SCM exp, SCM port, scm_print_state *pstate); SCM_API SCM scm_c_locale_stringn_to_number (const char *mem, size_t len, unsigned int radix); +SCM_INTERNAL SCM scm_i_string_to_number (SCM str, unsigned int radix); SCM_API SCM scm_string_to_number (SCM str, SCM radix); SCM_API SCM scm_bigequal (SCM x, SCM y); SCM_API SCM scm_real_equalp (SCM x, SCM y); diff --git a/libguile/strings.c b/libguile/strings.c index 62758617f..c6464de6b 100644 --- a/libguile/strings.c +++ b/libguile/strings.c @@ -590,6 +590,29 @@ scm_i_string_ref (SCM str, size_t x) return scm_i_string_wide_chars (str)[x]; } +int +scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr) +{ + if (scm_i_is_narrow_string (sstr)) + { + const char *a = scm_i_string_chars (sstr) + start_x; + const char *b = cstr; + return strncmp (a, b, strlen(b)); + } + else + { + size_t i; + const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x; + const char *b = cstr; + for (i = 0; i < strlen (b); i++) + { + if (a[i] != (unsigned char) b[i]) + return 1; + } + } + return 0; +} + /* Set the Pth character of STR to UCS-4 codepoint CHR. */ void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr) diff --git a/libguile/strings.h b/libguile/strings.h index 390b4f6a8..d0cbb8dd3 100644 --- a/libguile/strings.h +++ b/libguile/strings.h @@ -152,6 +152,7 @@ SCM_INTERNAL SCM scm_i_string_start_writing (SCM str); SCM_INTERNAL void scm_i_string_stop_writing (void); SCM_INTERNAL int scm_i_is_narrow_string (SCM str); SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x); +SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr); SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr); /* internal functions related to symbols. */