diff --git a/libguile/chars.c b/libguile/chars.c index 511ffc7c8..5a53c456a 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -24,6 +24,8 @@ #include #include +#include + #include "libguile/_scm.h" #include "libguile/validate.h" @@ -55,7 +57,7 @@ SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_less_p, "char?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is greater than @var{y} in the ASCII\n" + "Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n" "sequence, else @code{#f}.") #define FUNC_NAME s_scm_char_gr_p { @@ -92,7 +94,7 @@ SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr, (SCM x, SCM y), "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n" - "ASCII sequence, else @code{#f}.") + "Unicode sequence, else @code{#f}.") #define FUNC_NAME s_scm_char_geq_p { SCM_VALIDATE_CHAR (1, x); @@ -104,7 +106,7 @@ SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr, (SCM x, SCM y), "Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n" - "case, else @code{#f}.") + "case, else @code{#f}. Case is locale free and not context sensitive.") #define FUNC_NAME s_scm_char_ci_eq_p { SCM_VALIDATE_CHAR (1, x); @@ -115,8 +117,9 @@ SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_ci_less_p, "char-ci?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is greater than @var{y} in the ASCII\n" - "sequence ignoring case, else @code{#f}.") + "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n" + "than the Unicode uppercase form of @var{y} in the Unicode\n" + "sequence, else @code{#f}.") #define FUNC_NAME s_scm_char_ci_gr_p { SCM_VALIDATE_CHAR (1, x); @@ -151,8 +156,9 @@ SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr, SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr, (SCM x, SCM y), - "Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n" - "ASCII sequence ignoring case, else @code{#f}.") + "Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n" + "than or equal to the Unicode uppercase form of @var{y} in the\n" + "Unicode sequence, else @code{#f}.") #define FUNC_NAME s_scm_char_ci_geq_p { SCM_VALIDATE_CHAR (1, x); @@ -233,7 +239,7 @@ SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0, #define FUNC_NAME s_scm_char_to_integer { SCM_VALIDATE_CHAR (1, chr); - return scm_from_ulong (SCM_CHAR(chr)); + return scm_from_uint32 (SCM_CHAR(chr)); } #undef FUNC_NAME @@ -244,7 +250,15 @@ SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0, "Return the character at position @var{n} in the ASCII sequence.") #define FUNC_NAME s_scm_integer_to_char { - return SCM_MAKE_CHAR (scm_to_uchar (n)); + scm_t_wchar cn; + + cn = scm_to_wchar (n); + + /* Avoid the surrogates. */ + if (!SCM_IS_UNICODE_CHAR (cn)) + scm_out_of_range (FUNC_NAME, n); + + return SCM_MAKE_CHAR (cn); } #undef FUNC_NAME @@ -255,7 +269,7 @@ SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0, #define FUNC_NAME s_scm_char_upcase { SCM_VALIDATE_CHAR (1, chr); - return SCM_MAKE_CHAR (toupper (SCM_CHAR (chr))); + return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr))); } #undef FUNC_NAME @@ -266,7 +280,7 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0, #define FUNC_NAME s_scm_char_downcase { SCM_VALIDATE_CHAR (1, chr); - return SCM_MAKE_CHAR (tolower (SCM_CHAR(chr))); + return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr))); } #undef FUNC_NAME @@ -279,23 +293,17 @@ TODO: change name to scm_i_.. ? --hwn */ -int -scm_c_upcase (unsigned int c) +scm_t_wchar +scm_c_upcase (scm_t_wchar c) { - if (c <= UCHAR_MAX) - return toupper (c); - else - return c; + return uc_toupper (c); } -int -scm_c_downcase (unsigned int c) +scm_t_wchar +scm_c_downcase (scm_t_wchar c) { - if (c <= UCHAR_MAX) - return tolower (c); - else - return c; + return uc_tolower (c); } diff --git a/libguile/chars.h b/libguile/chars.h index 5bceea533..e68f06d21 100644 --- a/libguile/chars.h +++ b/libguile/chars.h @@ -3,7 +3,7 @@ #ifndef SCM_CHARS_H #define SCM_CHARS_H -/* Copyright (C) 1995,1996,2000,2001,2004, 2006, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,2000,2001,2004, 2006, 2008, 2009 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -28,9 +28,24 @@ /* Immediate Characters */ + +#ifndef SCM_WCHAR_DEFINED +typedef scm_t_int32 scm_t_wchar; +#define SCM_WCHAR_DEFINED +#endif + #define SCM_CHARP(x) (SCM_ITAG8(x) == scm_tc8_char) -#define SCM_CHAR(x) ((unsigned int)SCM_ITAG8_DATA(x)) -#define SCM_MAKE_CHAR(x) SCM_MAKE_ITAG8((scm_t_bits) (unsigned char) (x), scm_tc8_char) +#define SCM_CHAR(x) ((scm_t_wchar)SCM_ITAG8_DATA(x)) + +#define SCM_MAKE_CHAR(x) ({scm_t_int32 _x = (x); \ + _x < 0 \ + ? SCM_MAKE_ITAG8((scm_t_bits)(unsigned char)_x, scm_tc8_char) \ + : SCM_MAKE_ITAG8((scm_t_bits)_x, scm_tc8_char);}) + +#define SCM_CODEPOINT_MAX (0x10ffff) +#define SCM_IS_UNICODE_CHAR(c) \ + ((scm_t_wchar)(c)<=0xd7ff || \ + ((scm_t_wchar)(c)>=0xe000 && (scm_t_wchar)(c)<=SCM_CODEPOINT_MAX)) @@ -55,9 +70,9 @@ SCM_API SCM scm_char_to_integer (SCM chr); SCM_API SCM scm_integer_to_char (SCM n); SCM_API SCM scm_char_upcase (SCM chr); SCM_API SCM scm_char_downcase (SCM chr); -SCM_API int scm_c_upcase (unsigned int c); -SCM_API int scm_c_downcase (unsigned int c); -SCM_INTERNAL const char * scm_i_charname (SCM chr); +SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c); +SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c); +SCM_INTERNAL const char *scm_i_charname (SCM chr); SCM_INTERNAL SCM scm_i_charname_to_char (const char *charname, size_t charname_len); SCM_INTERNAL void scm_init_chars (void); diff --git a/libguile/numbers.c b/libguile/numbers.c index c7e098151..5f56b7a29 100644 --- a/libguile/numbers.c +++ b/libguile/numbers.c @@ -5863,6 +5863,14 @@ scm_i_range_error (SCM bad_val, SCM min, SCM max) #define SCM_FROM_TYPE_PROTO(arg) scm_from_uint32 (arg) #include "libguile/conv-uinteger.i.c" +#define TYPE scm_t_wchar +#define TYPE_MIN (scm_t_int32)-1 +#define TYPE_MAX (scm_t_int32)0x10ffff +#define SIZEOF_TYPE 4 +#define SCM_TO_TYPE_PROTO(arg) scm_to_wchar (arg) +#define SCM_FROM_TYPE_PROTO(arg) scm_from_wchar (arg) +#include "libguile/conv-integer.i.c" + #if SCM_HAVE_T_INT64 #define TYPE scm_t_int64 diff --git a/libguile/numbers.h b/libguile/numbers.h index 5bad4478b..f30f7d061 100644 --- a/libguile/numbers.h +++ b/libguile/numbers.h @@ -3,7 +3,7 @@ #ifndef SCM_NUMBERS_H #define SCM_NUMBERS_H -/* Copyright (C) 1995,1996,1998,2000,2001,2002,2003,2004,2005, 2006, 2008 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998,2000,2001,2002,2003,2004,2005, 2006, 2008, 2009 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -174,6 +174,11 @@ typedef struct scm_t_complex double imag; } scm_t_complex; +#ifndef SCM_WCHAR_DEFINED +typedef scm_t_int32 scm_t_wchar; +#define SCM_WCHAR_DEFINED +#endif + SCM_API SCM scm_exact_p (SCM x); @@ -322,6 +327,9 @@ SCM_API SCM scm_from_int32 (scm_t_int32 x); SCM_API scm_t_uint32 scm_to_uint32 (SCM x); SCM_API SCM scm_from_uint32 (scm_t_uint32 x); +SCM_API scm_t_wchar scm_to_wchar (SCM x); +SCM_API SCM scm_from_wchar (scm_t_wchar x); + #if SCM_HAVE_T_INT64 SCM_API scm_t_int64 scm_to_int64 (SCM x); diff --git a/libguile/print.c b/libguile/print.c index 604571820..1a5aebe1b 100644 --- a/libguile/print.c +++ b/libguile/print.c @@ -23,6 +23,7 @@ #endif #include +#include #include "libguile/_scm.h" #include "libguile/chars.h" @@ -436,7 +437,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) case scm_tc3_imm24: if (SCM_CHARP (exp)) { - long i = SCM_CHAR (exp); + scm_t_wchar i = SCM_CHAR (exp); const char *name; if (SCM_WRITINGP (pstate)) @@ -445,10 +446,30 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate) name = scm_i_charname (exp); if (name != NULL) scm_puts (name, port); - else if (i < 0 || i > '\177') - scm_intprint (i, 8, port); - else - scm_putc (i, port); + else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L + | UC_CATEGORY_MASK_M + | UC_CATEGORY_MASK_N + | UC_CATEGORY_MASK_P + | UC_CATEGORY_MASK_S)) + /* Print the character if is graphic character. */ + { + if (i<256) + { + /* Character is graphic. Print it. */ + scm_putc (i, port); + } + else + { + /* Character is graphic but unrepresentable in + this port's encoding. */ + scm_intprint (i, 8, port); + } + } + else + { + /* Character is a non-graphical character. */ + scm_intprint (i, 8, port); + } } else scm_putc (i, port); diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c index 726112c8a..ecafbebdd 100644 --- a/libguile/vm-i-system.c +++ b/libguile/vm-i-system.c @@ -175,6 +175,19 @@ VM_DEFINE_INSTRUCTION (16, make_char8, "make-char8", 1, 0, 1) NEXT; } +VM_DEFINE_INSTRUCTION (42, make_char32, "make-char32", 4, 0, 1) +{ + scm_t_wchar v = 0; + v += FETCH (); + v <<= 8; v += FETCH (); + v <<= 8; v += FETCH (); + v <<= 8; v += FETCH (); + PUSH (SCM_MAKE_CHAR (v)); + NEXT; +} + + + VM_DEFINE_INSTRUCTION (17, list, "list", 2, -1, 1) { unsigned h = FETCH (); diff --git a/module/language/assembly.scm b/module/language/assembly.scm index e7308ac6f..3a1da4fe3 100644 --- a/module/language/assembly.scm +++ b/module/language/assembly.scm @@ -131,7 +131,11 @@ (bytevector-s64-set! bv 0 x (endianness big)) bv)))) (else #f))) - ((char? x) `(make-char8 ,(char->integer x))) + ((char? x) + (cond ((<= (char->integer x) #xff) + `(make-char8 ,(char->integer x))) + (else + `(make-char32 ,(char->integer x))))) (else #f))) (define (assembly->object code) @@ -156,6 +160,11 @@ (endianness big))) ((make-char8 ,n) (integer->char n)) + ((make-char32 ,n1 ,n2 ,n3 ,n4) + (integer->char (+ (* n1 #x1000000) + (* n2 #x10000) + (* n3 #x100) + n4))) ((load-string ,s) s) ((load-symbol ,s) (string->symbol s)) ((load-keyword ,s) (symbol->keyword (string->symbol s))) diff --git a/module/language/assembly/compile-bytecode.scm b/module/language/assembly/compile-bytecode.scm index bf6c5f7b5..bed0fb2dc 100644 --- a/module/language/assembly/compile-bytecode.scm +++ b/module/language/assembly/compile-bytecode.scm @@ -122,6 +122,7 @@ ;; meets the alignment requirements of `scm_objcode'. See ;; `scm_c_make_objcode_slice ()'. (write-bytecode meta write get-addr '())))) + ((make-char32 ,x) (write-uint32-be x)) ((load-unsigned-integer ,str) (write-loader str)) ((load-integer ,str) (write-loader str)) ((load-number ,str) (write-loader str))