mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-30 03:40:34 +02:00
Add 32-bit characters
This adds the 32-bit standalone characters. Strings are still 8-bit. Characters larger than 8-bit can only be entered or displayed in octal format at this point. At this point, the terminal's display encoding is expected to be Latin-1. * module/language/assembly/compile-bytecode.scm (write-bytecode): add 32-bit char * module/language/assembly.scm (object->assembly): add 32-bit char (assembly->object): add 32-bit char * libguile/vm-i-system.c (make-char32): new op * libguile/print.c (iprin1): print 32-bit char * libguile/numbers.h: add type scm_t_wchar * libguile/numbers.c: add type scm_t_wchar * libguile/chars.h: new type scm_t_wchar (SCM_CODEPOINT_MAX): new (SCM_IS_UNICODE_CHAR): new (SCM_MAKE_CHAR): operate on 32-bit char * libguile/chars.c: comparison operators now use Unicode codepoints (scm_c_upcase): now receives and returns scm_t_wchar (scm_c_downcase): now receives and returns scm_t_wchar
This commit is contained in:
parent
77332b21a0
commit
904a78f11d
8 changed files with 126 additions and 43 deletions
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -24,6 +24,8 @@
|
|||
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <unicase.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/validate.h"
|
||||
|
||||
|
@ -55,7 +57,7 @@ SCM_DEFINE1 (scm_char_eq_p, "char=?", scm_tc7_rpsubr,
|
|||
|
||||
SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is less than @var{y} in the ASCII sequence,\n"
|
||||
"Return @code{#t} iff @var{x} is less than @var{y} in the Unicode sequence,\n"
|
||||
"else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_less_p
|
||||
{
|
||||
|
@ -68,7 +70,7 @@ SCM_DEFINE1 (scm_char_less_p, "char<?", scm_tc7_rpsubr,
|
|||
SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is less than or equal to @var{y} in the\n"
|
||||
"ASCII sequence, else @code{#f}.")
|
||||
"Unicode sequence, else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_leq_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -79,7 +81,7 @@ SCM_DEFINE1 (scm_char_leq_p, "char<=?", scm_tc7_rpsubr,
|
|||
|
||||
SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is greater than @var{y} in the ASCII\n"
|
||||
"Return @code{#t} iff @var{x} is greater than @var{y} in the Unicode\n"
|
||||
"sequence, else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_gr_p
|
||||
{
|
||||
|
@ -92,7 +94,7 @@ SCM_DEFINE1 (scm_char_gr_p, "char>?", scm_tc7_rpsubr,
|
|||
SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n"
|
||||
"ASCII sequence, else @code{#f}.")
|
||||
"Unicode sequence, else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_geq_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -104,7 +106,7 @@ SCM_DEFINE1 (scm_char_geq_p, "char>=?", scm_tc7_rpsubr,
|
|||
SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is the same character as @var{y} ignoring\n"
|
||||
"case, else @code{#f}.")
|
||||
"case, else @code{#f}. Case is locale free and not context sensitive.")
|
||||
#define FUNC_NAME s_scm_char_ci_eq_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -115,8 +117,9 @@ SCM_DEFINE1 (scm_char_ci_eq_p, "char-ci=?", scm_tc7_rpsubr,
|
|||
|
||||
SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is less than @var{y} in the ASCII sequence\n"
|
||||
"ignoring case, else @code{#f}.")
|
||||
"Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
|
||||
"than the Unicode uppercase form @var{y} in the Unicode sequence,\n"
|
||||
"else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_ci_less_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -127,8 +130,9 @@ SCM_DEFINE1 (scm_char_ci_less_p, "char-ci<?", scm_tc7_rpsubr,
|
|||
|
||||
SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is less than or equal to @var{y} in the\n"
|
||||
"ASCII sequence ignoring case, else @code{#f}.")
|
||||
"Return @code{#t} iff the Unicode uppercase form of @var{x} is less\n"
|
||||
"than or equal to the Unicode uppercase form of @var{y} in the\n"
|
||||
"Unicode sequence, else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_ci_leq_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -139,8 +143,9 @@ SCM_DEFINE1 (scm_char_ci_leq_p, "char-ci<=?", scm_tc7_rpsubr,
|
|||
|
||||
SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is greater than @var{y} in the ASCII\n"
|
||||
"sequence ignoring case, else @code{#f}.")
|
||||
"Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
|
||||
"than the Unicode uppercase form of @var{y} in the Unicode\n"
|
||||
"sequence, else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_ci_gr_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -151,8 +156,9 @@ SCM_DEFINE1 (scm_char_ci_gr_p, "char-ci>?", scm_tc7_rpsubr,
|
|||
|
||||
SCM_DEFINE1 (scm_char_ci_geq_p, "char-ci>=?", scm_tc7_rpsubr,
|
||||
(SCM x, SCM y),
|
||||
"Return @code{#t} iff @var{x} is greater than or equal to @var{y} in the\n"
|
||||
"ASCII sequence ignoring case, else @code{#f}.")
|
||||
"Return @code{#t} iff the Unicode uppercase form of @var{x} is greater\n"
|
||||
"than or equal to the Unicode uppercase form of @var{y} in the\n"
|
||||
"Unicode sequence, else @code{#f}.")
|
||||
#define FUNC_NAME s_scm_char_ci_geq_p
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, x);
|
||||
|
@ -233,7 +239,7 @@ SCM_DEFINE (scm_char_to_integer, "char->integer", 1, 0, 0,
|
|||
#define FUNC_NAME s_scm_char_to_integer
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, chr);
|
||||
return scm_from_ulong (SCM_CHAR(chr));
|
||||
return scm_from_uint32 (SCM_CHAR(chr));
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -244,7 +250,15 @@ SCM_DEFINE (scm_integer_to_char, "integer->char", 1, 0, 0,
|
|||
"Return the character at position @var{n} in the ASCII sequence.")
|
||||
#define FUNC_NAME s_scm_integer_to_char
|
||||
{
|
||||
return SCM_MAKE_CHAR (scm_to_uchar (n));
|
||||
scm_t_wchar cn;
|
||||
|
||||
cn = scm_to_wchar (n);
|
||||
|
||||
/* Avoid the surrogates. */
|
||||
if (!SCM_IS_UNICODE_CHAR (cn))
|
||||
scm_out_of_range (FUNC_NAME, n);
|
||||
|
||||
return SCM_MAKE_CHAR (cn);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -255,7 +269,7 @@ SCM_DEFINE (scm_char_upcase, "char-upcase", 1, 0, 0,
|
|||
#define FUNC_NAME s_scm_char_upcase
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, chr);
|
||||
return SCM_MAKE_CHAR (toupper (SCM_CHAR (chr)));
|
||||
return SCM_MAKE_CHAR (scm_c_upcase (SCM_CHAR (chr)));
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -266,7 +280,7 @@ SCM_DEFINE (scm_char_downcase, "char-downcase", 1, 0, 0,
|
|||
#define FUNC_NAME s_scm_char_downcase
|
||||
{
|
||||
SCM_VALIDATE_CHAR (1, chr);
|
||||
return SCM_MAKE_CHAR (tolower (SCM_CHAR(chr)));
|
||||
return SCM_MAKE_CHAR (scm_c_downcase (SCM_CHAR(chr)));
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -279,23 +293,17 @@ TODO: change name to scm_i_.. ? --hwn
|
|||
*/
|
||||
|
||||
|
||||
int
|
||||
scm_c_upcase (unsigned int c)
|
||||
scm_t_wchar
|
||||
scm_c_upcase (scm_t_wchar c)
|
||||
{
|
||||
if (c <= UCHAR_MAX)
|
||||
return toupper (c);
|
||||
else
|
||||
return c;
|
||||
return uc_toupper (c);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
scm_c_downcase (unsigned int c)
|
||||
scm_t_wchar
|
||||
scm_c_downcase (scm_t_wchar c)
|
||||
{
|
||||
if (c <= UCHAR_MAX)
|
||||
return tolower (c);
|
||||
else
|
||||
return c;
|
||||
return uc_tolower (c);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#ifndef SCM_CHARS_H
|
||||
#define SCM_CHARS_H
|
||||
|
||||
/* Copyright (C) 1995,1996,2000,2001,2004, 2006, 2008 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1995,1996,2000,2001,2004, 2006, 2008, 2009 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -28,9 +28,24 @@
|
|||
|
||||
/* Immediate Characters
|
||||
*/
|
||||
|
||||
#ifndef SCM_WCHAR_DEFINED
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
#define SCM_WCHAR_DEFINED
|
||||
#endif
|
||||
|
||||
#define SCM_CHARP(x) (SCM_ITAG8(x) == scm_tc8_char)
|
||||
#define SCM_CHAR(x) ((unsigned int)SCM_ITAG8_DATA(x))
|
||||
#define SCM_MAKE_CHAR(x) SCM_MAKE_ITAG8((scm_t_bits) (unsigned char) (x), scm_tc8_char)
|
||||
#define SCM_CHAR(x) ((scm_t_wchar)SCM_ITAG8_DATA(x))
|
||||
|
||||
#define SCM_MAKE_CHAR(x) ({scm_t_int32 _x = (x); \
|
||||
_x < 0 \
|
||||
? SCM_MAKE_ITAG8((scm_t_bits)(unsigned char)_x, scm_tc8_char) \
|
||||
: SCM_MAKE_ITAG8((scm_t_bits)_x, scm_tc8_char);})
|
||||
|
||||
#define SCM_CODEPOINT_MAX (0x10ffff)
|
||||
#define SCM_IS_UNICODE_CHAR(c) \
|
||||
((scm_t_wchar)(c)<=0xd7ff || \
|
||||
((scm_t_wchar)(c)>=0xe000 && (scm_t_wchar)(c)<=SCM_CODEPOINT_MAX))
|
||||
|
||||
|
||||
|
||||
|
@ -55,9 +70,9 @@ SCM_API SCM scm_char_to_integer (SCM chr);
|
|||
SCM_API SCM scm_integer_to_char (SCM n);
|
||||
SCM_API SCM scm_char_upcase (SCM chr);
|
||||
SCM_API SCM scm_char_downcase (SCM chr);
|
||||
SCM_API int scm_c_upcase (unsigned int c);
|
||||
SCM_API int scm_c_downcase (unsigned int c);
|
||||
SCM_INTERNAL const char * scm_i_charname (SCM chr);
|
||||
SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c);
|
||||
SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c);
|
||||
SCM_INTERNAL const char *scm_i_charname (SCM chr);
|
||||
SCM_INTERNAL SCM scm_i_charname_to_char (const char *charname,
|
||||
size_t charname_len);
|
||||
SCM_INTERNAL void scm_init_chars (void);
|
||||
|
|
|
@ -5863,6 +5863,14 @@ scm_i_range_error (SCM bad_val, SCM min, SCM max)
|
|||
#define SCM_FROM_TYPE_PROTO(arg) scm_from_uint32 (arg)
|
||||
#include "libguile/conv-uinteger.i.c"
|
||||
|
||||
#define TYPE scm_t_wchar
|
||||
#define TYPE_MIN (scm_t_int32)-1
|
||||
#define TYPE_MAX (scm_t_int32)0x10ffff
|
||||
#define SIZEOF_TYPE 4
|
||||
#define SCM_TO_TYPE_PROTO(arg) scm_to_wchar (arg)
|
||||
#define SCM_FROM_TYPE_PROTO(arg) scm_from_wchar (arg)
|
||||
#include "libguile/conv-integer.i.c"
|
||||
|
||||
#if SCM_HAVE_T_INT64
|
||||
|
||||
#define TYPE scm_t_int64
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#ifndef SCM_NUMBERS_H
|
||||
#define SCM_NUMBERS_H
|
||||
|
||||
/* Copyright (C) 1995,1996,1998,2000,2001,2002,2003,2004,2005, 2006, 2008 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 1995,1996,1998,2000,2001,2002,2003,2004,2005, 2006, 2008, 2009 Free Software Foundation, Inc.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public License
|
||||
|
@ -174,6 +174,11 @@ typedef struct scm_t_complex
|
|||
double imag;
|
||||
} scm_t_complex;
|
||||
|
||||
#ifndef SCM_WCHAR_DEFINED
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
#define SCM_WCHAR_DEFINED
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
SCM_API SCM scm_exact_p (SCM x);
|
||||
|
@ -322,6 +327,9 @@ SCM_API SCM scm_from_int32 (scm_t_int32 x);
|
|||
SCM_API scm_t_uint32 scm_to_uint32 (SCM x);
|
||||
SCM_API SCM scm_from_uint32 (scm_t_uint32 x);
|
||||
|
||||
SCM_API scm_t_wchar scm_to_wchar (SCM x);
|
||||
SCM_API SCM scm_from_wchar (scm_t_wchar x);
|
||||
|
||||
#if SCM_HAVE_T_INT64
|
||||
|
||||
SCM_API scm_t_int64 scm_to_int64 (SCM x);
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <unictype.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/chars.h"
|
||||
|
@ -436,7 +437,7 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
case scm_tc3_imm24:
|
||||
if (SCM_CHARP (exp))
|
||||
{
|
||||
long i = SCM_CHAR (exp);
|
||||
scm_t_wchar i = SCM_CHAR (exp);
|
||||
const char *name;
|
||||
|
||||
if (SCM_WRITINGP (pstate))
|
||||
|
@ -445,10 +446,30 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
name = scm_i_charname (exp);
|
||||
if (name != NULL)
|
||||
scm_puts (name, port);
|
||||
else if (i < 0 || i > '\177')
|
||||
scm_intprint (i, 8, port);
|
||||
else
|
||||
scm_putc (i, port);
|
||||
else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L
|
||||
| UC_CATEGORY_MASK_M
|
||||
| UC_CATEGORY_MASK_N
|
||||
| UC_CATEGORY_MASK_P
|
||||
| UC_CATEGORY_MASK_S))
|
||||
/* Print the character if is graphic character. */
|
||||
{
|
||||
if (i<256)
|
||||
{
|
||||
/* Character is graphic. Print it. */
|
||||
scm_putc (i, port);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Character is a non-graphical character. */
|
||||
scm_intprint (i, 8, port);
|
||||
}
|
||||
}
|
||||
else
|
||||
scm_putc (i, port);
|
||||
|
|
|
@ -175,6 +175,19 @@ VM_DEFINE_INSTRUCTION (16, make_char8, "make-char8", 1, 0, 1)
|
|||
NEXT;
|
||||
}
|
||||
|
||||
VM_DEFINE_INSTRUCTION (42, make_char32, "make-char32", 4, 0, 1)
|
||||
{
|
||||
scm_t_wchar v = 0;
|
||||
v += FETCH ();
|
||||
v <<= 8; v += FETCH ();
|
||||
v <<= 8; v += FETCH ();
|
||||
v <<= 8; v += FETCH ();
|
||||
PUSH (SCM_MAKE_CHAR (v));
|
||||
NEXT;
|
||||
}
|
||||
|
||||
|
||||
|
||||
VM_DEFINE_INSTRUCTION (17, list, "list", 2, -1, 1)
|
||||
{
|
||||
unsigned h = FETCH ();
|
||||
|
|
|
@ -131,7 +131,11 @@
|
|||
(bytevector-s64-set! bv 0 x (endianness big))
|
||||
bv))))
|
||||
(else #f)))
|
||||
((char? x) `(make-char8 ,(char->integer x)))
|
||||
((char? x)
|
||||
(cond ((<= (char->integer x) #xff)
|
||||
`(make-char8 ,(char->integer x)))
|
||||
(else
|
||||
`(make-char32 ,(char->integer x)))))
|
||||
(else #f)))
|
||||
|
||||
(define (assembly->object code)
|
||||
|
@ -156,6 +160,11 @@
|
|||
(endianness big)))
|
||||
((make-char8 ,n)
|
||||
(integer->char n))
|
||||
((make-char32 ,n1 ,n2 ,n3 ,n4)
|
||||
(integer->char (+ (* n1 #x1000000)
|
||||
(* n2 #x10000)
|
||||
(* n3 #x100)
|
||||
n4)))
|
||||
((load-string ,s) s)
|
||||
((load-symbol ,s) (string->symbol s))
|
||||
((load-keyword ,s) (symbol->keyword (string->symbol s)))
|
||||
|
|
|
@ -122,6 +122,7 @@
|
|||
;; meets the alignment requirements of `scm_objcode'. See
|
||||
;; `scm_c_make_objcode_slice ()'.
|
||||
(write-bytecode meta write get-addr '()))))
|
||||
((make-char32 ,x) (write-uint32-be x))
|
||||
((load-unsigned-integer ,str) (write-loader str))
|
||||
((load-integer ,str) (write-loader str))
|
||||
((load-number ,str) (write-loader str))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue