mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 11:40:18 +02:00
Add R6RS character names
R6RS adds new names for some of the control characters. * libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums) (SCM_N_R6RS_CHARNAMES): new character name constants (scm_alt_charnames, scm_alt_charnums): modified to remove duplicates (scm_i_charname, scm_i_charname_to_char): use new constants * test-suite/tests/chars.test (R5RS character names, R6RS character names): new tests * doc/ref/api-data.texi (Characters): updated
This commit is contained in:
parent
f39ede0067
commit
15b6a6b284
3 changed files with 75 additions and 24 deletions
|
@ -1688,11 +1688,28 @@ the backslash of @code{#\}.
|
|||
Many of the non-printing characters, such as whitespace characters and
|
||||
control characters, also have names.
|
||||
|
||||
The most commonly used non-printing characters are space and
|
||||
newline. Their character names are @code{#\space} and
|
||||
@code{#\newline}. There are also names for all of the ``C0 control
|
||||
characters'' (those with code points below 32). The following table
|
||||
describes the names for each character.
|
||||
The most commonly used non-printing characters have long character
|
||||
names, described in the table below.
|
||||
|
||||
@multitable {@code{#\backspace}} {Preferred}
|
||||
@item Character Name @tab Codepoint
|
||||
@item @code{#\nul} @tab U+0000
|
||||
@item @code{#\alarm} @tab U+0007
|
||||
@item @code{#\backspace} @tab U+0008
|
||||
@item @code{#\tab} @tab U+0009
|
||||
@item @code{#\linefeed} @tab U+000A
|
||||
@item @code{#\newline} @tab U+000A
|
||||
@item @code{#\vtab} @tab U+000B
|
||||
@item @code{#\page} @tab U+000C
|
||||
@item @code{#\return} @tab U+000D
|
||||
@item @code{#\esc} @tab U+001B
|
||||
@item @code{#\space} @tab U+0020
|
||||
@item @code{#\delete} @tab U+007F
|
||||
@end multitable
|
||||
|
||||
There are also short names for all of the ``C0 control characters''
|
||||
(those with code points below 32). The following table lists the short
|
||||
name for each character.
|
||||
|
||||
@multitable @columnfractions .25 .25 .25 .25
|
||||
@item 0 = @code{#\nul}
|
||||
|
@ -1730,24 +1747,16 @@ describes the names for each character.
|
|||
@item 32 = @code{#\sp}
|
||||
@end multitable
|
||||
|
||||
The ``delete'' character (code point U+007F) may be referred to with the
|
||||
name @code{#\del}.
|
||||
The short name for the ``delete'' character (code point U+007F) is
|
||||
@code{#\del}.
|
||||
|
||||
One might note that the space character has two names --
|
||||
@code{#\space} and @code{#\sp} -- as does the newline character.
|
||||
Several other non-printing characters have more than one name, for the
|
||||
sake of compatibility with previous versions.
|
||||
There are also a few alternative names left over for compatibility with
|
||||
previous versions of Guile.
|
||||
|
||||
@multitable {@code{#\backspace}} {Preferred}
|
||||
@item Alternate @tab Standard
|
||||
@item @code{#\sp} @tab @code{#\space}
|
||||
@item @code{#\nl} @tab @code{#\newline}
|
||||
@item @code{#\lf} @tab @code{#\newline}
|
||||
@item @code{#\tab} @tab @code{#\ht}
|
||||
@item @code{#\backspace} @tab @code{#\bs}
|
||||
@item @code{#\return} @tab @code{#\cr}
|
||||
@item @code{#\page} @tab @code{#\ff}
|
||||
@item @code{#\np} @tab @code{#\ff}
|
||||
@item @code{#\np} @tab @code{#\page}
|
||||
@item @code{#\null} @tab @code{#\nul}
|
||||
@end multitable
|
||||
|
||||
|
|
|
@ -536,11 +536,24 @@ static const char *const scm_r5rs_charnames[] = {
|
|||
};
|
||||
|
||||
static const scm_t_uint32 const scm_r5rs_charnums[] = {
|
||||
0x20, 0x0A
|
||||
0x20, 0x0a
|
||||
};
|
||||
|
||||
#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
|
||||
|
||||
static const char *const scm_r6rs_charnames[] = {
|
||||
"nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
|
||||
"return", "esc", "delete"
|
||||
/* 'space' and 'newline' are already included from the R5RS list. */
|
||||
};
|
||||
|
||||
static const scm_t_uint32 const scm_r6rs_charnums[] = {
|
||||
0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
|
||||
0x0d, 0x1b, 0x7f
|
||||
};
|
||||
|
||||
#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
|
||||
|
||||
/* The abbreviated names for control characters. */
|
||||
static const char *const scm_C0_control_charnames[] = {
|
||||
/* C0 controls */
|
||||
|
@ -562,11 +575,11 @@ static const scm_t_uint32 const scm_C0_control_charnums[] = {
|
|||
#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
|
||||
|
||||
static const char *const scm_alt_charnames[] = {
|
||||
"null", "backspace", "tab", "nl", "newline", "np", "page", "return",
|
||||
"null", "nl", "np"
|
||||
};
|
||||
|
||||
static const scm_t_uint32 const scm_alt_charnums[] = {
|
||||
0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d
|
||||
0x00, 0x0a, 0x0c
|
||||
};
|
||||
|
||||
#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
|
||||
|
@ -583,6 +596,10 @@ scm_i_charname (SCM chr)
|
|||
if (scm_r5rs_charnums[c] == i)
|
||||
return scm_r5rs_charnames[c];
|
||||
|
||||
for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
|
||||
if (scm_r6rs_charnums[c] == i)
|
||||
return scm_r6rs_charnames[c];
|
||||
|
||||
for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
|
||||
if (scm_C0_control_charnums[c] == i)
|
||||
return scm_C0_control_charnames[c];
|
||||
|
@ -602,14 +619,21 @@ scm_i_charname_to_char (const char *charname, size_t charname_len)
|
|||
{
|
||||
size_t c;
|
||||
|
||||
/* The R5RS charnames. These are supposed to be case
|
||||
insensitive. */
|
||||
/* The R5RS charnames. These are supposed to be case insensitive. */
|
||||
for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
|
||||
if ((strlen (scm_r5rs_charnames[c]) == charname_len)
|
||||
&& (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
|
||||
return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
|
||||
|
||||
/* Then come the controls. These are not case sensitive. */
|
||||
/* The R6RS charnames. R6RS says that these should be case-sensitive. They
|
||||
are left as case-insensitive to avoid confusion. */
|
||||
for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
|
||||
if ((strlen (scm_r6rs_charnames[c]) == charname_len)
|
||||
&& (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
|
||||
return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
|
||||
|
||||
/* Then come the controls. By Guile convention, these are not case
|
||||
sensitive. */
|
||||
for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
|
||||
if ((strlen (scm_C0_control_charnames[c]) == charname_len)
|
||||
&& (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))
|
||||
|
|
|
@ -258,6 +258,24 @@
|
|||
|
||||
(with-test-prefix "charnames"
|
||||
|
||||
(pass-if "R5RS character names"
|
||||
(and (eqv? #\space (integer->char #x20))
|
||||
(eqv? #\newline (integer->char #x0A))))
|
||||
|
||||
(pass-if "R6RS character names"
|
||||
(and (eqv? #\nul (integer->char #x00))
|
||||
(eqv? #\alarm (integer->char #x07))
|
||||
(eqv? #\backspace (integer->char #x08))
|
||||
(eqv? #\tab (integer->char #x09))
|
||||
(eqv? #\linefeed (integer->char #x0A))
|
||||
(eqv? #\newline (integer->char #x0A))
|
||||
(eqv? #\vtab (integer->char #x0B))
|
||||
(eqv? #\page (integer->char #x0C))
|
||||
(eqv? #\return (integer->char #x0D))
|
||||
(eqv? #\esc (integer->char #x1B))
|
||||
(eqv? #\space (integer->char #x20))
|
||||
(eqv? #\delete (integer->char #x7F))))
|
||||
|
||||
(pass-if "R5RS character names are case insensitive"
|
||||
(and (eqv? #\space #\ )
|
||||
(eqv? #\SPACE #\ )
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue