1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Add R6RS character names

R6RS adds new names for some of the control characters.

* libguile/chars.c (scm_r6rs_charnames, scm_r6rs_charnums)
  (SCM_N_R6RS_CHARNAMES): new character name constants
  (scm_alt_charnames, scm_alt_charnums): modified to remove duplicates
  (scm_i_charname, scm_i_charname_to_char): use new constants

* test-suite/tests/chars.test (R5RS character names, R6RS character names):
  new tests

* doc/ref/api-data.texi (Characters): updated
This commit is contained in:
Michael Gran 2010-01-10 15:08:19 -08:00
parent f39ede0067
commit 15b6a6b284
3 changed files with 75 additions and 24 deletions

View file

@ -1688,11 +1688,28 @@ the backslash of @code{#\}.
Many of the non-printing characters, such as whitespace characters and Many of the non-printing characters, such as whitespace characters and
control characters, also have names. control characters, also have names.
The most commonly used non-printing characters are space and The most commonly used non-printing characters have long character
newline. Their character names are @code{#\space} and names, described in the table below.
@code{#\newline}. There are also names for all of the ``C0 control
characters'' (those with code points below 32). The following table @multitable {@code{#\backspace}} {Preferred}
describes the names for each character. @item Character Name @tab Codepoint
@item @code{#\nul} @tab U+0000
@item @code{#\alarm} @tab U+0007
@item @code{#\backspace} @tab U+0008
@item @code{#\tab} @tab U+0009
@item @code{#\linefeed} @tab U+000A
@item @code{#\newline} @tab U+000A
@item @code{#\vtab} @tab U+000B
@item @code{#\page} @tab U+000C
@item @code{#\return} @tab U+000D
@item @code{#\esc} @tab U+001B
@item @code{#\space} @tab U+0020
@item @code{#\delete} @tab U+007F
@end multitable
There are also short names for all of the ``C0 control characters''
(those with code points below 32). The following table lists the short
name for each character.
@multitable @columnfractions .25 .25 .25 .25 @multitable @columnfractions .25 .25 .25 .25
@item 0 = @code{#\nul} @item 0 = @code{#\nul}
@ -1730,24 +1747,16 @@ describes the names for each character.
@item 32 = @code{#\sp} @item 32 = @code{#\sp}
@end multitable @end multitable
The ``delete'' character (code point U+007F) may be referred to with the The short name for the ``delete'' character (code point U+007F) is
name @code{#\del}. @code{#\del}.
One might note that the space character has two names -- There are also a few alternative names left over for compatibility with
@code{#\space} and @code{#\sp} -- as does the newline character. previous versions of Guile.
Several other non-printing characters have more than one name, for the
sake of compatibility with previous versions.
@multitable {@code{#\backspace}} {Preferred} @multitable {@code{#\backspace}} {Preferred}
@item Alternate @tab Standard @item Alternate @tab Standard
@item @code{#\sp} @tab @code{#\space}
@item @code{#\nl} @tab @code{#\newline} @item @code{#\nl} @tab @code{#\newline}
@item @code{#\lf} @tab @code{#\newline} @item @code{#\np} @tab @code{#\page}
@item @code{#\tab} @tab @code{#\ht}
@item @code{#\backspace} @tab @code{#\bs}
@item @code{#\return} @tab @code{#\cr}
@item @code{#\page} @tab @code{#\ff}
@item @code{#\np} @tab @code{#\ff}
@item @code{#\null} @tab @code{#\nul} @item @code{#\null} @tab @code{#\nul}
@end multitable @end multitable

View file

@ -536,11 +536,24 @@ static const char *const scm_r5rs_charnames[] = {
}; };
static const scm_t_uint32 const scm_r5rs_charnums[] = { static const scm_t_uint32 const scm_r5rs_charnums[] = {
0x20, 0x0A 0x20, 0x0a
}; };
#define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *)) #define SCM_N_R5RS_CHARNAMES (sizeof (scm_r5rs_charnames) / sizeof (char *))
static const char *const scm_r6rs_charnames[] = {
"nul", "alarm", "backspace", "tab", "linefeed", "vtab", "page",
"return", "esc", "delete"
/* 'space' and 'newline' are already included from the R5RS list. */
};
static const scm_t_uint32 const scm_r6rs_charnums[] = {
0x00, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c,
0x0d, 0x1b, 0x7f
};
#define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *))
/* The abbreviated names for control characters. */ /* The abbreviated names for control characters. */
static const char *const scm_C0_control_charnames[] = { static const char *const scm_C0_control_charnames[] = {
/* C0 controls */ /* C0 controls */
@ -562,11 +575,11 @@ static const scm_t_uint32 const scm_C0_control_charnums[] = {
#define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *)) #define SCM_N_C0_CONTROL_CHARNAMES (sizeof (scm_C0_control_charnames) / sizeof (char *))
static const char *const scm_alt_charnames[] = { static const char *const scm_alt_charnames[] = {
"null", "backspace", "tab", "nl", "newline", "np", "page", "return", "null", "nl", "np"
}; };
static const scm_t_uint32 const scm_alt_charnums[] = { static const scm_t_uint32 const scm_alt_charnums[] = {
0x00, 0x08, 0x09, 0x0a, 0x0a, 0x0c, 0x0c, 0x0d 0x00, 0x0a, 0x0c
}; };
#define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *)) #define SCM_N_ALT_CHARNAMES (sizeof (scm_alt_charnames) / sizeof (char *))
@ -583,6 +596,10 @@ scm_i_charname (SCM chr)
if (scm_r5rs_charnums[c] == i) if (scm_r5rs_charnums[c] == i)
return scm_r5rs_charnames[c]; return scm_r5rs_charnames[c];
for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
if (scm_r6rs_charnums[c] == i)
return scm_r6rs_charnames[c];
for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
if (scm_C0_control_charnums[c] == i) if (scm_C0_control_charnums[c] == i)
return scm_C0_control_charnames[c]; return scm_C0_control_charnames[c];
@ -602,14 +619,21 @@ scm_i_charname_to_char (const char *charname, size_t charname_len)
{ {
size_t c; size_t c;
/* The R5RS charnames. These are supposed to be case /* The R5RS charnames. These are supposed to be case insensitive. */
insensitive. */
for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++) for (c = 0; c < SCM_N_R5RS_CHARNAMES; c++)
if ((strlen (scm_r5rs_charnames[c]) == charname_len) if ((strlen (scm_r5rs_charnames[c]) == charname_len)
&& (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len))) && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len)))
return SCM_MAKE_CHAR (scm_r5rs_charnums[c]); return SCM_MAKE_CHAR (scm_r5rs_charnums[c]);
/* Then come the controls. These are not case sensitive. */ /* The R6RS charnames. R6RS says that these should be case-sensitive. They
are left as case-insensitive to avoid confusion. */
for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++)
if ((strlen (scm_r6rs_charnames[c]) == charname_len)
&& (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len)))
return SCM_MAKE_CHAR (scm_r6rs_charnums[c]);
/* Then come the controls. By Guile convention, these are not case
sensitive. */
for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++)
if ((strlen (scm_C0_control_charnames[c]) == charname_len) if ((strlen (scm_C0_control_charnames[c]) == charname_len)
&& (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len))) && (!strncasecmp (scm_C0_control_charnames[c], charname, charname_len)))

View file

@ -258,6 +258,24 @@
(with-test-prefix "charnames" (with-test-prefix "charnames"
;; The reader must map each of the two R5RS character names to its
;; expected code point (space = #x20, newline = #x0A).
(pass-if "R5RS character names"
(and (eqv? #\space (integer->char #x20))
(eqv? #\newline (integer->char #x0A))))
;; The reader must map every long character name checked here to its
;; expected code point.  Note that #\linefeed and #\newline both name
;; #x0A; the test fails if any single comparison is false.
(pass-if "R6RS character names"
(and (eqv? #\nul (integer->char #x00))
(eqv? #\alarm (integer->char #x07))
(eqv? #\backspace (integer->char #x08))
(eqv? #\tab (integer->char #x09))
(eqv? #\linefeed (integer->char #x0A))
(eqv? #\newline (integer->char #x0A))
(eqv? #\vtab (integer->char #x0B))
(eqv? #\page (integer->char #x0C))
(eqv? #\return (integer->char #x0D))
(eqv? #\esc (integer->char #x1B))
(eqv? #\space (integer->char #x20))
(eqv? #\delete (integer->char #x7F))))
(pass-if "R5RS character names are case insensitive" (pass-if "R5RS character names are case insensitive"
(and (eqv? #\space #\ ) (and (eqv? #\space #\ )
(eqv? #\SPACE #\ ) (eqv? #\SPACE #\ )