mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-06-05 11:40:20 +02:00
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and a conversion failure strategy. These properties have getters and setters. The new properties are used to convert any locale text to/from the internal representation of strings. If unspecified, ports use a default value. The default value of these properties is held in a fluid. The default character encoding can be modified by calling setlocale. ISO-8859-1 is treated specially. Since it is a native encoding of strings, it can be processed more quickly. Source code is assumed to be ISO-8859-1 unless otherwise specified. The encoding of a source code file can be given as 'coding: XXXXX' in a magic comment at the top of a file. The C functions that deal with encoding often use a null pointer as shorthand for the native Latin-1 encoding, for efficiency's sake. * test-suite/tests/encoding-iso88591.test: new tests * test-suite/tests/encoding-iso88597.test: new tests * test-suite/tests/encoding-utf8.test: new tests * test-suite/tests/encoding-escapes.test: new tests * test-suite/tests/numbers.test: declare 'binary' encoding * test-suite/tests/ports.test: declare 'binary' encoding * test-suite/tests/r6rs-ports.test: declare 'binary' encoding * module/system/base/compile.scm (compile-file): use source-code file's self-declared encoding when compiling files * libguile/strports.c: store string ports in locale encoding (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector) (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector): new functions * libguile/strings.h: new declaration for scm_i_string_contains_char * libguile/strings.c (scm_i_string_contains_char): new function (scm_from_stringn, scm_to_stringn): use NULL for Latin-1 (scm_from_locale_stringn, scm_to_locale_stringn): respect character encoding of input and output ports * libguile/read.h: declaration for scm_scan_for_encoding * libguile/read.c: (read_token): now takes scheme string instead of C string/length (read_complete_token): new function (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol) (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment) (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector) (scm_read_scsh_block_comment, scm_read_commented_expression) (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart) (scm_read_expression): use scm_t_wchar for char type, use read_complete_token (scm_scan_for_encoding): new function to find a file's character encoding (scm_file_encoding): new function to find a port's character encoding * libguile/rdelim.c: don't unpack strings * libguile/print.h: declaration for modified function scm_i_charprint * libguile/print.c: use locale when printing characters and strings (scm_i_charprint): input parameter is now scm_t_wchar (scm_simple_format): don't unpack strings * libguile/posix.h: new declaration for scm_setbinary. * libguile/posix.c (scm_setlocale): set default and stdio port encodings based on the locale's character encoding (scm_setbinary): new function * libguile/ports.h (scm_t_port): add encoding and failed conversion handler to port type. Declarations for new or modified functions scm_getc, scm_unget_byte, scm_ungetc, scm_i_get_port_encoding, scm_i_set_port_encoding_x, scm_port_encoding, scm_set_port_encoding_x, scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x, scm_port_conversion_strategy, scm_set_port_conversion_strategy_x. * libguile/ports.c: assign the current ports to zero on startup so we can see if they've been set. (scm_current_input_port, scm_current_output_port, scm_current_error_port): return #f if the port is not yet initialized (scm_new_port_table_entry): set up a new port's encoding and illegal sequence handler based on the thread's current defaults (scm_i_remove_port): free port encoding name when port is removed (scm_i_mode_bits_n): now takes a scheme string instead of a c string and length. All callers changed. (SCM_MBCHAR_BUF_SIZE): new const (scm_getc): new function, since the scm_getc in inline.h is now scm_get_byte_or_eof. This pulls one codepoint from a port. (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding (scm_unget_byte): new function, incorportaing the low-level functionality of scm_ungetc (scm_ungetc): uses scm_unget_byte * libguile/numbers.h (scm_t_wchar): compilation order problem with scm_t_wchar being use in functions in multiple headers. Forward declare scm_t_wchar. * libguile/load.c (scm_primitive_load): scan for file encoding at top of file and use it to set the load port's encoding * libguile/inline.h (scm_get_byte_or_eof): new function incorporating most of the functionality of scm_getc. * libguile/fports.c (fport_fill_input): now returns scm_t_wchar * libguile/chars.h (scm_t_wchar): avoid compilation order problem with declaration of scm_t_wchar
This commit is contained in:
parent
9db8cf1634
commit
889975e51a
26 changed files with 1705 additions and 316 deletions
|
@ -24,7 +24,11 @@
|
|||
|
||||
|
||||
#include "libguile/__scm.h"
|
||||
#include "libguile/numbers.h"
|
||||
|
||||
#ifndef SCM_T_WCHAR_DEFINED
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
#define SCM_T_WCHAR_DEFINED
|
||||
#endif /* SCM_T_WCHAR_DEFINED */
|
||||
|
||||
|
||||
/* Immediate Characters
|
||||
|
|
|
@ -587,7 +587,7 @@ static void fport_flush (SCM port);
|
|||
|
||||
/* fill a port's read-buffer with a single read. returns the first
|
||||
char or EOF if end of file. */
|
||||
static int
|
||||
static scm_t_wchar
|
||||
fport_fill_input (SCM port)
|
||||
{
|
||||
long count;
|
||||
|
@ -601,7 +601,7 @@ fport_fill_input (SCM port)
|
|||
if (count == -1)
|
||||
scm_syserror ("fport_fill_input");
|
||||
if (count == 0)
|
||||
return EOF;
|
||||
return (scm_t_wchar) EOF;
|
||||
else
|
||||
{
|
||||
pt->read_pos = pt->read_buf;
|
||||
|
|
|
@ -86,7 +86,7 @@ SCM_API void scm_array_handle_set (scm_t_array_handle *h, ssize_t pos, SCM val);
|
|||
|
||||
SCM_API int scm_is_pair (SCM x);
|
||||
|
||||
SCM_API int scm_getc (SCM port);
|
||||
SCM_API int scm_get_byte_or_eof (SCM port);
|
||||
SCM_API void scm_putc (char c, SCM port);
|
||||
SCM_API void scm_puts (const char *str_data, SCM port);
|
||||
|
||||
|
@ -290,7 +290,7 @@ scm_is_pair (SCM x)
|
|||
SCM_C_EXTERN_INLINE
|
||||
#endif
|
||||
int
|
||||
scm_getc (SCM port)
|
||||
scm_get_byte_or_eof (SCM port)
|
||||
{
|
||||
int c;
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
@ -310,27 +310,6 @@ scm_getc (SCM port)
|
|||
|
||||
c = *(pt->read_pos++);
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\a':
|
||||
break;
|
||||
case '\b':
|
||||
SCM_DECCOL (port);
|
||||
break;
|
||||
case '\n':
|
||||
SCM_INCLINE (port);
|
||||
break;
|
||||
case '\r':
|
||||
SCM_ZEROCOL (port);
|
||||
break;
|
||||
case '\t':
|
||||
SCM_TABCOL (port);
|
||||
break;
|
||||
default:
|
||||
SCM_INCCOL (port);
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ SCM_DEFINE (scm_primitive_load, "primitive-load", 1, 0, 0,
|
|||
#define FUNC_NAME s_scm_primitive_load
|
||||
{
|
||||
SCM hook = *scm_loc_load_hook;
|
||||
char *encoding;
|
||||
SCM_VALIDATE_STRING (1, filename);
|
||||
if (scm_is_true (hook) && scm_is_false (scm_procedure_p (hook)))
|
||||
SCM_MISC_ERROR ("value of %load-hook is neither a procedure nor #f",
|
||||
|
@ -97,7 +98,15 @@ SCM_DEFINE (scm_primitive_load, "primitive-load", 1, 0, 0,
|
|||
SCM port = scm_open_file (filename, scm_from_locale_string ("r"));
|
||||
scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
|
||||
scm_i_dynwind_current_load_port (port);
|
||||
|
||||
encoding = scm_scan_for_encoding (port);
|
||||
if (encoding)
|
||||
{
|
||||
scm_i_set_port_encoding_x (port, encoding);
|
||||
free (encoding);
|
||||
}
|
||||
else
|
||||
/* The file has no encoding declaraed. We'll presume Latin-1. */
|
||||
scm_i_set_port_encoding_x (port, NULL);
|
||||
while (1)
|
||||
{
|
||||
SCM reader, form;
|
||||
|
|
|
@ -28,6 +28,11 @@
|
|||
#include "libguile/__scm.h"
|
||||
#include "libguile/print.h"
|
||||
|
||||
#ifndef SCM_T_WCHAR_DEFINED
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
#define SCM_T_WCHAR_DEFINED
|
||||
#endif /* SCM_T_WCHAR_DEFINED */
|
||||
|
||||
#if SCM_HAVE_FLOATINGPOINT_H
|
||||
# include <floatingpoint.h>
|
||||
#endif
|
||||
|
@ -174,7 +179,6 @@ typedef struct scm_t_complex
|
|||
double imag;
|
||||
} scm_t_complex;
|
||||
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
|
||||
|
||||
|
||||
|
|
557
libguile/ports.c
557
libguile/ports.c
|
@ -30,6 +30,9 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h> /* for chsize on mingw */
|
||||
#include <assert.h>
|
||||
#include <uniconv.h>
|
||||
#include <unistr.h>
|
||||
#include <striconveh.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/async.h"
|
||||
|
@ -51,6 +54,7 @@
|
|||
#include "libguile/vectors.h"
|
||||
#include "libguile/weaks.h"
|
||||
#include "libguile/fluids.h"
|
||||
#include "libguile/eq.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
|
@ -358,10 +362,10 @@ SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
|
|||
|
||||
/* Standard ports --- current input, output, error, and more(!). */
|
||||
|
||||
static SCM cur_inport_fluid;
|
||||
static SCM cur_outport_fluid;
|
||||
static SCM cur_errport_fluid;
|
||||
static SCM cur_loadport_fluid;
|
||||
static SCM cur_inport_fluid = 0;
|
||||
static SCM cur_outport_fluid = 0;
|
||||
static SCM cur_errport_fluid = 0;
|
||||
static SCM cur_loadport_fluid = 0;
|
||||
|
||||
SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
|
||||
(),
|
||||
|
@ -370,7 +374,10 @@ SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
|
|||
"returns the @dfn{standard input} in Unix and C terminology.")
|
||||
#define FUNC_NAME s_scm_current_input_port
|
||||
{
|
||||
return scm_fluid_ref (cur_inport_fluid);
|
||||
if (cur_inport_fluid)
|
||||
return scm_fluid_ref (cur_inport_fluid);
|
||||
else
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -382,7 +389,10 @@ SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
|
|||
"Unix and C terminology.")
|
||||
#define FUNC_NAME s_scm_current_output_port
|
||||
{
|
||||
return scm_fluid_ref (cur_outport_fluid);
|
||||
if (cur_outport_fluid)
|
||||
return scm_fluid_ref (cur_outport_fluid);
|
||||
else
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -392,7 +402,10 @@ SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
|
|||
"@dfn{standard error} in Unix and C terminology).")
|
||||
#define FUNC_NAME s_scm_current_error_port
|
||||
{
|
||||
return scm_fluid_ref (cur_errport_fluid);
|
||||
if (cur_errport_fluid)
|
||||
return scm_fluid_ref (cur_errport_fluid);
|
||||
else
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -509,10 +522,18 @@ scm_new_port_table_entry (scm_t_bits tag)
|
|||
|
||||
SCM z = scm_cons (SCM_EOL, SCM_EOL);
|
||||
scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
|
||||
const char *enc;
|
||||
|
||||
entry->file_name = SCM_BOOL_F;
|
||||
entry->rw_active = SCM_PORT_NEITHER;
|
||||
entry->port = z;
|
||||
/* Initialize this port with the thread's current default
|
||||
encoding. */
|
||||
if ((enc = scm_i_get_port_encoding (SCM_BOOL_F)) == NULL)
|
||||
entry->encoding = NULL;
|
||||
else
|
||||
entry->encoding = strdup (enc);
|
||||
entry->ilseq_handler = scm_i_get_conversion_strategy (SCM_BOOL_F);
|
||||
|
||||
SCM_SET_CELL_TYPE (z, tag);
|
||||
SCM_SETPTAB_ENTRY (z, entry);
|
||||
|
@ -549,6 +570,11 @@ scm_i_remove_port (SCM port)
|
|||
scm_t_port *p = SCM_PTAB_ENTRY (port);
|
||||
if (p->putback_buf)
|
||||
scm_gc_free (p->putback_buf, p->putback_buf_size, "putback buffer");
|
||||
if (p->encoding)
|
||||
{
|
||||
free (p->encoding);
|
||||
p->encoding = NULL;
|
||||
}
|
||||
scm_gc_free (p, sizeof (scm_t_port), "port");
|
||||
|
||||
SCM_SETPTAB_ENTRY (port, 0);
|
||||
|
@ -632,21 +658,22 @@ SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0,
|
|||
*/
|
||||
|
||||
static long
|
||||
scm_i_mode_bits_n (const char *modes, size_t n)
|
||||
scm_i_mode_bits_n (SCM modes)
|
||||
{
|
||||
return (SCM_OPN
|
||||
| (memchr (modes, 'r', n) || memchr (modes, '+', n) ? SCM_RDNG : 0)
|
||||
| ( memchr (modes, 'w', n)
|
||||
|| memchr (modes, 'a', n)
|
||||
|| memchr (modes, '+', n) ? SCM_WRTNG : 0)
|
||||
| (memchr (modes, '0', n) ? SCM_BUF0 : 0)
|
||||
| (memchr (modes, 'l', n) ? SCM_BUFLINE : 0));
|
||||
| (scm_i_string_contains_char (modes, 'r')
|
||||
|| scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
|
||||
| (scm_i_string_contains_char (modes, 'w')
|
||||
|| scm_i_string_contains_char (modes, 'a')
|
||||
|| scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
|
||||
| (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
|
||||
| (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
|
||||
}
|
||||
|
||||
long
|
||||
scm_mode_bits (char *modes)
|
||||
{
|
||||
return scm_i_mode_bits_n (modes, strlen (modes));
|
||||
return scm_i_mode_bits (scm_from_locale_string (modes));
|
||||
}
|
||||
|
||||
long
|
||||
|
@ -657,8 +684,7 @@ scm_i_mode_bits (SCM modes)
|
|||
if (!scm_is_string (modes))
|
||||
scm_wrong_type_arg_msg (NULL, 0, modes, "string");
|
||||
|
||||
bits = scm_i_mode_bits_n (scm_i_string_chars (modes),
|
||||
scm_i_string_length (modes));
|
||||
bits = scm_i_mode_bits_n (modes);
|
||||
scm_remember_upto_here_1 (modes);
|
||||
return bits;
|
||||
}
|
||||
|
@ -929,7 +955,7 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
|
|||
"characters are available, the end-of-file object is returned.")
|
||||
#define FUNC_NAME s_scm_read_char
|
||||
{
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
SCM_VALIDATE_OPINPORT (1, port);
|
||||
|
@ -940,6 +966,133 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
#define SCM_MBCHAR_BUF_SIZE (4)
|
||||
|
||||
/* Get one codepoint from a file, using the port's encoding. */
|
||||
scm_t_wchar
|
||||
scm_getc (SCM port)
|
||||
{
|
||||
int c;
|
||||
unsigned int bufcount = 0;
|
||||
char buf[SCM_MBCHAR_BUF_SIZE];
|
||||
scm_t_wchar codepoint = 0;
|
||||
scm_t_uint32 *u32;
|
||||
size_t u32len;
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
||||
c = scm_get_byte_or_eof (port);
|
||||
if (c == EOF)
|
||||
return (scm_t_wchar) EOF;
|
||||
|
||||
buf[0] = c;
|
||||
bufcount++;
|
||||
|
||||
if (pt->encoding == NULL)
|
||||
{
|
||||
/* The encoding is Latin-1: bytes are characters. */
|
||||
codepoint = buf[0];
|
||||
goto success;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
u32 = u32_conv_from_encoding (pt->encoding,
|
||||
(enum iconv_ilseq_handler) pt->ilseq_handler,
|
||||
buf, bufcount, NULL, NULL, &u32len);
|
||||
if (u32 == NULL || u32len == 0)
|
||||
{
|
||||
if (errno == ENOMEM)
|
||||
scm_memory_error ("Input decoding");
|
||||
|
||||
/* Otherwise errno is EILSEQ or EINVAL, so perhaps more
|
||||
bytes are needed. Keep looping. */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complete codepoint found. */
|
||||
codepoint = u32[0];
|
||||
free (u32);
|
||||
goto success;
|
||||
}
|
||||
|
||||
if (bufcount == SCM_MBCHAR_BUF_SIZE)
|
||||
{
|
||||
/* We've read several bytes and didn't find a good
|
||||
codepoint. Give up. */
|
||||
goto failure;
|
||||
}
|
||||
|
||||
c = scm_get_byte_or_eof (port);
|
||||
|
||||
if (c == EOF)
|
||||
{
|
||||
/* EOF before a complete character was read. Push it all
|
||||
back and return EOF. */
|
||||
while (bufcount > 0)
|
||||
{
|
||||
/* FIXME: this will probably cause errors in the port column. */
|
||||
scm_unget_byte (buf[bufcount-1], port);
|
||||
bufcount --;
|
||||
}
|
||||
return EOF;
|
||||
}
|
||||
|
||||
if (c == '\n')
|
||||
{
|
||||
/* It is always invalid to have EOL in the middle of a
|
||||
multibyte character. */
|
||||
scm_unget_byte ('\n', port);
|
||||
goto failure;
|
||||
}
|
||||
|
||||
buf[bufcount++] = c;
|
||||
}
|
||||
|
||||
success:
|
||||
switch (codepoint)
|
||||
{
|
||||
case '\a':
|
||||
break;
|
||||
case '\b':
|
||||
SCM_DECCOL (port);
|
||||
break;
|
||||
case '\n':
|
||||
SCM_INCLINE (port);
|
||||
break;
|
||||
case '\r':
|
||||
SCM_ZEROCOL (port);
|
||||
break;
|
||||
case '\t':
|
||||
SCM_TABCOL (port);
|
||||
break;
|
||||
default:
|
||||
SCM_INCCOL (port);
|
||||
break;
|
||||
}
|
||||
|
||||
return codepoint;
|
||||
|
||||
failure:
|
||||
{
|
||||
char *err_buf;
|
||||
SCM err_str = scm_i_make_string (bufcount, &err_buf);
|
||||
memcpy (err_buf, buf, bufcount);
|
||||
|
||||
if (errno == EILSEQ)
|
||||
scm_misc_error (NULL, "input encoding error for ~s: ~s",
|
||||
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
|
||||
err_str));
|
||||
else
|
||||
scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
|
||||
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
|
||||
err_str));
|
||||
}
|
||||
|
||||
/* Never gets here. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* this should only be called when the read buffer is empty. it
|
||||
tries to refill the read buffer. it returns the first char from
|
||||
the port, which is either EOF or *(pt->read_pos). */
|
||||
|
@ -1027,7 +1180,7 @@ scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
|
|||
stringbuf_write_mutex. So, one shouldn't use scm_lfwrite_substr
|
||||
if the stringbuf write mutex may still be held elsewhere. */
|
||||
buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
|
||||
NULL, SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
pt->encoding, pt->ilseq_handler);
|
||||
ptob->write (port, buf, len);
|
||||
free (buf);
|
||||
|
||||
|
@ -1056,7 +1209,7 @@ scm_lfwrite_str (SCM str, SCM port)
|
|||
scm_end_input (port);
|
||||
|
||||
buf = scm_to_stringn (str, &len,
|
||||
NULL, SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
pt->encoding, pt->ilseq_handler);
|
||||
ptob->write (port, buf, len);
|
||||
free (buf);
|
||||
|
||||
|
@ -1257,8 +1410,8 @@ scm_end_input (SCM port)
|
|||
|
||||
|
||||
void
|
||||
scm_ungetc (int c, SCM port)
|
||||
#define FUNC_NAME "scm_ungetc"
|
||||
scm_unget_byte (int c, SCM port)
|
||||
#define FUNC_NAME "scm_unget_byte"
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
||||
|
@ -1317,6 +1470,25 @@ scm_ungetc (int c, SCM port)
|
|||
|
||||
if (pt->rw_random)
|
||||
pt->rw_active = SCM_PORT_READ;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
void
|
||||
scm_ungetc (scm_t_wchar c, SCM port)
|
||||
#define FUNC_NAME "scm_ungetc"
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
scm_t_wchar *wbuf;
|
||||
SCM str = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
int i;
|
||||
|
||||
wbuf[0] = c;
|
||||
buf = scm_to_stringn (str, &len, pt->encoding, pt->ilseq_handler);
|
||||
|
||||
for (i = len - 1; i >= 0; i--)
|
||||
scm_unget_byte (buf[i], port);
|
||||
|
||||
if (c == '\n')
|
||||
{
|
||||
|
@ -1363,7 +1535,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
|
|||
"to @code{read-char} would have hung.")
|
||||
#define FUNC_NAME s_scm_peek_char
|
||||
{
|
||||
int c, column;
|
||||
scm_t_wchar c, column;
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
else
|
||||
|
@ -1409,13 +1581,17 @@ SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
|
|||
"@var{port} is not supplied, the current-input-port is used.")
|
||||
#define FUNC_NAME s_scm_unread_string
|
||||
{
|
||||
int n;
|
||||
SCM_VALIDATE_STRING (1, str);
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
else
|
||||
SCM_VALIDATE_OPINPORT (2, port);
|
||||
|
||||
scm_ungets (scm_i_string_chars (str), scm_i_string_length (str), port);
|
||||
n = scm_i_string_length (str);
|
||||
|
||||
while (n--)
|
||||
scm_ungetc (scm_i_string_ref (str, n), port);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
@ -1670,6 +1846,328 @@ SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
/* The default port encoding for this locale. New ports will have this
|
||||
encoding. If it is a string, that is the encoding. If it #f, it
|
||||
is in the native (Latin-1) encoding. */
|
||||
SCM_GLOBAL_VARIABLE (scm_port_encoding_var, "%port-encoding");
|
||||
static int scm_port_encoding_init = 0;
|
||||
|
||||
/* Return a C string representation of the current encoding. */
|
||||
const char *
|
||||
scm_i_get_port_encoding (SCM port)
|
||||
{
|
||||
SCM encoding;
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
if (!scm_port_encoding_init)
|
||||
return NULL;
|
||||
else if (!scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
|
||||
return NULL;
|
||||
else
|
||||
{
|
||||
encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_port_encoding_var));
|
||||
if (!scm_is_string (encoding))
|
||||
return NULL;
|
||||
else
|
||||
return scm_i_string_chars (encoding);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_port *pt;
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
if (pt->encoding)
|
||||
return pt->encoding;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns ENC is if is a recognized encoding. If it isn't, it tries
|
||||
to find an alias of ENC that is valid. Otherwise, it returns
|
||||
NULL. */
|
||||
static const char *
|
||||
find_valid_encoding (const char *enc)
|
||||
{
|
||||
int isvalid = 0;
|
||||
const char str[] = " ";
|
||||
scm_t_uint32 *u32;
|
||||
size_t u32len;
|
||||
|
||||
u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
|
||||
NULL, NULL, &u32len);
|
||||
isvalid = (u32 != NULL);
|
||||
free (u32);
|
||||
|
||||
if (isvalid)
|
||||
return enc;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
scm_i_set_port_encoding_x (SCM port, const char *enc)
|
||||
{
|
||||
const char *valid_enc;
|
||||
scm_t_port *pt;
|
||||
|
||||
/* Null is shorthand for the native, Latin-1 encoding. */
|
||||
if (enc == NULL)
|
||||
valid_enc = NULL;
|
||||
else
|
||||
{
|
||||
valid_enc = find_valid_encoding (enc);
|
||||
if (valid_enc == NULL)
|
||||
{
|
||||
SCM err;
|
||||
err = scm_from_locale_string (enc);
|
||||
scm_misc_error (NULL, "invalid or unknown character encoding ~s",
|
||||
scm_list_1 (err));
|
||||
}
|
||||
}
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
/* Set the default encoding for future ports. */
|
||||
if (!scm_port_encoding_init
|
||||
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
|
||||
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
|
||||
SCM_EOL);
|
||||
|
||||
if (valid_enc == NULL
|
||||
|| !strcmp (valid_enc, "ASCII")
|
||||
|| !strcmp (valid_enc, "ANSI_X3.4-1968")
|
||||
|| !strcmp (valid_enc, "ISO-8859-1"))
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
|
||||
else
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var),
|
||||
scm_from_locale_string (valid_enc));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set the character encoding for this port. */
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
if (pt->encoding)
|
||||
free (pt->encoding);
|
||||
if (valid_enc == NULL)
|
||||
pt->encoding = NULL;
|
||||
else
|
||||
pt->encoding = strdup (valid_enc);
|
||||
}
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
|
||||
(SCM port),
|
||||
"Returns, as a string, the character encoding that @var{port}\n"
|
||||
"uses to interpret its input and output.\n")
|
||||
#define FUNC_NAME s_scm_port_encoding
|
||||
{
|
||||
scm_t_port *pt;
|
||||
const char *enc;
|
||||
|
||||
SCM_VALIDATE_PORT (1, port);
|
||||
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
enc = scm_i_get_port_encoding (port);
|
||||
if (enc)
|
||||
return scm_from_locale_string (pt->encoding);
|
||||
else
|
||||
return scm_from_locale_string ("NONE");
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
|
||||
(SCM port, SCM enc),
|
||||
"Sets the character encoding that will be used to interpret all\n"
|
||||
"port I/O. New ports are created with the encoding\n"
|
||||
"appropriate for the current locale if @code{setlocale} has \n"
|
||||
"been called or ISO-8859-1 otherwise\n"
|
||||
"and this procedure can be used to modify that encoding.\n")
|
||||
|
||||
#define FUNC_NAME s_scm_set_port_encoding_x
|
||||
{
|
||||
char *enc_str;
|
||||
const char *valid_enc_str;
|
||||
|
||||
SCM_VALIDATE_PORT (1, port);
|
||||
SCM_VALIDATE_STRING (2, enc);
|
||||
|
||||
enc_str = scm_to_locale_string (enc);
|
||||
valid_enc_str = find_valid_encoding (enc_str);
|
||||
if (valid_enc_str == NULL)
|
||||
{
|
||||
free (enc_str);
|
||||
scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
|
||||
scm_list_1 (enc));
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_i_set_port_encoding_x (port, valid_enc_str);
|
||||
free (enc_str);
|
||||
}
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
/* This determines how conversions handle unconvertible characters. */
|
||||
SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
|
||||
static int scm_conversion_strategy_init = 0;
|
||||
|
||||
scm_t_string_failed_conversion_handler
|
||||
scm_i_get_conversion_strategy (SCM port)
|
||||
{
|
||||
SCM encoding;
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
if (!scm_conversion_strategy_init
|
||||
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
|
||||
return SCM_FAILED_CONVERSION_QUESTION_MARK;
|
||||
else
|
||||
{
|
||||
encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
|
||||
if (scm_is_false (encoding))
|
||||
return SCM_FAILED_CONVERSION_QUESTION_MARK;
|
||||
else
|
||||
return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_port *pt;
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
return pt->ilseq_handler;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
scm_i_set_conversion_strategy_x (SCM port,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
SCM strategy;
|
||||
scm_t_port *pt;
|
||||
|
||||
strategy = scm_from_int ((int) handler);
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
/* Set the default encoding for future ports. */
|
||||
if (!scm_conversion_strategy
|
||||
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
|
||||
scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
|
||||
SCM_EOL);
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set the character encoding for this port. */
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
pt->ilseq_handler = handler;
|
||||
}
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
|
||||
1, 0, 0, (SCM port),
|
||||
"Returns the behavior of the port when handling a character that\n"
|
||||
"is not representable in the port's current encoding.\n"
|
||||
"It returns the symbol @code{error} if unrepresentable characters\n"
|
||||
"should cause exceptions, @code{substitute} if the port should\n"
|
||||
"try to replace unrepresentable characters with question marks or\n"
|
||||
"approximate characters, or @code{escape} if unrepresentable\n"
|
||||
"characters should be converted to string escapes.\n"
|
||||
"\n"
|
||||
"If @var{port} is @code{#f}, then the current default behavior\n"
|
||||
"will be returned. New ports will have this default behavior\n"
|
||||
"when they are created.\n")
|
||||
#define FUNC_NAME s_scm_port_conversion_strategy
|
||||
{
|
||||
scm_t_string_failed_conversion_handler h;
|
||||
|
||||
SCM_VALIDATE_OPPORT (1, port);
|
||||
|
||||
if (!scm_is_false (port))
|
||||
{
|
||||
SCM_VALIDATE_OPPORT (1, port);
|
||||
}
|
||||
|
||||
h = scm_i_get_conversion_strategy (port);
|
||||
if (h == SCM_FAILED_CONVERSION_ERROR)
|
||||
return scm_from_locale_symbol ("error");
|
||||
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
|
||||
return scm_from_locale_symbol ("substitute");
|
||||
else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
return scm_from_locale_symbol ("escape");
|
||||
else
|
||||
abort ();
|
||||
|
||||
/* Never gets here. */
|
||||
return SCM_UNDEFINED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
|
||||
2, 0, 0,
|
||||
(SCM port, SCM sym),
|
||||
"Sets the behavior of the interpreter when outputting a character\n"
|
||||
"that is not representable in the port's current encoding.\n"
|
||||
"@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
|
||||
"@code{'escape}. If it is @code{'error}, an error will be thrown\n"
|
||||
"when an unconvertible character is encountered. If it is\n"
|
||||
"@code{'substitute}, then unconvertible characters will \n"
|
||||
"be replaced with approximate characters, or with question marks\n"
|
||||
"if no approximately correct character is available.\n"
|
||||
"If it is @code{'escape},\n"
|
||||
"it will appear as a hex escape when output.\n"
|
||||
"\n"
|
||||
"If @var{port} is an open port, the conversion error behavior\n"
|
||||
"is set for that port. If it is @code{#f}, it is set as the\n"
|
||||
"default behavior for any future ports that get created in\n"
|
||||
"this thread.\n")
|
||||
#define FUNC_NAME s_scm_set_port_conversion_strategy_x
|
||||
{
|
||||
SCM err;
|
||||
SCM qm;
|
||||
SCM esc;
|
||||
|
||||
if (!scm_is_false (port))
|
||||
{
|
||||
SCM_VALIDATE_OPPORT (1, port);
|
||||
}
|
||||
|
||||
err = scm_from_locale_symbol ("error");
|
||||
if (scm_is_true (scm_eqv_p (sym, err)))
|
||||
{
|
||||
scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
qm = scm_from_locale_symbol ("substitute");
|
||||
if (scm_is_true (scm_eqv_p (sym, qm)))
|
||||
{
|
||||
scm_i_set_conversion_strategy_x (port,
|
||||
SCM_FAILED_CONVERSION_QUESTION_MARK);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
esc = scm_from_locale_symbol ("escape");
|
||||
if (scm_is_true (scm_eqv_p (sym, esc)))
|
||||
{
|
||||
scm_i_set_conversion_strategy_x (port,
|
||||
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
|
||||
void
|
||||
scm_print_port_mode (SCM exp, SCM port)
|
||||
{
|
||||
|
@ -1780,8 +2278,17 @@ scm_init_ports ()
|
|||
cur_loadport_fluid = scm_permanent_object (scm_make_fluid ());
|
||||
|
||||
scm_i_port_weak_hash = scm_permanent_object (scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)));
|
||||
|
||||
#include "libguile/ports.x"
|
||||
|
||||
SCM_VARIABLE_SET (scm_port_encoding_var, scm_make_fluid ());
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
|
||||
scm_port_encoding_init = 1;
|
||||
|
||||
SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
|
||||
scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));
|
||||
scm_conversion_strategy_init = 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "libguile/print.h"
|
||||
#include "libguile/struct.h"
|
||||
#include "libguile/threads.h"
|
||||
|
||||
#include "libguile/strings.h"
|
||||
|
||||
|
||||
|
||||
|
@ -56,6 +56,10 @@ typedef struct
|
|||
long line_number; /* debugging support. */
|
||||
int column_number; /* debugging support. */
|
||||
|
||||
/* Character encoding support */
|
||||
char *encoding;
|
||||
scm_t_string_failed_conversion_handler ilseq_handler;
|
||||
|
||||
/* port buffers. the buffer(s) are set up for all ports.
|
||||
in the case of string ports, the buffer is the string itself.
|
||||
in the case of unbuffered file ports, the buffer is a
|
||||
|
@ -266,6 +270,7 @@ SCM_API SCM scm_eof_object_p (SCM x);
|
|||
SCM_API SCM scm_force_output (SCM port);
|
||||
SCM_API SCM scm_flush_all_ports (void);
|
||||
SCM_API SCM scm_read_char (SCM port);
|
||||
SCM_API scm_t_wchar scm_getc (SCM port);
|
||||
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
|
||||
SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
|
||||
SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
|
||||
|
@ -275,7 +280,8 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
|
|||
SCM_API void scm_flush (SCM port);
|
||||
SCM_API void scm_end_input (SCM port);
|
||||
SCM_API int scm_fill_input (SCM port);
|
||||
SCM_API void scm_ungetc (int c, SCM port);
|
||||
SCM_INTERNAL void scm_unget_byte (int c, SCM port);
|
||||
SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
|
||||
SCM_API void scm_ungets (const char *s, int n, SCM port);
|
||||
SCM_API SCM scm_peek_char (SCM port);
|
||||
SCM_API SCM scm_unread_char (SCM cobj, SCM port);
|
||||
|
@ -288,6 +294,15 @@ SCM_API SCM scm_port_column (SCM port);
|
|||
SCM_API SCM scm_set_port_column_x (SCM port, SCM line);
|
||||
SCM_API SCM scm_port_filename (SCM port);
|
||||
SCM_API SCM scm_set_port_filename_x (SCM port, SCM filename);
|
||||
SCM_INTERNAL const char *scm_i_get_port_encoding (SCM port);
|
||||
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
|
||||
SCM_API SCM scm_port_encoding (SCM port);
|
||||
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
|
||||
SCM_INTERNAL scm_t_string_failed_conversion_handler scm_i_get_conversion_strategy (SCM port);
|
||||
SCM_INTERNAL void scm_i_set_conversion_strategy_x (SCM port,
|
||||
scm_t_string_failed_conversion_handler h);
|
||||
SCM_API SCM scm_port_conversion_strategy (SCM port);
|
||||
SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
|
||||
SCM_API int scm_port_print (SCM exp, SCM port, scm_print_state *);
|
||||
SCM_API void scm_print_port_mode (SCM exp, SCM port);
|
||||
SCM_API void scm_ports_prehistory (void);
|
||||
|
@ -295,7 +310,6 @@ SCM_API SCM scm_void_port (char * mode_str);
|
|||
SCM_API SCM scm_sys_make_void_port (SCM mode);
|
||||
SCM_INTERNAL void scm_init_ports (void);
|
||||
|
||||
|
||||
#if SCM_ENABLE_DEPRECATED==1
|
||||
SCM_API scm_t_port * scm_add_to_port_table (SCM port);
|
||||
#endif
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <uniconv.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/dynwind.h"
|
||||
|
@ -1528,12 +1529,17 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
|
|||
"Otherwise the specified locale category is set to the string\n"
|
||||
"@var{locale} and the new value is returned as a\n"
|
||||
"system-dependent string. If @var{locale} is an empty string,\n"
|
||||
"the locale will be set using environment variables.")
|
||||
"the locale will be set using environment variables.\n"
|
||||
"\n"
|
||||
"When the locale is changed, the character encoding of the new\n"
|
||||
"locale (UTF-8, ISO-8859-1, etc.) is used for the current\n"
|
||||
"input, output, and error ports\n")
|
||||
#define FUNC_NAME s_scm_setlocale
|
||||
{
|
||||
int c_category;
|
||||
char *clocale;
|
||||
char *rv;
|
||||
const char *enc;
|
||||
|
||||
scm_dynwind_begin (0);
|
||||
|
||||
|
@ -1562,15 +1568,47 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
|
|||
SCM_SYSERROR;
|
||||
}
|
||||
|
||||
/* Recompute the standard SRFI-14 character sets in a locale-dependent
|
||||
(actually charset-dependent) way. */
|
||||
scm_srfi_14_compute_char_sets ();
|
||||
enc = locale_charset ();
|
||||
/* Set the default encoding for new ports. */
|
||||
scm_i_set_port_encoding_x (SCM_BOOL_F, enc);
|
||||
/* Set the encoding for the stdio ports. */
|
||||
scm_i_set_port_encoding_x (scm_current_input_port (), enc);
|
||||
scm_i_set_port_encoding_x (scm_current_output_port (), enc);
|
||||
scm_i_set_port_encoding_x (scm_current_error_port (), enc);
|
||||
|
||||
scm_dynwind_end ();
|
||||
return scm_from_locale_string (rv);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
#endif /* HAVE_SETLOCALE */
|
||||
SCM_DEFINE (scm_setbinary, "setbinary", 0, 0, 0,
|
||||
(void),
|
||||
"Sets the encoding for the current input, output, and error\n"
|
||||
"ports to ISO-8859-1. That character encoding allows\n"
|
||||
"ports to operate on binary data.\n"
|
||||
"\n"
|
||||
"It also sets the default encoding for newly created ports\n"
|
||||
"to ISO-8859-1.\n"
|
||||
"\n"
|
||||
"The previous default encoding for new ports is returned\n")
|
||||
#define FUNC_NAME s_scm_setbinary
|
||||
{
|
||||
const char *enc = scm_i_get_port_encoding (SCM_BOOL_F);
|
||||
|
||||
/* Set the default encoding for new ports. */
|
||||
scm_i_set_port_encoding_x (SCM_BOOL_F, NULL);
|
||||
/* Set the encoding for the stdio ports. */
|
||||
scm_i_set_port_encoding_x (scm_current_input_port (), NULL);
|
||||
scm_i_set_port_encoding_x (scm_current_output_port (), NULL);
|
||||
scm_i_set_port_encoding_x (scm_current_error_port (), NULL);
|
||||
|
||||
if (enc)
|
||||
return scm_from_locale_string (enc);
|
||||
|
||||
return scm_from_locale_string ("ISO-8859-1");
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
#ifdef HAVE_MKNOD
|
||||
SCM_DEFINE (scm_mknod, "mknod", 4, 0, 0,
|
||||
|
|
|
@ -74,6 +74,7 @@ SCM_API SCM scm_access (SCM path, SCM how);
|
|||
SCM_API SCM scm_getpid (void);
|
||||
SCM_API SCM scm_putenv (SCM str);
|
||||
SCM_API SCM scm_setlocale (SCM category, SCM locale);
|
||||
SCM_API SCM scm_setbinary (void);
|
||||
SCM_API SCM scm_mknod (SCM path, SCM type, SCM perms, SCM dev);
|
||||
SCM_API SCM scm_nice (SCM incr);
|
||||
SCM_API SCM scm_sync (void);
|
||||
|
|
103
libguile/print.c
103
libguile/print.c
|
@ -463,20 +463,45 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
| UC_CATEGORY_MASK_S))
|
||||
/* Print the character if is graphic character. */
|
||||
{
|
||||
if (i<256)
|
||||
/* Character is graphic. Print it. */
|
||||
scm_putc (i, port);
|
||||
scm_t_wchar *wbuf;
|
||||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
const char *enc;
|
||||
|
||||
enc = scm_i_get_port_encoding (port);
|
||||
wbuf[0] = i;
|
||||
if (enc == NULL && i <= 0xFF)
|
||||
{
|
||||
/* Character is graphic and Latin-1. Print it */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
}
|
||||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
{
|
||||
buf = u32_conv_to_encoding (enc,
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1,
|
||||
NULL,
|
||||
NULL, &len);
|
||||
if (buf != NULL)
|
||||
{
|
||||
/* Character is graphic. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
}
|
||||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
}
|
||||
}
|
||||
else
|
||||
/* Character is a non-graphical character. */
|
||||
scm_intprint (i, 8, port);
|
||||
}
|
||||
else
|
||||
scm_putc (i, port);
|
||||
scm_i_charprint (i, port);
|
||||
}
|
||||
else if (SCM_IFLAGP (exp)
|
||||
&& ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
|
||||
|
@ -608,21 +633,32 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
|
||||
wbuf[0] = ch;
|
||||
|
||||
buf = u32_conv_to_encoding ("ISO-8859-1",
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1, NULL, NULL, &len);
|
||||
if (buf != NULL)
|
||||
|
||||
if (scm_i_get_port_encoding (port))
|
||||
{
|
||||
/* Character is graphic and representable in
|
||||
this encoding. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
printed = 1;
|
||||
wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
wbuf[0] = ch;
|
||||
buf = u32_conv_to_encoding (scm_i_get_port_encoding (port),
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1 ,
|
||||
NULL,
|
||||
NULL, &len);
|
||||
if (buf != NULL)
|
||||
{
|
||||
/* Character is graphic and representable in
|
||||
this encoding. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
printed = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (ch <= 0xFF)
|
||||
{
|
||||
scm_putc (ch, port);
|
||||
printed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!printed)
|
||||
|
@ -835,7 +871,7 @@ scm_prin1 (SCM exp, SCM port, int writingp)
|
|||
/* Print a character.
|
||||
*/
|
||||
void
|
||||
scm_i_charprint (scm_t_uint32 ch, SCM port)
|
||||
scm_i_charprint (scm_t_wchar ch, SCM port)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
|
@ -1057,9 +1093,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
|
|||
SCM port, answer = SCM_UNSPECIFIED;
|
||||
int fReturnString = 0;
|
||||
int writingp;
|
||||
const char *start;
|
||||
const char *end;
|
||||
const char *p;
|
||||
size_t start, p, end;
|
||||
|
||||
if (scm_is_eq (destination, SCM_BOOL_T))
|
||||
{
|
||||
|
@ -1082,15 +1116,16 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
|
|||
SCM_VALIDATE_STRING (2, message);
|
||||
SCM_VALIDATE_REST_ARGUMENT (args);
|
||||
|
||||
start = scm_i_string_chars (message);
|
||||
end = start + scm_i_string_length (message);
|
||||
p = 0;
|
||||
start = 0;
|
||||
end = scm_i_string_length (message);
|
||||
for (p = start; p != end; ++p)
|
||||
if (*p == '~')
|
||||
if (scm_i_string_ref (message, p) == '~')
|
||||
{
|
||||
if (++p == end)
|
||||
break;
|
||||
|
||||
switch (*p)
|
||||
switch (scm_i_string_ref (message, p))
|
||||
{
|
||||
case 'A': case 'a':
|
||||
writingp = 0;
|
||||
|
@ -1099,33 +1134,33 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
|
|||
writingp = 1;
|
||||
break;
|
||||
case '~':
|
||||
scm_lfwrite (start, p - start, port);
|
||||
scm_lfwrite_substr (message, start, p, port);
|
||||
start = p + 1;
|
||||
continue;
|
||||
case '%':
|
||||
scm_lfwrite (start, p - start - 1, port);
|
||||
scm_lfwrite_substr (message, start, p - 1, port);
|
||||
scm_newline (port);
|
||||
start = p + 1;
|
||||
continue;
|
||||
default:
|
||||
SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use (ice-9 format) instead",
|
||||
scm_list_1 (SCM_MAKE_CHAR (*p)));
|
||||
scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p))));
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (!scm_is_pair (args))
|
||||
SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A",
|
||||
scm_list_1 (SCM_MAKE_CHAR (*p)));
|
||||
scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p))));
|
||||
|
||||
scm_lfwrite (start, p - start - 1, port);
|
||||
scm_lfwrite_substr (message, start, p - 1, port);
|
||||
/* we pass destination here */
|
||||
scm_prin1 (SCM_CAR (args), destination, writingp);
|
||||
args = SCM_CDR (args);
|
||||
start = p + 1;
|
||||
}
|
||||
|
||||
scm_lfwrite (start, p - start, port);
|
||||
scm_lfwrite_substr (message, start, p, port);
|
||||
if (!scm_is_eq (args, SCM_EOL))
|
||||
SCM_MISC_ERROR ("FORMAT: ~A superfluous arguments",
|
||||
scm_list_1 (scm_length (args)));
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "libguile/__scm.h"
|
||||
|
||||
#include "libguile/chars.h"
|
||||
#include "libguile/options.h"
|
||||
|
||||
|
||||
|
@ -77,7 +78,7 @@ SCM_API SCM scm_print_options (SCM setting);
|
|||
SCM_API SCM scm_make_print_state (void);
|
||||
SCM_API void scm_free_print_state (SCM print_state);
|
||||
SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state);
|
||||
SCM_INTERNAL void scm_i_charprint (scm_t_uint32 c, SCM port);
|
||||
SCM_INTERNAL void scm_i_charprint (scm_t_wchar c, SCM port);
|
||||
SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
|
||||
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
|
||||
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);
|
||||
|
|
|
@ -59,12 +59,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
|
|||
size_t j;
|
||||
size_t cstart;
|
||||
size_t cend;
|
||||
int c;
|
||||
const char *cdelims;
|
||||
scm_t_wchar c;
|
||||
size_t num_delims;
|
||||
|
||||
SCM_VALIDATE_STRING (1, delims);
|
||||
cdelims = scm_i_string_chars (delims);
|
||||
num_delims = scm_i_string_length (delims);
|
||||
|
||||
SCM_VALIDATE_STRING (2, str);
|
||||
|
@ -83,7 +81,7 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
|
|||
c = scm_getc (port);
|
||||
for (k = 0; k < num_delims; k++)
|
||||
{
|
||||
if (cdelims[k] == c)
|
||||
if (scm_i_string_ref (delims, k) == c)
|
||||
{
|
||||
if (scm_is_false (gobble))
|
||||
scm_ungetc (c, port);
|
||||
|
|
385
libguile/read.c
385
libguile/read.c
|
@ -27,6 +27,8 @@
|
|||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <unicase.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/bytevectors.h"
|
||||
|
@ -177,11 +179,6 @@ static SCM *scm_read_hash_procedures;
|
|||
(((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
|
||||
|| ((_chr) == 'd') || ((_chr) == 'l'))
|
||||
|
||||
/* An inlinable version of `scm_c_downcase ()'. */
|
||||
#define CHAR_DOWNCASE(_chr) \
|
||||
(((_chr) <= UCHAR_MAX) ? tolower ((int) (_chr)) : (_chr))
|
||||
|
||||
|
||||
/* Read an SCSH block comment. */
|
||||
static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
|
||||
static SCM scm_read_commented_expression (int chr, SCM port);
|
||||
|
@ -189,41 +186,69 @@ static SCM scm_read_commented_expression (int chr, SCM port);
|
|||
/* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
|
||||
zero if the whole token fits in BUF, non-zero otherwise. */
|
||||
static inline int
|
||||
read_token (SCM port, char *buf, size_t buf_size, size_t *read)
|
||||
read_token (SCM port, SCM buf, size_t *read)
|
||||
{
|
||||
scm_t_wchar chr;
|
||||
*read = 0;
|
||||
|
||||
while (*read < buf_size)
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
while (*read < scm_i_string_length (buf))
|
||||
{
|
||||
int chr;
|
||||
|
||||
chr = scm_getc (port);
|
||||
chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
|
||||
|
||||
if (chr == EOF)
|
||||
return 0;
|
||||
else if (CHAR_IS_DELIMITER (chr))
|
||||
{
|
||||
scm_i_string_stop_writing ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
chr = (SCM_CASE_INSENSITIVE_P ? uc_tolower (chr) : chr);
|
||||
|
||||
if (CHAR_IS_DELIMITER (chr))
|
||||
{
|
||||
scm_i_string_stop_writing ();
|
||||
scm_ungetc (chr, port);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf = (char) chr;
|
||||
buf++, (*read)++;
|
||||
}
|
||||
|
||||
scm_i_string_set_x (buf, *read, chr);
|
||||
(*read)++;
|
||||
}
|
||||
scm_i_string_stop_writing ();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static SCM
|
||||
read_complete_token (SCM port, size_t *read)
|
||||
{
|
||||
SCM buffer, str = SCM_EOL;
|
||||
size_t len;
|
||||
int overflow;
|
||||
|
||||
buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
|
||||
overflow = read_token (port, buffer, read);
|
||||
if (!overflow)
|
||||
return scm_i_substring (buffer, 0, *read);
|
||||
|
||||
str = scm_string_copy (buffer);
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, &len);
|
||||
str = scm_string_append (scm_list_2 (str, buffer));
|
||||
*read += len;
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
return scm_i_substring (str, 0, *read);
|
||||
}
|
||||
|
||||
/* Skip whitespace from PORT and return the first non-whitespace character
|
||||
read. Raise an error on end-of-file. */
|
||||
static int
|
||||
flush_ws (SCM port, const char *eoferr)
|
||||
{
|
||||
register int c;
|
||||
register scm_t_wchar c;
|
||||
while (1)
|
||||
switch (c = scm_getc (port))
|
||||
{
|
||||
|
@ -292,7 +317,7 @@ static SCM recsexpr (SCM obj, long line, int column, SCM filename);
|
|||
|
||||
|
||||
static SCM
|
||||
scm_read_sexp (int chr, SCM port)
|
||||
scm_read_sexp (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_i_lreadparen"
|
||||
{
|
||||
register int c;
|
||||
|
@ -553,107 +578,52 @@ scm_read_string (int chr, SCM port)
|
|||
|
||||
|
||||
static SCM
|
||||
scm_read_number (int chr, SCM port)
|
||||
scm_read_number (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
SCM result, str = SCM_EOL;
|
||||
char buffer[READER_BUFFER_SIZE];
|
||||
SCM result;
|
||||
SCM buffer;
|
||||
size_t read;
|
||||
int overflow = 0;
|
||||
|
||||
scm_ungetc (chr, port);
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, sizeof (buffer), &read);
|
||||
|
||||
if ((overflow) || (scm_is_pair (str)))
|
||||
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
if (scm_is_pair (str))
|
||||
{
|
||||
/* The slow path. */
|
||||
|
||||
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
|
||||
result = scm_string_to_number (str, SCM_UNDEFINED);
|
||||
if (!scm_is_true (result))
|
||||
/* Return a symbol instead of a number. */
|
||||
result = scm_string_to_symbol (str);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = scm_c_locale_stringn_to_number (buffer, read, 10);
|
||||
if (!scm_is_true (result))
|
||||
/* Return a symbol instead of a number. */
|
||||
result = scm_from_locale_symboln (buffer, read);
|
||||
}
|
||||
buffer = read_complete_token (port, &read);
|
||||
result = scm_string_to_number (buffer, SCM_UNDEFINED);
|
||||
if (!scm_is_true (result))
|
||||
/* Return a symbol instead of a number. */
|
||||
result = scm_string_to_symbol (buffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_mixed_case_symbol (int chr, SCM port)
|
||||
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
SCM result, str = SCM_EOL;
|
||||
int overflow = 0, ends_with_colon = 0;
|
||||
char buffer[READER_BUFFER_SIZE];
|
||||
SCM result;
|
||||
int ends_with_colon = 0;
|
||||
SCM buffer;
|
||||
size_t read = 0;
|
||||
int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
|
||||
|
||||
scm_ungetc (chr, port);
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, sizeof (buffer), &read);
|
||||
buffer = read_complete_token (port, &read);
|
||||
if (read > 0)
|
||||
ends_with_colon = scm_i_string_ref (buffer, read - 1) == ':';
|
||||
|
||||
if (read > 0)
|
||||
ends_with_colon = (buffer[read - 1] == ':');
|
||||
|
||||
if ((overflow) || (scm_is_pair (str)))
|
||||
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
if (scm_is_pair (str))
|
||||
{
|
||||
size_t len;
|
||||
|
||||
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
|
||||
len = scm_c_string_length (str);
|
||||
|
||||
/* Per SRFI-88, `:' alone is an identifier, not a keyword. */
|
||||
if (postfix && ends_with_colon && (len > 1))
|
||||
{
|
||||
/* Strip off colon. */
|
||||
str = scm_c_substring (str, 0, len-1);
|
||||
result = scm_string_to_symbol (str);
|
||||
result = scm_symbol_to_keyword (result);
|
||||
}
|
||||
else
|
||||
result = scm_string_to_symbol (str);
|
||||
}
|
||||
if (postfix && ends_with_colon && (read > 1))
|
||||
result = scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer, 0, read - 1)));
|
||||
else
|
||||
{
|
||||
/* For symbols smaller than `sizeof (buffer)', we don't need to recur
|
||||
to Scheme strings. Therefore, we only create one Scheme object (a
|
||||
symbol) per symbol read. */
|
||||
if (postfix && ends_with_colon && (read > 1))
|
||||
result = scm_from_locale_keywordn (buffer, read - 1);
|
||||
else
|
||||
result = scm_from_locale_symboln (buffer, read);
|
||||
}
|
||||
result = scm_string_to_symbol (buffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_number_and_radix (int chr, SCM port)
|
||||
scm_read_number_and_radix (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM result, str = SCM_EOL;
|
||||
SCM result;
|
||||
size_t read;
|
||||
char buffer[READER_BUFFER_SIZE];
|
||||
SCM buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
|
||||
unsigned int radix;
|
||||
int overflow = 0;
|
||||
|
||||
switch (chr)
|
||||
{
|
||||
|
@ -683,22 +653,8 @@ scm_read_number_and_radix (int chr, SCM port)
|
|||
radix = 10;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, sizeof (buffer), &read);
|
||||
|
||||
if ((overflow) || (scm_is_pair (str)))
|
||||
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
if (scm_is_pair (str))
|
||||
{
|
||||
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
|
||||
result = scm_string_to_number (str, scm_from_uint (radix));
|
||||
}
|
||||
else
|
||||
result = scm_c_locale_stringn_to_number (buffer, read, radix);
|
||||
buffer = read_complete_token (port, &read);
|
||||
result = scm_string_to_number (buffer, scm_from_uint (radix));
|
||||
|
||||
if (scm_is_true (result))
|
||||
return result;
|
||||
|
@ -728,7 +684,7 @@ scm_read_quote (int chr, SCM port)
|
|||
|
||||
case ',':
|
||||
{
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
|
||||
c = scm_getc (port);
|
||||
if ('@' == c)
|
||||
|
@ -827,7 +783,10 @@ scm_read_semicolon_comment (int chr, SCM port)
|
|||
{
|
||||
int c;
|
||||
|
||||
for (c = scm_getc (port);
|
||||
/* We use the get_byte here because there is no need to get the
|
||||
locale correct with comment input. This presumes that newline
|
||||
always represents itself no matter what the encoding is. */
|
||||
for (c = scm_get_byte_or_eof (port);
|
||||
(c != EOF) && (c != '\n');
|
||||
c = scm_getc (port));
|
||||
|
||||
|
@ -855,14 +814,19 @@ scm_read_boolean (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_character (int chr, SCM port)
|
||||
scm_read_character (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM ch;
|
||||
char charname[READER_CHAR_NAME_MAX_SIZE];
|
||||
SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
|
||||
size_t charname_len;
|
||||
scm_t_wchar cp;
|
||||
int overflow;
|
||||
|
||||
if (read_token (port, charname, sizeof (charname), &charname_len))
|
||||
overflow = read_token (port, charname, &charname_len);
|
||||
charname = scm_c_substring (charname, 0, charname_len);
|
||||
|
||||
if (overflow)
|
||||
goto char_error;
|
||||
|
||||
if (charname_len == 0)
|
||||
|
@ -877,28 +841,33 @@ scm_read_character (int chr, SCM port)
|
|||
}
|
||||
|
||||
if (charname_len == 1)
|
||||
return SCM_MAKE_CHAR (charname[0]);
|
||||
return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
|
||||
|
||||
if (*charname >= '0' && *charname < '8')
|
||||
cp = scm_i_string_ref (charname, 0);
|
||||
if (cp >= '0' && cp < '8')
|
||||
{
|
||||
/* Dirk:FIXME:: This type of character syntax is not R5RS
|
||||
* compliant. Further, it should be verified that the constant
|
||||
* does only consist of octal digits. Finally, it should be
|
||||
* checked whether the resulting fixnum is in the range of
|
||||
* characters. */
|
||||
SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
|
||||
SCM p = scm_string_to_number (charname, scm_from_uint (8));
|
||||
if (SCM_I_INUMP (p))
|
||||
return SCM_MAKE_CHAR (SCM_I_INUM (p));
|
||||
}
|
||||
|
||||
ch = scm_i_charname_to_char (charname, charname_len);
|
||||
/* The names of characters should never have non-Latin1
|
||||
characters. */
|
||||
if (scm_i_is_narrow_string (charname)
|
||||
|| scm_i_try_narrow_string (charname))
|
||||
ch = scm_i_charname_to_char (scm_i_string_chars (charname),
|
||||
charname_len);
|
||||
if (scm_is_true (ch))
|
||||
return ch;
|
||||
|
||||
char_error:
|
||||
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
|
||||
scm_list_1 (scm_from_locale_stringn (charname,
|
||||
charname_len)));
|
||||
scm_list_1 (charname));
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
@ -940,7 +909,7 @@ scm_read_srfi4_vector (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_bytevector (int chr, SCM port)
|
||||
scm_read_bytevector (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
chr = scm_getc (port);
|
||||
if (chr != 'u')
|
||||
|
@ -964,7 +933,7 @@ scm_read_bytevector (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_guile_bit_vector (int chr, SCM port)
|
||||
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
|
||||
terribly inefficient but who cares? */
|
||||
|
@ -984,13 +953,17 @@ scm_read_guile_bit_vector (int chr, SCM port)
|
|||
}
|
||||
|
||||
static inline SCM
|
||||
scm_read_scsh_block_comment (int chr, SCM port)
|
||||
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
int bang_seen = 0;
|
||||
|
||||
/* We can use the get_byte here because there is no need to get the
|
||||
locale correct when reading comments. This presumes that
|
||||
hash and exclamation points always represent themselves no
|
||||
matter what the source encoding is.*/
|
||||
for (;;)
|
||||
{
|
||||
int c = scm_getc (port);
|
||||
int c = scm_get_byte_or_eof (port);
|
||||
|
||||
if (c == EOF)
|
||||
scm_i_input_error ("skip_block_comment", port,
|
||||
|
@ -1008,9 +981,9 @@ scm_read_scsh_block_comment (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_commented_expression (int chr, SCM port)
|
||||
scm_read_commented_expression (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
|
||||
c = flush_ws (port, (char *) NULL);
|
||||
if (EOF == c)
|
||||
|
@ -1022,19 +995,18 @@ scm_read_commented_expression (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_extended_symbol (int chr, SCM port)
|
||||
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
/* Guile's extended symbol read syntax looks like this:
|
||||
|
||||
#{This is all a symbol name}#
|
||||
|
||||
So here, CHR is expected to be `{'. */
|
||||
SCM result;
|
||||
int saw_brace = 0, finished = 0;
|
||||
size_t len = 0;
|
||||
char buf[1024];
|
||||
SCM buf = scm_i_make_string (1024, NULL);
|
||||
|
||||
result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
|
||||
while ((chr = scm_getc (port)) != EOF)
|
||||
{
|
||||
|
@ -1048,32 +1020,30 @@ scm_read_extended_symbol (int chr, SCM port)
|
|||
else
|
||||
{
|
||||
saw_brace = 0;
|
||||
buf[len++] = '}';
|
||||
buf[len++] = chr;
|
||||
scm_i_string_set_x (buf, len++, '}');
|
||||
scm_i_string_set_x (buf, len++, chr);
|
||||
}
|
||||
}
|
||||
else if (chr == '}')
|
||||
saw_brace = 1;
|
||||
else
|
||||
buf[len++] = chr;
|
||||
scm_i_string_set_x (buf, len++, chr);
|
||||
|
||||
if (len >= sizeof (buf) - 2)
|
||||
if (len >= scm_i_string_length (buf) - 2)
|
||||
{
|
||||
scm_string_append (scm_list_2 (result,
|
||||
scm_from_locale_stringn (buf, len)));
|
||||
scm_i_string_stop_writing ();
|
||||
SCM addy = scm_i_make_string (1024, NULL);
|
||||
buf = scm_string_append (scm_list_2 (buf, addy));
|
||||
len = 0;
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
}
|
||||
|
||||
if (finished)
|
||||
break;
|
||||
}
|
||||
scm_i_string_stop_writing ();
|
||||
|
||||
if (len)
|
||||
result = scm_string_append (scm_list_2
|
||||
(result,
|
||||
scm_from_locale_stringn (buf, len)));
|
||||
|
||||
return (scm_string_to_symbol (result));
|
||||
return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -1109,7 +1079,7 @@ scm_read_sharp_extension (int chr, SCM port)
|
|||
/* The reader for the sharp `#' character. It basically dispatches reads
|
||||
among the above token readers. */
|
||||
static SCM
|
||||
scm_read_sharp (int chr, SCM port)
|
||||
scm_read_sharp (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM result;
|
||||
|
@ -1161,7 +1131,7 @@ scm_read_sharp (int chr, SCM port)
|
|||
{
|
||||
/* When next char is '(', it really is an old-style
|
||||
uniform array. */
|
||||
int next_c = scm_getc (port);
|
||||
scm_t_wchar next_c = scm_getc (port);
|
||||
if (next_c != EOF)
|
||||
scm_ungetc (next_c, port);
|
||||
if (next_c == '(')
|
||||
|
@ -1209,7 +1179,7 @@ scm_read_expression (SCM port)
|
|||
{
|
||||
while (1)
|
||||
{
|
||||
register int chr;
|
||||
register scm_t_wchar chr;
|
||||
|
||||
chr = scm_getc (port);
|
||||
|
||||
|
@ -1420,6 +1390,127 @@ scm_get_hash_procedure (int c)
|
|||
}
|
||||
}
|
||||
|
||||
#define SCM_ENCODING_SEARCH_SIZE (500)
|
||||
|
||||
/* Search the first few hundred characters of a file for
|
||||
an emacs-like coding declaration. */
|
||||
char *
|
||||
scm_scan_for_encoding (SCM port)
|
||||
{
|
||||
char header[SCM_ENCODING_SEARCH_SIZE+1];
|
||||
size_t bytes_read;
|
||||
char *encoding = NULL;
|
||||
int utf8_bom = 0;
|
||||
char *pos;
|
||||
int i;
|
||||
int in_comment;
|
||||
|
||||
bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
|
||||
scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
|
||||
|
||||
if (bytes_read > 3
|
||||
&& header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
|
||||
utf8_bom = 1;
|
||||
|
||||
/* search past "coding[:=]" */
|
||||
pos = header;
|
||||
while (1)
|
||||
{
|
||||
if ((pos = strstr(pos, "coding")) == NULL)
|
||||
return NULL;
|
||||
|
||||
pos += strlen("coding");
|
||||
if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
|
||||
(*pos == ':' || *pos == '='))
|
||||
{
|
||||
pos ++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* skip spaces */
|
||||
while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
|
||||
(*pos == ' ' || *pos == '\t'))
|
||||
pos ++;
|
||||
|
||||
/* grab the next token */
|
||||
i = 0;
|
||||
while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
|
||||
&& (isalnum(pos[i]) || pos[i] == '_' || pos[i] == '-' || pos[i] == '.'))
|
||||
i++;
|
||||
|
||||
if (i == 0)
|
||||
return NULL;
|
||||
|
||||
encoding = scm_malloc (i+1);
|
||||
memcpy (encoding, pos, i);
|
||||
encoding[i] ='\0';
|
||||
for (i = 0; i < strlen(encoding); i++)
|
||||
encoding[i] = toupper(encoding[i]);
|
||||
|
||||
/* push backwards to make sure we were in a comment */
|
||||
in_comment = 0;
|
||||
while (pos - i - header > 0)
|
||||
{
|
||||
if (*(pos - i) == '\n')
|
||||
{
|
||||
/* This wasn't in a semicolon comment. Check for a
|
||||
hash-bang comment. */
|
||||
char *beg = strstr (header, "#!");
|
||||
char *end = strstr (header, "!#");
|
||||
if (beg < pos && pos < end)
|
||||
in_comment = 1;
|
||||
break;
|
||||
}
|
||||
if (*(pos - i) == ';')
|
||||
{
|
||||
in_comment = 1;
|
||||
break;
|
||||
}
|
||||
i ++;
|
||||
}
|
||||
if (!in_comment)
|
||||
{
|
||||
/* This wasn't in a comment */
|
||||
free (encoding);
|
||||
return NULL;
|
||||
}
|
||||
if (utf8_bom && strcmp(encoding, "UTF-8"))
|
||||
scm_misc_error (NULL,
|
||||
"the port input declares the encoding ~s but is encoded as UTF-8",
|
||||
scm_list_1 (scm_from_locale_string (encoding)));
|
||||
|
||||
return encoding;
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
|
||||
(SCM port),
|
||||
"Scans the port for an EMACS-like character coding declaration\n"
|
||||
"near the top of the contents of a port with random-acessible contents.\n"
|
||||
"The coding declaration is of the form\n"
|
||||
"@code{coding: XXXXX} and must appear in a scheme comment.\n"
|
||||
"\n"
|
||||
"Returns a string containing the character encoding of the file\n"
|
||||
"if a declaration was found, or @code{#f} otherwise.\n")
|
||||
#define FUNC_NAME s_scm_file_encoding
|
||||
{
|
||||
char *enc;
|
||||
SCM s_enc;
|
||||
|
||||
enc = scm_scan_for_encoding (port);
|
||||
if (enc == NULL)
|
||||
return SCM_BOOL_F;
|
||||
else
|
||||
{
|
||||
s_enc = scm_from_locale_string (enc);
|
||||
free (enc);
|
||||
return s_enc;
|
||||
}
|
||||
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
void
|
||||
scm_init_read ()
|
||||
{
|
||||
|
|
|
@ -56,6 +56,8 @@ SCM_API SCM scm_read_options (SCM setting);
|
|||
SCM_API SCM scm_read (SCM port);
|
||||
SCM_API size_t scm_read_token (int ic, SCM * tok_buf, SCM port, int weird);
|
||||
SCM_API SCM scm_read_hash_extend (SCM chr, SCM proc);
|
||||
SCM_INTERNAL char *scm_scan_for_encoding (SCM port);
|
||||
SCM_API SCM scm_file_encoding (SCM port);
|
||||
|
||||
SCM_INTERNAL void scm_i_input_error (const char *func, SCM port,
|
||||
const char *message, SCM arg)
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#include <unistr.h>
|
||||
#include <uniconv.h>
|
||||
|
||||
#include "striconveh.h"
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/chars.h"
|
||||
#include "libguile/root.h"
|
||||
|
@ -632,6 +634,37 @@ scm_i_string_ref (SCM str, size_t x)
|
|||
return scm_i_string_wide_chars (str)[x];
|
||||
}
|
||||
|
||||
/* Returns index+1 of the first char in STR that matches C, or
|
||||
0 if the char is not found. */
|
||||
int
|
||||
scm_i_string_contains_char (SCM str, char ch)
|
||||
{
|
||||
size_t i;
|
||||
size_t len = scm_i_string_length (str);
|
||||
|
||||
i = 0;
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
while (i < len)
|
||||
{
|
||||
if (scm_i_string_chars (str)[i] == ch)
|
||||
return i+1;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (i < len)
|
||||
{
|
||||
if (scm_i_string_wide_chars (str)[i]
|
||||
== (unsigned char) ch)
|
||||
return i+1;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
|
||||
{
|
||||
|
@ -1443,31 +1476,6 @@ scm_is_string (SCM obj)
|
|||
return IS_STRING (obj);
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_from_locale_stringn (const char *str, size_t len)
|
||||
{
|
||||
SCM res;
|
||||
char *dst;
|
||||
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
if (len == 0)
|
||||
return scm_nullstr;
|
||||
|
||||
res = scm_i_make_string (len, &dst);
|
||||
memcpy (dst, str, len);
|
||||
return res;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_from_locale_string (const char *str)
|
||||
{
|
||||
if (str == NULL)
|
||||
return scm_nullstr;
|
||||
|
||||
return scm_from_locale_stringn (str, -1);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
|
@ -1477,6 +1485,15 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
|||
int wide = 0;
|
||||
SCM res;
|
||||
|
||||
if (encoding == NULL)
|
||||
{
|
||||
/* If encoding is null, use Latin-1. */
|
||||
char *buf;
|
||||
res = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, str, len);
|
||||
return res;
|
||||
}
|
||||
|
||||
u32len = 0;
|
||||
u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
|
||||
(enum iconv_ilseq_handler)
|
||||
|
@ -1491,12 +1508,9 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
|||
scm_memory_error ("locale string conversion");
|
||||
else
|
||||
{
|
||||
/* There are invalid sequences in the input string. Since
|
||||
it is partially nonsense, what is the best strategy for
|
||||
printing it in the error message? */
|
||||
/* There are invalid sequences in the input string. */
|
||||
SCM errstr;
|
||||
char *dst;
|
||||
/* We'll just print it unconverted and hope for the best. */
|
||||
errstr = scm_i_make_string (len, &dst);
|
||||
memcpy (dst, str, len);
|
||||
scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
|
||||
|
@ -1534,6 +1548,44 @@ scm_from_stringn (const char *str, size_t len, const char *encoding,
|
|||
return res;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_from_locale_stringn (const char *str, size_t len)
|
||||
{
|
||||
const char *enc;
|
||||
scm_t_string_failed_conversion_handler hndl;
|
||||
SCM inport;
|
||||
scm_t_port *pt;
|
||||
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
if (len == 0)
|
||||
return scm_nullstr;
|
||||
|
||||
inport = scm_current_input_port ();
|
||||
if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
|
||||
{
|
||||
pt = SCM_PTAB_ENTRY (inport);
|
||||
enc = pt->encoding;
|
||||
hndl = pt->ilseq_handler;
|
||||
}
|
||||
else
|
||||
{
|
||||
enc = NULL;
|
||||
hndl = SCM_FAILED_CONVERSION_ERROR;
|
||||
}
|
||||
|
||||
return scm_from_stringn (str, len, enc, hndl);
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_from_locale_string (const char *str)
|
||||
{
|
||||
if (str == NULL)
|
||||
return scm_nullstr;
|
||||
|
||||
return scm_from_locale_stringn (str, -1);
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_i_from_utf8_string (const scm_t_uint8 *str)
|
||||
{
|
||||
|
@ -1630,13 +1682,22 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
|
|||
char *
|
||||
scm_to_locale_stringn (SCM str, size_t * lenp)
|
||||
{
|
||||
SCM outport;
|
||||
scm_t_port *pt;
|
||||
const char *enc;
|
||||
|
||||
/* In the future, enc will hold the port's encoding. */
|
||||
enc = NULL;
|
||||
outport = scm_current_output_port ();
|
||||
if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
|
||||
{
|
||||
pt = SCM_PTAB_ENTRY (outport);
|
||||
enc = pt->encoding;
|
||||
}
|
||||
else
|
||||
enc = NULL;
|
||||
|
||||
return scm_to_stringn (str, lenp, enc,
|
||||
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
return scm_to_stringn (str, lenp,
|
||||
enc,
|
||||
scm_i_get_conversion_strategy (SCM_BOOL_F));
|
||||
}
|
||||
|
||||
/* Low-level scheme to C string conversion function. */
|
||||
|
@ -1646,6 +1707,8 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
|||
{
|
||||
char *buf;
|
||||
size_t ilen, len, i;
|
||||
int ret;
|
||||
const char *enc;
|
||||
|
||||
if (!scm_is_string (str))
|
||||
scm_wrong_type_arg_msg (NULL, 0, str, "string");
|
||||
|
@ -1667,8 +1730,10 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
|||
"string contains #\\nul character: ~S",
|
||||
scm_list_1 (str));
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
if (scm_i_is_narrow_string (str) && (encoding == NULL))
|
||||
{
|
||||
/* If using native Latin-1 encoding, just copy the string
|
||||
contents. */
|
||||
if (lenp)
|
||||
{
|
||||
buf = scm_malloc (ilen);
|
||||
|
@ -1688,17 +1753,41 @@ scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
|||
|
||||
buf = NULL;
|
||||
len = 0;
|
||||
buf = u32_conv_to_encoding (encoding ? encoding : "ISO-8859-1",
|
||||
(enum iconv_ilseq_handler) handler,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (str),
|
||||
ilen, NULL, NULL, &len);
|
||||
if (buf == NULL)
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (encoding), str));
|
||||
enc = encoding;
|
||||
if (enc == NULL)
|
||||
enc = "ISO-8859-1";
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
ret = mem_iconveh (scm_i_string_chars (str), ilen,
|
||||
"ISO-8859-1", enc,
|
||||
(enum iconv_ilseq_handler) handler, NULL,
|
||||
&buf, &len);
|
||||
|
||||
if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
unistring_escapes_to_guile_escapes (&buf, &len);
|
||||
if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
unistring_escapes_to_guile_escapes (&buf, &len);
|
||||
|
||||
if (ret != 0)
|
||||
{
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (enc),
|
||||
str));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = u32_conv_to_encoding (enc,
|
||||
(enum iconv_ilseq_handler) handler,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (str),
|
||||
ilen,
|
||||
NULL,
|
||||
NULL, &len);
|
||||
if (buf == NULL)
|
||||
{
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (enc),
|
||||
str));
|
||||
}
|
||||
}
|
||||
if (lenp)
|
||||
*lenp = len;
|
||||
else
|
||||
|
|
|
@ -154,6 +154,7 @@ SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
|
|||
SCM_INTERNAL void scm_i_string_stop_writing (void);
|
||||
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
|
||||
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
|
||||
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
|
||||
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
|
||||
/* internal functions related to symbols. */
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "libguile/modules.h"
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/deprecation.h"
|
||||
#include "libguile/srfi-4.h"
|
||||
|
||||
#include "libguile/strports.h"
|
||||
|
||||
|
@ -289,42 +290,33 @@ st_truncate (SCM port, scm_t_off length)
|
|||
}
|
||||
|
||||
SCM
|
||||
scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
||||
scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, const char *caller)
|
||||
{
|
||||
SCM z;
|
||||
SCM z, str;
|
||||
scm_t_port *pt;
|
||||
size_t str_len, c_pos;
|
||||
size_t c_pos;
|
||||
char *buf;
|
||||
|
||||
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
|
||||
/* Because ports are inherently 8-bit, strings need to be converted
|
||||
to a locale representation for storage. But, since string ports
|
||||
rely on string functionality for their memory management, we need
|
||||
to create a new string that has the 8-bit locale representation
|
||||
of the underlying string. This violates the guideline that the
|
||||
internal encoding of characters in strings is in unicode
|
||||
codepoints. */
|
||||
str = scm_i_make_string (str_len, &buf);
|
||||
memcpy (buf, locale_str, str_len);
|
||||
|
||||
str_len = scm_i_string_length (str);
|
||||
c_pos = scm_to_unsigned_integer (pos, 0, str_len);
|
||||
|
||||
if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
|
||||
scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
|
||||
|
||||
/* XXX
|
||||
|
||||
Make a new string to isolate us from changes to the original.
|
||||
This is done so that we can rely on scm_i_string_chars to stay in
|
||||
place even across SCM_TICKs.
|
||||
|
||||
Additionally, when we are going to write to the string, we make a
|
||||
copy so that we can write to it without having to use
|
||||
scm_i_string_writable_chars.
|
||||
*/
|
||||
|
||||
if (modes & SCM_WRTNG)
|
||||
str = scm_c_substring_copy (str, 0, str_len);
|
||||
else
|
||||
str = scm_c_substring (str, 0, str_len);
|
||||
|
||||
scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
|
||||
z = scm_new_port_table_entry (scm_tc16_strport);
|
||||
pt = SCM_PTAB_ENTRY(z);
|
||||
SCM_SETSTREAM (z, SCM_UNPACK (str));
|
||||
SCM_SET_CELL_TYPE(z, scm_tc16_strport|modes);
|
||||
/* see above why we can use scm_i_string_chars here. */
|
||||
pt->write_buf = pt->read_buf = (unsigned char *) scm_i_string_chars (str);
|
||||
pt->read_pos = pt->write_pos = pt->read_buf + c_pos;
|
||||
pt->write_buf_size = pt->read_buf_size = str_len;
|
||||
|
@ -340,22 +332,60 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
|||
return z;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
||||
{
|
||||
SCM z;
|
||||
size_t str_len;
|
||||
char *buf;
|
||||
|
||||
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
|
||||
|
||||
/* Because ports are inherently 8-bit, strings need to be converted
|
||||
to a locale representation for storage. But, since string ports
|
||||
rely on string functionality for their memory management, we need
|
||||
to create a new string that has the 8-bit locale representation
|
||||
of the underlying string. This violates the guideline that the
|
||||
internal encoding of characters in strings is in unicode
|
||||
codepoints. */
|
||||
buf = scm_to_locale_stringn (str, &str_len);
|
||||
z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
|
||||
free (buf);
|
||||
return z;
|
||||
}
|
||||
|
||||
/* create a new string from a string port's buffer. */
|
||||
SCM scm_strport_to_string (SCM port)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
SCM str;
|
||||
char *dst;
|
||||
|
||||
if (pt->rw_active == SCM_PORT_WRITE)
|
||||
st_flush (port);
|
||||
|
||||
str = scm_i_make_string (pt->read_buf_size, &dst);
|
||||
memcpy (dst, (char *) pt->read_buf, pt->read_buf_size);
|
||||
str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
|
||||
scm_remember_upto_here_1 (port);
|
||||
return str;
|
||||
}
|
||||
|
||||
/* Create a vector containing the locale representation of the string in the
|
||||
port's buffer. */
|
||||
SCM scm_strport_to_locale_u8vector (SCM port)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
SCM vec;
|
||||
char *buf;
|
||||
|
||||
if (pt->rw_active == SCM_PORT_WRITE)
|
||||
st_flush (port);
|
||||
|
||||
buf = scm_malloc (pt->read_buf_size);
|
||||
memcpy (buf, pt->read_buf, pt->read_buf_size);
|
||||
vec = scm_take_u8vector ((unsigned char *) buf, pt->read_buf_size);
|
||||
scm_remember_upto_here_1 (port);
|
||||
return vec;
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
|
||||
(SCM obj, SCM printer),
|
||||
"Return a Scheme string obtained by printing @var{obj}.\n"
|
||||
|
@ -380,6 +410,25 @@ SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_call_with_output_locale_u8vector, "call-with-output-locale-u8vector", 1, 0, 0,
|
||||
(SCM proc),
|
||||
"Calls the one-argument procedure @var{proc} with a newly created output\n"
|
||||
"port. When the function returns, a vector containing the bytes of a\n"
|
||||
"locale representation of the characters written into the port is returned\n")
|
||||
#define FUNC_NAME s_scm_call_with_output_locale_u8vector
|
||||
{
|
||||
SCM p;
|
||||
|
||||
p = scm_mkstrport (SCM_INUM0,
|
||||
scm_make_string (SCM_INUM0, SCM_UNDEFINED),
|
||||
SCM_OPN | SCM_WRTNG,
|
||||
FUNC_NAME);
|
||||
scm_call_1 (proc, p);
|
||||
|
||||
return scm_get_output_locale_u8vector (p);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0,
|
||||
(SCM proc),
|
||||
"Calls the one-argument procedure @var{proc} with a newly created output\n"
|
||||
|
@ -424,6 +473,27 @@ SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 0, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_open_input_locale_u8vector, "open-input-locale-u8vector", 1, 0, 0,
|
||||
(SCM vec),
|
||||
"Take a u8vector containing the bytes of a string encoded in the\n"
|
||||
"current locale and return an input port that delivers characters\n"
|
||||
"from the string. The port can be closed by\n"
|
||||
"@code{close-input-port}, though its storage will be reclaimed\n"
|
||||
"by the garbage collector if it becomes inaccessible.")
|
||||
#define FUNC_NAME s_scm_open_input_locale_u8vector
|
||||
{
|
||||
scm_t_array_handle hnd;
|
||||
ssize_t inc;
|
||||
size_t len;
|
||||
const scm_t_uint8 *buf;
|
||||
|
||||
buf = scm_u8vector_elements (vec, &hnd, &len, &inc);
|
||||
SCM p = scm_i_mkstrport(SCM_INUM0, (const char *) buf, len, SCM_OPN | SCM_RDNG, FUNC_NAME);
|
||||
scm_array_handle_release (&hnd);
|
||||
return p;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0,
|
||||
(void),
|
||||
"Return an output port that will accumulate characters for\n"
|
||||
|
@ -456,11 +526,26 @@ SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 0, 0,
|
|||
#undef FUNC_NAME
|
||||
|
||||
|
||||
SCM_DEFINE (scm_get_output_locale_u8vector, "get-output-locale-u8vector", 1, 0, 0,
|
||||
(SCM port),
|
||||
"Given an output port created by @code{open-output-string},\n"
|
||||
"return a u8 vector containing the characters of the string\n"
|
||||
"encoded in the current locale.")
|
||||
#define FUNC_NAME s_scm_get_output_locale_u8vector
|
||||
{
|
||||
SCM_VALIDATE_OPOUTSTRPORT (1, port);
|
||||
return scm_strport_to_locale_u8vector (port);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
/* Given a null-terminated string EXPR containing a Scheme expression
|
||||
read it, and return it as an SCM value. */
|
||||
SCM
|
||||
scm_c_read_string (const char *expr)
|
||||
{
|
||||
/* FIXME: the c string gets packed into a string, only to get
|
||||
immediately unpacked in scm_mkstrport. */
|
||||
SCM port = scm_mkstrport (SCM_INUM0,
|
||||
scm_from_locale_string (expr),
|
||||
SCM_OPN | SCM_RDNG,
|
||||
|
|
|
@ -44,13 +44,19 @@ SCM_API scm_t_bits scm_tc16_strport;
|
|||
|
||||
|
||||
SCM_API SCM scm_mkstrport (SCM pos, SCM str, long modes, const char * caller);
|
||||
SCM_INTERNAL SCM scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len,
|
||||
long modes, const char *caller);
|
||||
SCM_API SCM scm_strport_to_string (SCM port);
|
||||
SCM_API SCM scm_strport_to_locale_u8vector (SCM port);
|
||||
SCM_API SCM scm_object_to_string (SCM obj, SCM printer);
|
||||
SCM_API SCM scm_call_with_output_string (SCM proc);
|
||||
SCM_API SCM scm_call_with_output_locale_u8vector (SCM proc);
|
||||
SCM_API SCM scm_call_with_input_string (SCM str, SCM proc);
|
||||
SCM_API SCM scm_open_input_string (SCM str);
|
||||
SCM_API SCM scm_open_input_locale_u8vector (SCM str);
|
||||
SCM_API SCM scm_open_output_string (void);
|
||||
SCM_API SCM scm_get_output_string (SCM port);
|
||||
SCM_API SCM scm_get_output_locale_u8vector (SCM port);
|
||||
SCM_API SCM scm_c_read_string (const char *expr);
|
||||
SCM_API SCM scm_c_eval_string (const char *expr);
|
||||
SCM_API SCM scm_c_eval_string_in_module (const char *expr, SCM module);
|
||||
|
|
|
@ -145,8 +145,11 @@
|
|||
(from (current-language))
|
||||
(to 'objcode)
|
||||
(opts '()))
|
||||
(let ((comp (or output-file (compiled-file-name file)))
|
||||
(in (open-input-file file)))
|
||||
(let* ((comp (or output-file (compiled-file-name file)))
|
||||
(in (open-input-file file))
|
||||
(enc (file-encoding in)))
|
||||
(if enc
|
||||
(set-port-encoding! in enc))
|
||||
(ensure-writable-dir (dirname comp))
|
||||
(call-with-output-file/atomic comp
|
||||
(lambda (port)
|
||||
|
|
139
test-suite/tests/encoding-escapes.test
Normal file
139
test-suite/tests/encoding-escapes.test
Normal file
|
@ -0,0 +1,139 @@
|
|||
;;;; encoding-escapes.test --- test suite for Guile's string encodings -*- mode: scheme; coding: utf-8 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(define s1 "última")
|
||||
(define s2 "cédula")
|
||||
(define s3 "años")
|
||||
(define s4 "羅生門")
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "ultima"
|
||||
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
|
||||
|
||||
(pass-if "cedula"
|
||||
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
|
||||
|
||||
(pass-if "anos"
|
||||
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
|
||||
|
||||
(pass-if "Rashomon"
|
||||
(string=? s4 (string-ints #x7f85 #x751f #x9580))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "ultima"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\372 #\l #\t #\i #\m #\a)))
|
||||
|
||||
(pass-if "cedula"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\c #\351 #\d #\u #\l #\a)))
|
||||
|
||||
(pass-if "anos"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\a #\361 #\o #\s)))
|
||||
|
||||
(pass-if "Rashomon"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\77605 #\72437 #\112600))))
|
||||
|
||||
|
||||
;; Check that an error is flagged on display output when the output
|
||||
;; error strategy is 'error
|
||||
|
||||
(with-test-prefix "display output errors"
|
||||
|
||||
(pass-if-exception "ultima"
|
||||
exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display s1 pt)))
|
||||
|
||||
(pass-if-exception "Rashomon"
|
||||
exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display s4 pt))))
|
||||
|
||||
;; Check that questions marks or substitutions appear when the conversion
|
||||
;; mode is substitute
|
||||
(with-test-prefix "display output substitutions"
|
||||
|
||||
(pass-if "ultima"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'substitute)
|
||||
(display s1 pt)
|
||||
(string=? "?ltima"
|
||||
(get-output-string pt))))
|
||||
|
||||
(pass-if "Rashomon"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'substitute)
|
||||
(display s4 pt)
|
||||
(string=? "???"
|
||||
(get-output-string pt)))))
|
||||
|
||||
|
||||
;; Check that hex escapes appear in the write output and that no error
|
||||
;; is thrown. The output error strategy should be irrelevant here.
|
||||
(with-test-prefix "display output escapes"
|
||||
|
||||
(pass-if "ultima"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'escape)
|
||||
(display s1 pt)
|
||||
(string=? "\\xfaltima"
|
||||
(get-output-string pt))))
|
||||
(pass-if "Rashomon"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'escape)
|
||||
(display s4 pt)
|
||||
(string=? "\\u7F85\\u751F\\u9580"
|
||||
(get-output-string pt)))))
|
||||
|
||||
(setlocale LC_ALL "en_US.utf8")
|
||||
|
||||
(with-test-prefix "input escapes"
|
||||
|
||||
(pass-if "última"
|
||||
(string=? "última"
|
||||
(with-input-from-string "\"\\xfaltima\"" read)))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(string=? "羅生門"
|
||||
(with-input-from-string "\"\\u7F85\\u751F\\u9580\"" read))))
|
||||
|
135
test-suite/tests/encoding-iso88591.test
Normal file
135
test-suite/tests/encoding-iso88591.test
Normal file
|
@ -0,0 +1,135 @@
|
|||
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: iso-8859-1 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(setlocale LC_ALL "")
|
||||
|
||||
(define s1 "última")
|
||||
(define s2 "cédula")
|
||||
(define s3 "años")
|
||||
(define s4 "¿Cómo?")
|
||||
|
||||
(with-test-prefix "string length"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string-length s1) 6))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string-length s2) 6))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string-length s3) 4))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(eq? (string-length s4) 6)))
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "última"
|
||||
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
|
||||
|
||||
(pass-if "años"
|
||||
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(string=? s4 (string-ints #xbf #x43 #xf3 #x6d #x6f #x3f))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "última"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\ú #\l #\t #\i #\m #\a)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\c #\é #\d #\u #\l #\a)))
|
||||
|
||||
(pass-if "años"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\a #\ñ #\o #\s)))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\¿ #\C #\ó #\m #\o #\?))))
|
||||
|
||||
;; Check that the output is in ISO-8859-1 encoding
|
||||
(with-test-prefix "display"
|
||||
|
||||
(pass-if "s1"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-1")
|
||||
(display s1 pt)
|
||||
(list= eqv?
|
||||
(list #xfa #x6c #x74 #x69 #x6d #x61)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt)))))
|
||||
|
||||
(pass-if "s2"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-1")
|
||||
(display s2 pt)
|
||||
(list= eqv?
|
||||
(list #x63 #xe9 #x64 #x75 #x6c #x61)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt))))))
|
||||
|
||||
(with-test-prefix "symbols == strings"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string->symbol s1) 'última))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string->symbol s2) 'cédula))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string->symbol s3) 'años))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(eq? (string->symbol s4) '¿Cómo?)))
|
||||
|
||||
(with-test-prefix "non-ascii variable names"
|
||||
|
||||
(pass-if "1"
|
||||
(let ((á 1)
|
||||
(ñ 2))
|
||||
(eq? (+ á ñ) 3))))
|
||||
|
||||
(with-test-prefix "output errors"
|
||||
|
||||
(pass-if-exception "char 256" exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-1")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display (string-ints 256) pt))))
|
||||
|
||||
;; Reset locales
|
||||
(setlocale LC_ALL "C")
|
136
test-suite/tests/encoding-iso88597.test
Normal file
136
test-suite/tests/encoding-iso88597.test
Normal file
|
@ -0,0 +1,136 @@
|
|||
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: iso-8859-7 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(setlocale LC_ALL "")
|
||||
|
||||
(define s1 "Ðåñß")
|
||||
(define s2 "ôçò")
|
||||
(define s3 "êñéôéêÞò")
|
||||
(define s4 "êáé")
|
||||
|
||||
(with-test-prefix "string length"
|
||||
|
||||
(pass-if "s1"
|
||||
(eq? (string-length s1) 4))
|
||||
|
||||
(pass-if "s2"
|
||||
(eq? (string-length s2) 3))
|
||||
|
||||
(pass-if "s3"
|
||||
(eq? (string-length s3) 8))
|
||||
|
||||
(pass-if "s4"
|
||||
(eq? (string-length s4) 3)))
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "s1"
|
||||
(string=? s1 (string-ints #x03a0 #x03b5 #x03c1 #x03af)))
|
||||
|
||||
(pass-if "s2"
|
||||
(string=? s2 (string-ints #x03c4 #x03b7 #x03c2)))
|
||||
|
||||
(pass-if "s3"
|
||||
(string=? s3 (string-ints #x03ba #x03c1 #x03b9 #x03c4 #x03b9 #x03ba #x03ae #x03c2)))
|
||||
|
||||
(pass-if "s4"
|
||||
(string=? s4 (string-ints #x03ba #x03b1 #x03b9))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "s1"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\Ð #\å #\ñ #\ß)))
|
||||
|
||||
(pass-if "s2"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\ô #\ç #\ò)))
|
||||
|
||||
(pass-if "s3"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\ê #\ñ #\é #\ô #\é #\ê #\Þ #\ò)))
|
||||
|
||||
(pass-if "s4"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\ê #\á #\é))))
|
||||
|
||||
;; Testing that the display of the string is output in the ISO-8859-7
|
||||
;; encoding
|
||||
(with-test-prefix "display"
|
||||
|
||||
(pass-if "s1"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-7")
|
||||
(display s1 pt)
|
||||
(list= eqv?
|
||||
(list #xd0 #xe5 #xf1 #xdf)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt)))))
|
||||
(pass-if "s2"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-7")
|
||||
(display s2 pt)
|
||||
(list= eqv?
|
||||
(list #xf4 #xe7 #xf2)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt))))))
|
||||
|
||||
(with-test-prefix "symbols == strings"
|
||||
|
||||
(pass-if "Ðåñß"
|
||||
(eq? (string->symbol s1) 'Ðåñß))
|
||||
|
||||
(pass-if "ôçò"
|
||||
(eq? (string->symbol s2) 'ôçò))
|
||||
|
||||
(pass-if "êñéôéêÞò"
|
||||
(eq? (string->symbol s3) 'êñéôéêÞò))
|
||||
|
||||
(pass-if "êáé"
|
||||
(eq? (string->symbol s4) 'êáé)))
|
||||
|
||||
(with-test-prefix "non-ascii variable names"
|
||||
|
||||
(pass-if "1"
|
||||
(let ((á 1)
|
||||
(ñ 2))
|
||||
(eq? (+ á ñ) 3))))
|
||||
|
||||
(with-test-prefix "output errors"
|
||||
|
||||
(pass-if-exception "char #x0400"
|
||||
exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-7")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display (string-ints #x0400) pt))))
|
||||
|
||||
;; Reset locale
|
||||
(setlocale LC_ALL "C")
|
105
test-suite/tests/encoding-utf8.test
Normal file
105
test-suite/tests/encoding-utf8.test
Normal file
|
@ -0,0 +1,105 @@
|
|||
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: utf-8 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(setlocale LC_ALL "")
|
||||
|
||||
(define s1 "última")
|
||||
(define s2 "cédula")
|
||||
(define s3 "años")
|
||||
(define s4 "羅生門")
|
||||
|
||||
(with-test-prefix "string length"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string-length s1) 6))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string-length s2) 6))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string-length s3) 4))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(eq? (string-length s4) 3)))
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "última"
|
||||
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
|
||||
|
||||
(pass-if "años"
|
||||
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(string=? s4 (string-ints #x7f85 #x751f #x9580))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "última"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\ú #\l #\t #\i #\m #\a)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\c #\é #\d #\u #\l #\a)))
|
||||
|
||||
(pass-if "años"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\a #\ñ #\o #\s)))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\羅 #\生 #\門))))
|
||||
|
||||
(with-test-prefix "symbols == strings"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string->symbol s1) 'última))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string->symbol s2) 'cédula))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string->symbol s3) 'años))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(eq? (string->symbol s4) '羅生門)))
|
||||
|
||||
(with-test-prefix "non-ascii variable names"
|
||||
|
||||
(pass-if "1"
|
||||
(let ((芥川龍之介 1)
|
||||
(ñ 2))
|
||||
(eq? (+ 芥川龍之介 ñ) 3))))
|
||||
|
||||
|
|
@ -22,6 +22,7 @@
|
|||
;;;
|
||||
;;; miscellaneous
|
||||
;;;
|
||||
(setbinary)
|
||||
|
||||
(define exception:numerical-overflow
|
||||
(cons 'numerical-overflow "^Numerical overflow"))
|
||||
|
|
|
@ -33,6 +33,9 @@
|
|||
|
||||
;;;; Some general utilities for testing ports.
|
||||
|
||||
;;; Make sure we are set up for 8-bit data
|
||||
(setbinary)
|
||||
|
||||
;;; Read from PORT until EOF, and return the result as a string.
|
||||
(define (read-all port)
|
||||
(let loop ((chars '()))
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
;;; All these tests assume Guile 1.8's port system, where characters are
|
||||
;;; treated as octets.
|
||||
|
||||
;;; Set the default encoding of future ports to be binary
|
||||
(setbinary)
|
||||
|
||||
|
||||
(with-test-prefix "7.2.5 End-of-File Object"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue