mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-20 11:40:18 +02:00
Merge commit 'origin/master'
Conflicts: libguile/unif.c
This commit is contained in:
commit
c6a1380bde
33 changed files with 1980 additions and 375 deletions
|
@ -24,7 +24,11 @@
|
|||
|
||||
|
||||
#include "libguile/__scm.h"
|
||||
#include "libguile/numbers.h"
|
||||
|
||||
#ifndef SCM_T_WCHAR_DEFINED
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
#define SCM_T_WCHAR_DEFINED
|
||||
#endif /* SCM_T_WCHAR_DEFINED */
|
||||
|
||||
|
||||
/* Immediate Characters
|
||||
|
|
|
@ -1076,7 +1076,8 @@ SCM_DEFINE (scm_gentemp, "gentemp", 0, 2, 0,
|
|||
{
|
||||
char buf[MAX_PREFIX_LENGTH + SCM_INTBUFLEN];
|
||||
char *name = buf;
|
||||
int len, n_digits;
|
||||
int n_digits;
|
||||
size_t len;
|
||||
|
||||
scm_c_issue_deprecation_warning ("`gentemp' is deprecated. "
|
||||
"Use `gensym' instead.");
|
||||
|
@ -1090,7 +1091,7 @@ SCM_DEFINE (scm_gentemp, "gentemp", 0, 2, 0,
|
|||
{
|
||||
SCM_VALIDATE_STRING (1, prefix);
|
||||
len = scm_i_string_length (prefix);
|
||||
name = scm_to_locale_stringn (prefix, (size_t *)(&len));
|
||||
name = scm_to_locale_stringn (prefix, &len);
|
||||
name = scm_realloc (name, len + SCM_INTBUFLEN);
|
||||
}
|
||||
|
||||
|
|
|
@ -587,7 +587,7 @@ static void fport_flush (SCM port);
|
|||
|
||||
/* fill a port's read-buffer with a single read. returns the first
|
||||
char or EOF if end of file. */
|
||||
static int
|
||||
static scm_t_wchar
|
||||
fport_fill_input (SCM port)
|
||||
{
|
||||
long count;
|
||||
|
@ -601,7 +601,7 @@ fport_fill_input (SCM port)
|
|||
if (count == -1)
|
||||
scm_syserror ("fport_fill_input");
|
||||
if (count == 0)
|
||||
return EOF;
|
||||
return (scm_t_wchar) EOF;
|
||||
else
|
||||
{
|
||||
pt->read_pos = pt->read_buf;
|
||||
|
|
|
@ -176,6 +176,8 @@ static SCM scm_unbound_p (SCM obj);
|
|||
static SCM scm_assert_bound (SCM value, SCM obj);
|
||||
static SCM scm_at_assert_bound_ref (SCM obj, SCM index);
|
||||
static SCM scm_sys_goops_loaded (void);
|
||||
static SCM scm_make_extended_class_from_symbol (SCM type_name_sym,
|
||||
int applicablep);
|
||||
|
||||
/* This function is used for efficient type dispatch. */
|
||||
SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0,
|
||||
|
@ -281,9 +283,9 @@ SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0,
|
|||
else
|
||||
{
|
||||
SCM name = SCM_STRUCT_TABLE_NAME (SCM_CDR (handle));
|
||||
SCM class = scm_make_extended_class (scm_is_true (name)
|
||||
? scm_i_symbol_chars (name)
|
||||
: 0,
|
||||
SCM class = scm_make_extended_class_from_symbol (scm_is_true (name)
|
||||
? name
|
||||
: scm_nullstr,
|
||||
SCM_I_OPERATORP (x));
|
||||
SCM_SET_STRUCT_TABLE_CLASS (SCM_CDR (handle), class);
|
||||
return class;
|
||||
|
@ -1526,11 +1528,11 @@ wrap_init (SCM class, SCM *m, long n)
|
|||
{
|
||||
long i;
|
||||
scm_t_bits slayout = SCM_STRUCT_DATA (class)[scm_vtable_index_layout];
|
||||
const char *layout = scm_i_symbol_chars (SCM_PACK (slayout));
|
||||
SCM layout = SCM_PACK (slayout);
|
||||
|
||||
/* Set all SCM-holding slots to unbound */
|
||||
for (i = 0; i < n; i++)
|
||||
if (layout[i*2] == 'p')
|
||||
if (scm_i_symbol_ref (layout, i*2) == 'p')
|
||||
m[i] = SCM_GOOPS_UNBOUND;
|
||||
else
|
||||
m[i] = 0;
|
||||
|
@ -2680,6 +2682,34 @@ make_class_from_template (char const *template, char const *type_name, SCM super
|
|||
return class;
|
||||
}
|
||||
|
||||
static SCM
|
||||
make_class_from_symbol (SCM type_name_sym, SCM supers, int applicablep)
|
||||
{
|
||||
SCM class, name;
|
||||
if (type_name_sym != SCM_BOOL_F)
|
||||
{
|
||||
name = scm_string_append (scm_list_3 (scm_from_locale_string ("<"),
|
||||
scm_symbol_to_string (type_name_sym),
|
||||
scm_from_locale_string (">")));
|
||||
name = scm_string_to_symbol (name);
|
||||
}
|
||||
else
|
||||
name = SCM_GOOPS_UNBOUND;
|
||||
|
||||
class = scm_permanent_object (scm_basic_make_class (applicablep
|
||||
? scm_class_procedure_class
|
||||
: scm_class_class,
|
||||
name,
|
||||
supers,
|
||||
SCM_EOL));
|
||||
|
||||
/* Only define name if doesn't already exist. */
|
||||
if (!SCM_GOOPS_UNBOUNDP (name)
|
||||
&& scm_is_false (scm_module_variable (scm_module_goops, name)))
|
||||
DEFVAR (name, class);
|
||||
return class;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_make_extended_class (char const *type_name, int applicablep)
|
||||
{
|
||||
|
@ -2691,6 +2721,16 @@ scm_make_extended_class (char const *type_name, int applicablep)
|
|||
applicablep);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_make_extended_class_from_symbol (SCM type_name_sym, int applicablep)
|
||||
{
|
||||
return make_class_from_symbol (type_name_sym,
|
||||
scm_list_1 (applicablep
|
||||
? scm_class_applicable
|
||||
: scm_class_top),
|
||||
applicablep);
|
||||
}
|
||||
|
||||
void
|
||||
scm_i_inherit_applicable (SCM c)
|
||||
{
|
||||
|
@ -2783,11 +2823,16 @@ static SCM
|
|||
make_struct_class (void *closure SCM_UNUSED,
|
||||
SCM vtable, SCM data, SCM prev SCM_UNUSED)
|
||||
{
|
||||
if (scm_is_true (SCM_STRUCT_TABLE_NAME (data)))
|
||||
SCM_SET_STRUCT_TABLE_CLASS (data,
|
||||
scm_make_extended_class
|
||||
(scm_i_symbol_chars (SCM_STRUCT_TABLE_NAME (data)),
|
||||
SCM_CLASS_FLAGS (vtable) & SCM_CLASSF_OPERATOR));
|
||||
SCM sym = SCM_STRUCT_TABLE_NAME (data);
|
||||
if (scm_is_true (sym))
|
||||
{
|
||||
int applicablep = SCM_CLASS_FLAGS (vtable) & SCM_CLASSF_OPERATOR;
|
||||
|
||||
SCM_SET_STRUCT_TABLE_CLASS (data,
|
||||
scm_make_extended_class_from_symbol (sym, applicablep));
|
||||
}
|
||||
|
||||
scm_remember_upto_here_2 (data, vtable);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ SCM_API void scm_array_handle_set (scm_t_array_handle *h, ssize_t pos, SCM val);
|
|||
|
||||
SCM_API int scm_is_pair (SCM x);
|
||||
|
||||
SCM_API int scm_getc (SCM port);
|
||||
SCM_API int scm_get_byte_or_eof (SCM port);
|
||||
SCM_API void scm_putc (char c, SCM port);
|
||||
SCM_API void scm_puts (const char *str_data, SCM port);
|
||||
|
||||
|
@ -299,7 +299,7 @@ scm_is_pair (SCM x)
|
|||
SCM_C_EXTERN_INLINE
|
||||
#endif
|
||||
int
|
||||
scm_getc (SCM port)
|
||||
scm_get_byte_or_eof (SCM port)
|
||||
{
|
||||
int c;
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
@ -319,27 +319,6 @@ scm_getc (SCM port)
|
|||
|
||||
c = *(pt->read_pos++);
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\a':
|
||||
break;
|
||||
case '\b':
|
||||
SCM_DECCOL (port);
|
||||
break;
|
||||
case '\n':
|
||||
SCM_INCLINE (port);
|
||||
break;
|
||||
case '\r':
|
||||
SCM_ZEROCOL (port);
|
||||
break;
|
||||
case '\t':
|
||||
SCM_TABCOL (port);
|
||||
break;
|
||||
default:
|
||||
SCM_INCCOL (port);
|
||||
break;
|
||||
}
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
|
|
|
@ -85,6 +85,7 @@ SCM_DEFINE (scm_primitive_load, "primitive-load", 1, 0, 0,
|
|||
#define FUNC_NAME s_scm_primitive_load
|
||||
{
|
||||
SCM hook = *scm_loc_load_hook;
|
||||
char *encoding;
|
||||
SCM_VALIDATE_STRING (1, filename);
|
||||
if (scm_is_true (hook) && scm_is_false (scm_procedure_p (hook)))
|
||||
SCM_MISC_ERROR ("value of %load-hook is neither a procedure nor #f",
|
||||
|
@ -97,7 +98,15 @@ SCM_DEFINE (scm_primitive_load, "primitive-load", 1, 0, 0,
|
|||
SCM port = scm_open_file (filename, scm_from_locale_string ("r"));
|
||||
scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
|
||||
scm_i_dynwind_current_load_port (port);
|
||||
|
||||
encoding = scm_scan_for_encoding (port);
|
||||
if (encoding)
|
||||
{
|
||||
scm_i_set_port_encoding_x (port, encoding);
|
||||
free (encoding);
|
||||
}
|
||||
else
|
||||
/* The file has no encoding declaraed. We'll presume Latin-1. */
|
||||
scm_i_set_port_encoding_x (port, NULL);
|
||||
while (1)
|
||||
{
|
||||
SCM reader, form;
|
||||
|
|
|
@ -28,6 +28,11 @@
|
|||
#include "libguile/__scm.h"
|
||||
#include "libguile/print.h"
|
||||
|
||||
#ifndef SCM_T_WCHAR_DEFINED
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
#define SCM_T_WCHAR_DEFINED
|
||||
#endif /* SCM_T_WCHAR_DEFINED */
|
||||
|
||||
#if SCM_HAVE_FLOATINGPOINT_H
|
||||
# include <floatingpoint.h>
|
||||
#endif
|
||||
|
@ -174,7 +179,6 @@ typedef struct scm_t_complex
|
|||
double imag;
|
||||
} scm_t_complex;
|
||||
|
||||
typedef scm_t_int32 scm_t_wchar;
|
||||
|
||||
|
||||
|
||||
|
|
557
libguile/ports.c
557
libguile/ports.c
|
@ -30,6 +30,9 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h> /* for chsize on mingw */
|
||||
#include <assert.h>
|
||||
#include <uniconv.h>
|
||||
#include <unistr.h>
|
||||
#include <striconveh.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/async.h"
|
||||
|
@ -51,6 +54,7 @@
|
|||
#include "libguile/vectors.h"
|
||||
#include "libguile/weaks.h"
|
||||
#include "libguile/fluids.h"
|
||||
#include "libguile/eq.h"
|
||||
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
|
@ -358,10 +362,10 @@ SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
|
|||
|
||||
/* Standard ports --- current input, output, error, and more(!). */
|
||||
|
||||
static SCM cur_inport_fluid;
|
||||
static SCM cur_outport_fluid;
|
||||
static SCM cur_errport_fluid;
|
||||
static SCM cur_loadport_fluid;
|
||||
static SCM cur_inport_fluid = 0;
|
||||
static SCM cur_outport_fluid = 0;
|
||||
static SCM cur_errport_fluid = 0;
|
||||
static SCM cur_loadport_fluid = 0;
|
||||
|
||||
SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
|
||||
(),
|
||||
|
@ -370,7 +374,10 @@ SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
|
|||
"returns the @dfn{standard input} in Unix and C terminology.")
|
||||
#define FUNC_NAME s_scm_current_input_port
|
||||
{
|
||||
return scm_fluid_ref (cur_inport_fluid);
|
||||
if (cur_inport_fluid)
|
||||
return scm_fluid_ref (cur_inport_fluid);
|
||||
else
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -382,7 +389,10 @@ SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
|
|||
"Unix and C terminology.")
|
||||
#define FUNC_NAME s_scm_current_output_port
|
||||
{
|
||||
return scm_fluid_ref (cur_outport_fluid);
|
||||
if (cur_outport_fluid)
|
||||
return scm_fluid_ref (cur_outport_fluid);
|
||||
else
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -392,7 +402,10 @@ SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
|
|||
"@dfn{standard error} in Unix and C terminology).")
|
||||
#define FUNC_NAME s_scm_current_error_port
|
||||
{
|
||||
return scm_fluid_ref (cur_errport_fluid);
|
||||
if (cur_errport_fluid)
|
||||
return scm_fluid_ref (cur_errport_fluid);
|
||||
else
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
@ -509,10 +522,18 @@ scm_new_port_table_entry (scm_t_bits tag)
|
|||
|
||||
SCM z = scm_cons (SCM_EOL, SCM_EOL);
|
||||
scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
|
||||
const char *enc;
|
||||
|
||||
entry->file_name = SCM_BOOL_F;
|
||||
entry->rw_active = SCM_PORT_NEITHER;
|
||||
entry->port = z;
|
||||
/* Initialize this port with the thread's current default
|
||||
encoding. */
|
||||
if ((enc = scm_i_get_port_encoding (SCM_BOOL_F)) == NULL)
|
||||
entry->encoding = NULL;
|
||||
else
|
||||
entry->encoding = strdup (enc);
|
||||
entry->ilseq_handler = scm_i_get_conversion_strategy (SCM_BOOL_F);
|
||||
|
||||
SCM_SET_CELL_TYPE (z, tag);
|
||||
SCM_SETPTAB_ENTRY (z, entry);
|
||||
|
@ -549,6 +570,11 @@ scm_i_remove_port (SCM port)
|
|||
scm_t_port *p = SCM_PTAB_ENTRY (port);
|
||||
if (p->putback_buf)
|
||||
scm_gc_free (p->putback_buf, p->putback_buf_size, "putback buffer");
|
||||
if (p->encoding)
|
||||
{
|
||||
free (p->encoding);
|
||||
p->encoding = NULL;
|
||||
}
|
||||
scm_gc_free (p, sizeof (scm_t_port), "port");
|
||||
|
||||
SCM_SETPTAB_ENTRY (port, 0);
|
||||
|
@ -632,21 +658,22 @@ SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0,
|
|||
*/
|
||||
|
||||
static long
|
||||
scm_i_mode_bits_n (const char *modes, size_t n)
|
||||
scm_i_mode_bits_n (SCM modes)
|
||||
{
|
||||
return (SCM_OPN
|
||||
| (memchr (modes, 'r', n) || memchr (modes, '+', n) ? SCM_RDNG : 0)
|
||||
| ( memchr (modes, 'w', n)
|
||||
|| memchr (modes, 'a', n)
|
||||
|| memchr (modes, '+', n) ? SCM_WRTNG : 0)
|
||||
| (memchr (modes, '0', n) ? SCM_BUF0 : 0)
|
||||
| (memchr (modes, 'l', n) ? SCM_BUFLINE : 0));
|
||||
| (scm_i_string_contains_char (modes, 'r')
|
||||
|| scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
|
||||
| (scm_i_string_contains_char (modes, 'w')
|
||||
|| scm_i_string_contains_char (modes, 'a')
|
||||
|| scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
|
||||
| (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
|
||||
| (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
|
||||
}
|
||||
|
||||
long
|
||||
scm_mode_bits (char *modes)
|
||||
{
|
||||
return scm_i_mode_bits_n (modes, strlen (modes));
|
||||
return scm_i_mode_bits (scm_from_locale_string (modes));
|
||||
}
|
||||
|
||||
long
|
||||
|
@ -657,8 +684,7 @@ scm_i_mode_bits (SCM modes)
|
|||
if (!scm_is_string (modes))
|
||||
scm_wrong_type_arg_msg (NULL, 0, modes, "string");
|
||||
|
||||
bits = scm_i_mode_bits_n (scm_i_string_chars (modes),
|
||||
scm_i_string_length (modes));
|
||||
bits = scm_i_mode_bits_n (modes);
|
||||
scm_remember_upto_here_1 (modes);
|
||||
return bits;
|
||||
}
|
||||
|
@ -929,7 +955,7 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
|
|||
"characters are available, the end-of-file object is returned.")
|
||||
#define FUNC_NAME s_scm_read_char
|
||||
{
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
SCM_VALIDATE_OPINPORT (1, port);
|
||||
|
@ -940,6 +966,133 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
#define SCM_MBCHAR_BUF_SIZE (4)
|
||||
|
||||
/* Get one codepoint from a file, using the port's encoding. */
|
||||
scm_t_wchar
|
||||
scm_getc (SCM port)
|
||||
{
|
||||
int c;
|
||||
unsigned int bufcount = 0;
|
||||
char buf[SCM_MBCHAR_BUF_SIZE];
|
||||
scm_t_wchar codepoint = 0;
|
||||
scm_t_uint32 *u32;
|
||||
size_t u32len;
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
||||
c = scm_get_byte_or_eof (port);
|
||||
if (c == EOF)
|
||||
return (scm_t_wchar) EOF;
|
||||
|
||||
buf[0] = c;
|
||||
bufcount++;
|
||||
|
||||
if (pt->encoding == NULL)
|
||||
{
|
||||
/* The encoding is Latin-1: bytes are characters. */
|
||||
codepoint = buf[0];
|
||||
goto success;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
u32 = u32_conv_from_encoding (pt->encoding,
|
||||
(enum iconv_ilseq_handler) pt->ilseq_handler,
|
||||
buf, bufcount, NULL, NULL, &u32len);
|
||||
if (u32 == NULL || u32len == 0)
|
||||
{
|
||||
if (errno == ENOMEM)
|
||||
scm_memory_error ("Input decoding");
|
||||
|
||||
/* Otherwise errno is EILSEQ or EINVAL, so perhaps more
|
||||
bytes are needed. Keep looping. */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Complete codepoint found. */
|
||||
codepoint = u32[0];
|
||||
free (u32);
|
||||
goto success;
|
||||
}
|
||||
|
||||
if (bufcount == SCM_MBCHAR_BUF_SIZE)
|
||||
{
|
||||
/* We've read several bytes and didn't find a good
|
||||
codepoint. Give up. */
|
||||
goto failure;
|
||||
}
|
||||
|
||||
c = scm_get_byte_or_eof (port);
|
||||
|
||||
if (c == EOF)
|
||||
{
|
||||
/* EOF before a complete character was read. Push it all
|
||||
back and return EOF. */
|
||||
while (bufcount > 0)
|
||||
{
|
||||
/* FIXME: this will probably cause errors in the port column. */
|
||||
scm_unget_byte (buf[bufcount-1], port);
|
||||
bufcount --;
|
||||
}
|
||||
return EOF;
|
||||
}
|
||||
|
||||
if (c == '\n')
|
||||
{
|
||||
/* It is always invalid to have EOL in the middle of a
|
||||
multibyte character. */
|
||||
scm_unget_byte ('\n', port);
|
||||
goto failure;
|
||||
}
|
||||
|
||||
buf[bufcount++] = c;
|
||||
}
|
||||
|
||||
success:
|
||||
switch (codepoint)
|
||||
{
|
||||
case '\a':
|
||||
break;
|
||||
case '\b':
|
||||
SCM_DECCOL (port);
|
||||
break;
|
||||
case '\n':
|
||||
SCM_INCLINE (port);
|
||||
break;
|
||||
case '\r':
|
||||
SCM_ZEROCOL (port);
|
||||
break;
|
||||
case '\t':
|
||||
SCM_TABCOL (port);
|
||||
break;
|
||||
default:
|
||||
SCM_INCCOL (port);
|
||||
break;
|
||||
}
|
||||
|
||||
return codepoint;
|
||||
|
||||
failure:
|
||||
{
|
||||
char *err_buf;
|
||||
SCM err_str = scm_i_make_string (bufcount, &err_buf);
|
||||
memcpy (err_buf, buf, bufcount);
|
||||
|
||||
if (errno == EILSEQ)
|
||||
scm_misc_error (NULL, "input encoding error for ~s: ~s",
|
||||
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
|
||||
err_str));
|
||||
else
|
||||
scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
|
||||
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
|
||||
err_str));
|
||||
}
|
||||
|
||||
/* Never gets here. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* this should only be called when the read buffer is empty. it
|
||||
tries to refill the read buffer. it returns the first char from
|
||||
the port, which is either EOF or *(pt->read_pos). */
|
||||
|
@ -1027,7 +1180,7 @@ scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
|
|||
stringbuf_write_mutex. So, one shouldn't use scm_lfwrite_substr
|
||||
if the stringbuf write mutex may still be held elsewhere. */
|
||||
buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
|
||||
NULL, SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
pt->encoding, pt->ilseq_handler);
|
||||
ptob->write (port, buf, len);
|
||||
free (buf);
|
||||
|
||||
|
@ -1056,7 +1209,7 @@ scm_lfwrite_str (SCM str, SCM port)
|
|||
scm_end_input (port);
|
||||
|
||||
buf = scm_to_stringn (str, &len,
|
||||
NULL, SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
pt->encoding, pt->ilseq_handler);
|
||||
ptob->write (port, buf, len);
|
||||
free (buf);
|
||||
|
||||
|
@ -1257,8 +1410,8 @@ scm_end_input (SCM port)
|
|||
|
||||
|
||||
void
|
||||
scm_ungetc (int c, SCM port)
|
||||
#define FUNC_NAME "scm_ungetc"
|
||||
scm_unget_byte (int c, SCM port)
|
||||
#define FUNC_NAME "scm_unget_byte"
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
||||
|
@ -1317,6 +1470,25 @@ scm_ungetc (int c, SCM port)
|
|||
|
||||
if (pt->rw_random)
|
||||
pt->rw_active = SCM_PORT_READ;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
void
|
||||
scm_ungetc (scm_t_wchar c, SCM port)
|
||||
#define FUNC_NAME "scm_ungetc"
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
scm_t_wchar *wbuf;
|
||||
SCM str = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
int i;
|
||||
|
||||
wbuf[0] = c;
|
||||
buf = scm_to_stringn (str, &len, pt->encoding, pt->ilseq_handler);
|
||||
|
||||
for (i = len - 1; i >= 0; i--)
|
||||
scm_unget_byte (buf[i], port);
|
||||
|
||||
if (c == '\n')
|
||||
{
|
||||
|
@ -1363,7 +1535,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
|
|||
"to @code{read-char} would have hung.")
|
||||
#define FUNC_NAME s_scm_peek_char
|
||||
{
|
||||
int c, column;
|
||||
scm_t_wchar c, column;
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
else
|
||||
|
@ -1409,13 +1581,17 @@ SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
|
|||
"@var{port} is not supplied, the current-input-port is used.")
|
||||
#define FUNC_NAME s_scm_unread_string
|
||||
{
|
||||
int n;
|
||||
SCM_VALIDATE_STRING (1, str);
|
||||
if (SCM_UNBNDP (port))
|
||||
port = scm_current_input_port ();
|
||||
else
|
||||
SCM_VALIDATE_OPINPORT (2, port);
|
||||
|
||||
scm_ungets (scm_i_string_chars (str), scm_i_string_length (str), port);
|
||||
n = scm_i_string_length (str);
|
||||
|
||||
while (n--)
|
||||
scm_ungetc (scm_i_string_ref (str, n), port);
|
||||
|
||||
return str;
|
||||
}
|
||||
|
@ -1670,6 +1846,328 @@ SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
/* The default port encoding for this locale. New ports will have this
|
||||
encoding. If it is a string, that is the encoding. If it #f, it
|
||||
is in the native (Latin-1) encoding. */
|
||||
SCM_GLOBAL_VARIABLE (scm_port_encoding_var, "%port-encoding");
|
||||
static int scm_port_encoding_init = 0;
|
||||
|
||||
/* Return a C string representation of the current encoding. */
|
||||
const char *
|
||||
scm_i_get_port_encoding (SCM port)
|
||||
{
|
||||
SCM encoding;
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
if (!scm_port_encoding_init)
|
||||
return NULL;
|
||||
else if (!scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
|
||||
return NULL;
|
||||
else
|
||||
{
|
||||
encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_port_encoding_var));
|
||||
if (!scm_is_string (encoding))
|
||||
return NULL;
|
||||
else
|
||||
return scm_i_string_chars (encoding);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_port *pt;
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
if (pt->encoding)
|
||||
return pt->encoding;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns ENC is if is a recognized encoding. If it isn't, it tries
|
||||
to find an alias of ENC that is valid. Otherwise, it returns
|
||||
NULL. */
|
||||
static const char *
|
||||
find_valid_encoding (const char *enc)
|
||||
{
|
||||
int isvalid = 0;
|
||||
const char str[] = " ";
|
||||
scm_t_uint32 *u32;
|
||||
size_t u32len;
|
||||
|
||||
u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
|
||||
NULL, NULL, &u32len);
|
||||
isvalid = (u32 != NULL);
|
||||
free (u32);
|
||||
|
||||
if (isvalid)
|
||||
return enc;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void
|
||||
scm_i_set_port_encoding_x (SCM port, const char *enc)
|
||||
{
|
||||
const char *valid_enc;
|
||||
scm_t_port *pt;
|
||||
|
||||
/* Null is shorthand for the native, Latin-1 encoding. */
|
||||
if (enc == NULL)
|
||||
valid_enc = NULL;
|
||||
else
|
||||
{
|
||||
valid_enc = find_valid_encoding (enc);
|
||||
if (valid_enc == NULL)
|
||||
{
|
||||
SCM err;
|
||||
err = scm_from_locale_string (enc);
|
||||
scm_misc_error (NULL, "invalid or unknown character encoding ~s",
|
||||
scm_list_1 (err));
|
||||
}
|
||||
}
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
/* Set the default encoding for future ports. */
|
||||
if (!scm_port_encoding_init
|
||||
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
|
||||
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
|
||||
SCM_EOL);
|
||||
|
||||
if (valid_enc == NULL
|
||||
|| !strcmp (valid_enc, "ASCII")
|
||||
|| !strcmp (valid_enc, "ANSI_X3.4-1968")
|
||||
|| !strcmp (valid_enc, "ISO-8859-1"))
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
|
||||
else
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var),
|
||||
scm_from_locale_string (valid_enc));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set the character encoding for this port. */
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
if (pt->encoding)
|
||||
free (pt->encoding);
|
||||
if (valid_enc == NULL)
|
||||
pt->encoding = NULL;
|
||||
else
|
||||
pt->encoding = strdup (valid_enc);
|
||||
}
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
|
||||
(SCM port),
|
||||
"Returns, as a string, the character encoding that @var{port}\n"
|
||||
"uses to interpret its input and output.\n")
|
||||
#define FUNC_NAME s_scm_port_encoding
|
||||
{
|
||||
scm_t_port *pt;
|
||||
const char *enc;
|
||||
|
||||
SCM_VALIDATE_PORT (1, port);
|
||||
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
enc = scm_i_get_port_encoding (port);
|
||||
if (enc)
|
||||
return scm_from_locale_string (pt->encoding);
|
||||
else
|
||||
return scm_from_locale_string ("NONE");
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
|
||||
(SCM port, SCM enc),
|
||||
"Sets the character encoding that will be used to interpret all\n"
|
||||
"port I/O. New ports are created with the encoding\n"
|
||||
"appropriate for the current locale if @code{setlocale} has \n"
|
||||
"been called or ISO-8859-1 otherwise\n"
|
||||
"and this procedure can be used to modify that encoding.\n")
|
||||
|
||||
#define FUNC_NAME s_scm_set_port_encoding_x
|
||||
{
|
||||
char *enc_str;
|
||||
const char *valid_enc_str;
|
||||
|
||||
SCM_VALIDATE_PORT (1, port);
|
||||
SCM_VALIDATE_STRING (2, enc);
|
||||
|
||||
enc_str = scm_to_locale_string (enc);
|
||||
valid_enc_str = find_valid_encoding (enc_str);
|
||||
if (valid_enc_str == NULL)
|
||||
{
|
||||
free (enc_str);
|
||||
scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
|
||||
scm_list_1 (enc));
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_i_set_port_encoding_x (port, valid_enc_str);
|
||||
free (enc_str);
|
||||
}
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
/* This determines how conversions handle unconvertible characters. */
|
||||
SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
|
||||
static int scm_conversion_strategy_init = 0;
|
||||
|
||||
scm_t_string_failed_conversion_handler
|
||||
scm_i_get_conversion_strategy (SCM port)
|
||||
{
|
||||
SCM encoding;
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
if (!scm_conversion_strategy_init
|
||||
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
|
||||
return SCM_FAILED_CONVERSION_QUESTION_MARK;
|
||||
else
|
||||
{
|
||||
encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
|
||||
if (scm_is_false (encoding))
|
||||
return SCM_FAILED_CONVERSION_QUESTION_MARK;
|
||||
else
|
||||
return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_port *pt;
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
return pt->ilseq_handler;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
scm_i_set_conversion_strategy_x (SCM port,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
SCM strategy;
|
||||
scm_t_port *pt;
|
||||
|
||||
strategy = scm_from_int ((int) handler);
|
||||
|
||||
if (scm_is_false (port))
|
||||
{
|
||||
/* Set the default encoding for future ports. */
|
||||
if (!scm_conversion_strategy
|
||||
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
|
||||
scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
|
||||
SCM_EOL);
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set the character encoding for this port. */
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
pt->ilseq_handler = handler;
|
||||
}
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
|
||||
1, 0, 0, (SCM port),
|
||||
"Returns the behavior of the port when handling a character that\n"
|
||||
"is not representable in the port's current encoding.\n"
|
||||
"It returns the symbol @code{error} if unrepresentable characters\n"
|
||||
"should cause exceptions, @code{substitute} if the port should\n"
|
||||
"try to replace unrepresentable characters with question marks or\n"
|
||||
"approximate characters, or @code{escape} if unrepresentable\n"
|
||||
"characters should be converted to string escapes.\n"
|
||||
"\n"
|
||||
"If @var{port} is @code{#f}, then the current default behavior\n"
|
||||
"will be returned. New ports will have this default behavior\n"
|
||||
"when they are created.\n")
|
||||
#define FUNC_NAME s_scm_port_conversion_strategy
|
||||
{
|
||||
scm_t_string_failed_conversion_handler h;
|
||||
|
||||
SCM_VALIDATE_OPPORT (1, port);
|
||||
|
||||
if (!scm_is_false (port))
|
||||
{
|
||||
SCM_VALIDATE_OPPORT (1, port);
|
||||
}
|
||||
|
||||
h = scm_i_get_conversion_strategy (port);
|
||||
if (h == SCM_FAILED_CONVERSION_ERROR)
|
||||
return scm_from_locale_symbol ("error");
|
||||
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
|
||||
return scm_from_locale_symbol ("substitute");
|
||||
else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
return scm_from_locale_symbol ("escape");
|
||||
else
|
||||
abort ();
|
||||
|
||||
/* Never gets here. */
|
||||
return SCM_UNDEFINED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
|
||||
2, 0, 0,
|
||||
(SCM port, SCM sym),
|
||||
"Sets the behavior of the interpreter when outputting a character\n"
|
||||
"that is not representable in the port's current encoding.\n"
|
||||
"@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
|
||||
"@code{'escape}. If it is @code{'error}, an error will be thrown\n"
|
||||
"when an unconvertible character is encountered. If it is\n"
|
||||
"@code{'substitute}, then unconvertible characters will \n"
|
||||
"be replaced with approximate characters, or with question marks\n"
|
||||
"if no approximately correct character is available.\n"
|
||||
"If it is @code{'escape},\n"
|
||||
"it will appear as a hex escape when output.\n"
|
||||
"\n"
|
||||
"If @var{port} is an open port, the conversion error behavior\n"
|
||||
"is set for that port. If it is @code{#f}, it is set as the\n"
|
||||
"default behavior for any future ports that get created in\n"
|
||||
"this thread.\n")
|
||||
#define FUNC_NAME s_scm_set_port_conversion_strategy_x
|
||||
{
|
||||
SCM err;
|
||||
SCM qm;
|
||||
SCM esc;
|
||||
|
||||
if (!scm_is_false (port))
|
||||
{
|
||||
SCM_VALIDATE_OPPORT (1, port);
|
||||
}
|
||||
|
||||
err = scm_from_locale_symbol ("error");
|
||||
if (scm_is_true (scm_eqv_p (sym, err)))
|
||||
{
|
||||
scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
qm = scm_from_locale_symbol ("substitute");
|
||||
if (scm_is_true (scm_eqv_p (sym, qm)))
|
||||
{
|
||||
scm_i_set_conversion_strategy_x (port,
|
||||
SCM_FAILED_CONVERSION_QUESTION_MARK);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
esc = scm_from_locale_symbol ("escape");
|
||||
if (scm_is_true (scm_eqv_p (sym, esc)))
|
||||
{
|
||||
scm_i_set_conversion_strategy_x (port,
|
||||
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
||||
SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
|
||||
void
|
||||
scm_print_port_mode (SCM exp, SCM port)
|
||||
{
|
||||
|
@ -1780,8 +2278,17 @@ scm_init_ports ()
|
|||
cur_loadport_fluid = scm_permanent_object (scm_make_fluid ());
|
||||
|
||||
scm_i_port_weak_hash = scm_permanent_object (scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)));
|
||||
|
||||
#include "libguile/ports.x"
|
||||
|
||||
SCM_VARIABLE_SET (scm_port_encoding_var, scm_make_fluid ());
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
|
||||
scm_port_encoding_init = 1;
|
||||
|
||||
SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
|
||||
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
|
||||
scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));
|
||||
scm_conversion_strategy_init = 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -28,7 +28,7 @@
|
|||
#include "libguile/print.h"
|
||||
#include "libguile/struct.h"
|
||||
#include "libguile/threads.h"
|
||||
|
||||
#include "libguile/strings.h"
|
||||
|
||||
|
||||
|
||||
|
@ -56,6 +56,10 @@ typedef struct
|
|||
long line_number; /* debugging support. */
|
||||
int column_number; /* debugging support. */
|
||||
|
||||
/* Character encoding support */
|
||||
char *encoding;
|
||||
scm_t_string_failed_conversion_handler ilseq_handler;
|
||||
|
||||
/* port buffers. the buffer(s) are set up for all ports.
|
||||
in the case of string ports, the buffer is the string itself.
|
||||
in the case of unbuffered file ports, the buffer is a
|
||||
|
@ -266,6 +270,7 @@ SCM_API SCM scm_eof_object_p (SCM x);
|
|||
SCM_API SCM scm_force_output (SCM port);
|
||||
SCM_API SCM scm_flush_all_ports (void);
|
||||
SCM_API SCM scm_read_char (SCM port);
|
||||
SCM_API scm_t_wchar scm_getc (SCM port);
|
||||
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
|
||||
SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
|
||||
SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
|
||||
|
@ -275,7 +280,8 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
|
|||
SCM_API void scm_flush (SCM port);
|
||||
SCM_API void scm_end_input (SCM port);
|
||||
SCM_API int scm_fill_input (SCM port);
|
||||
SCM_API void scm_ungetc (int c, SCM port);
|
||||
SCM_INTERNAL void scm_unget_byte (int c, SCM port);
|
||||
SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
|
||||
SCM_API void scm_ungets (const char *s, int n, SCM port);
|
||||
SCM_API SCM scm_peek_char (SCM port);
|
||||
SCM_API SCM scm_unread_char (SCM cobj, SCM port);
|
||||
|
@ -288,6 +294,15 @@ SCM_API SCM scm_port_column (SCM port);
|
|||
SCM_API SCM scm_set_port_column_x (SCM port, SCM line);
|
||||
SCM_API SCM scm_port_filename (SCM port);
|
||||
SCM_API SCM scm_set_port_filename_x (SCM port, SCM filename);
|
||||
SCM_INTERNAL const char *scm_i_get_port_encoding (SCM port);
|
||||
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
|
||||
SCM_API SCM scm_port_encoding (SCM port);
|
||||
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
|
||||
SCM_INTERNAL scm_t_string_failed_conversion_handler scm_i_get_conversion_strategy (SCM port);
|
||||
SCM_INTERNAL void scm_i_set_conversion_strategy_x (SCM port,
|
||||
scm_t_string_failed_conversion_handler h);
|
||||
SCM_API SCM scm_port_conversion_strategy (SCM port);
|
||||
SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
|
||||
SCM_API int scm_port_print (SCM exp, SCM port, scm_print_state *);
|
||||
SCM_API void scm_print_port_mode (SCM exp, SCM port);
|
||||
SCM_API void scm_ports_prehistory (void);
|
||||
|
@ -295,7 +310,6 @@ SCM_API SCM scm_void_port (char * mode_str);
|
|||
SCM_API SCM scm_sys_make_void_port (SCM mode);
|
||||
SCM_INTERNAL void scm_init_ports (void);
|
||||
|
||||
|
||||
#if SCM_ENABLE_DEPRECATED==1
|
||||
SCM_API scm_t_port * scm_add_to_port_table (SCM port);
|
||||
#endif
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <uniconv.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/dynwind.h"
|
||||
|
@ -1528,12 +1529,17 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
|
|||
"Otherwise the specified locale category is set to the string\n"
|
||||
"@var{locale} and the new value is returned as a\n"
|
||||
"system-dependent string. If @var{locale} is an empty string,\n"
|
||||
"the locale will be set using environment variables.")
|
||||
"the locale will be set using environment variables.\n"
|
||||
"\n"
|
||||
"When the locale is changed, the character encoding of the new\n"
|
||||
"locale (UTF-8, ISO-8859-1, etc.) is used for the current\n"
|
||||
"input, output, and error ports\n")
|
||||
#define FUNC_NAME s_scm_setlocale
|
||||
{
|
||||
int c_category;
|
||||
char *clocale;
|
||||
char *rv;
|
||||
const char *enc;
|
||||
|
||||
scm_dynwind_begin (0);
|
||||
|
||||
|
@ -1562,15 +1568,47 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
|
|||
SCM_SYSERROR;
|
||||
}
|
||||
|
||||
/* Recompute the standard SRFI-14 character sets in a locale-dependent
|
||||
(actually charset-dependent) way. */
|
||||
scm_srfi_14_compute_char_sets ();
|
||||
enc = locale_charset ();
|
||||
/* Set the default encoding for new ports. */
|
||||
scm_i_set_port_encoding_x (SCM_BOOL_F, enc);
|
||||
/* Set the encoding for the stdio ports. */
|
||||
scm_i_set_port_encoding_x (scm_current_input_port (), enc);
|
||||
scm_i_set_port_encoding_x (scm_current_output_port (), enc);
|
||||
scm_i_set_port_encoding_x (scm_current_error_port (), enc);
|
||||
|
||||
scm_dynwind_end ();
|
||||
return scm_from_locale_string (rv);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
#endif /* HAVE_SETLOCALE */
|
||||
SCM_DEFINE (scm_setbinary, "setbinary", 0, 0, 0,
|
||||
(void),
|
||||
"Sets the encoding for the current input, output, and error\n"
|
||||
"ports to ISO-8859-1. That character encoding allows\n"
|
||||
"ports to operate on binary data.\n"
|
||||
"\n"
|
||||
"It also sets the default encoding for newly created ports\n"
|
||||
"to ISO-8859-1.\n"
|
||||
"\n"
|
||||
"The previous default encoding for new ports is returned\n")
|
||||
#define FUNC_NAME s_scm_setbinary
|
||||
{
|
||||
const char *enc = scm_i_get_port_encoding (SCM_BOOL_F);
|
||||
|
||||
/* Set the default encoding for new ports. */
|
||||
scm_i_set_port_encoding_x (SCM_BOOL_F, NULL);
|
||||
/* Set the encoding for the stdio ports. */
|
||||
scm_i_set_port_encoding_x (scm_current_input_port (), NULL);
|
||||
scm_i_set_port_encoding_x (scm_current_output_port (), NULL);
|
||||
scm_i_set_port_encoding_x (scm_current_error_port (), NULL);
|
||||
|
||||
if (enc)
|
||||
return scm_from_locale_string (enc);
|
||||
|
||||
return scm_from_locale_string ("ISO-8859-1");
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
#ifdef HAVE_MKNOD
|
||||
SCM_DEFINE (scm_mknod, "mknod", 4, 0, 0,
|
||||
|
|
|
@ -74,6 +74,7 @@ SCM_API SCM scm_access (SCM path, SCM how);
|
|||
SCM_API SCM scm_getpid (void);
|
||||
SCM_API SCM scm_putenv (SCM str);
|
||||
SCM_API SCM scm_setlocale (SCM category, SCM locale);
|
||||
SCM_API SCM scm_setbinary (void);
|
||||
SCM_API SCM scm_mknod (SCM path, SCM type, SCM perms, SCM dev);
|
||||
SCM_API SCM scm_nice (SCM incr);
|
||||
SCM_API SCM scm_sync (void);
|
||||
|
|
103
libguile/print.c
103
libguile/print.c
|
@ -462,20 +462,45 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
| UC_CATEGORY_MASK_S))
|
||||
/* Print the character if is graphic character. */
|
||||
{
|
||||
if (i<256)
|
||||
/* Character is graphic. Print it. */
|
||||
scm_putc (i, port);
|
||||
scm_t_wchar *wbuf;
|
||||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
const char *enc;
|
||||
|
||||
enc = scm_i_get_port_encoding (port);
|
||||
wbuf[0] = i;
|
||||
if (enc == NULL && i <= 0xFF)
|
||||
{
|
||||
/* Character is graphic and Latin-1. Print it */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
}
|
||||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
{
|
||||
buf = u32_conv_to_encoding (enc,
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1,
|
||||
NULL,
|
||||
NULL, &len);
|
||||
if (buf != NULL)
|
||||
{
|
||||
/* Character is graphic. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
}
|
||||
else
|
||||
/* Character is graphic but unrepresentable in
|
||||
this port's encoding. */
|
||||
scm_intprint (i, 8, port);
|
||||
}
|
||||
}
|
||||
else
|
||||
/* Character is a non-graphical character. */
|
||||
scm_intprint (i, 8, port);
|
||||
}
|
||||
else
|
||||
scm_putc (i, port);
|
||||
scm_i_charprint (i, port);
|
||||
}
|
||||
else if (SCM_IFLAGP (exp)
|
||||
&& ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
|
||||
|
@ -607,21 +632,32 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
|
|||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
char *buf;
|
||||
size_t len;
|
||||
|
||||
wbuf[0] = ch;
|
||||
|
||||
buf = u32_conv_to_encoding ("ISO-8859-1",
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1, NULL, NULL, &len);
|
||||
if (buf != NULL)
|
||||
|
||||
if (scm_i_get_port_encoding (port))
|
||||
{
|
||||
/* Character is graphic and representable in
|
||||
this encoding. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
printed = 1;
|
||||
wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
wbuf[0] = ch;
|
||||
buf = u32_conv_to_encoding (scm_i_get_port_encoding (port),
|
||||
iconveh_error,
|
||||
(scm_t_uint32 *) wbuf,
|
||||
1 ,
|
||||
NULL,
|
||||
NULL, &len);
|
||||
if (buf != NULL)
|
||||
{
|
||||
/* Character is graphic and representable in
|
||||
this encoding. Print it. */
|
||||
scm_lfwrite_str (wstr, port);
|
||||
free (buf);
|
||||
printed = 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (ch <= 0xFF)
|
||||
{
|
||||
scm_putc (ch, port);
|
||||
printed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!printed)
|
||||
|
@ -834,7 +870,7 @@ scm_prin1 (SCM exp, SCM port, int writingp)
|
|||
/* Print a character.
|
||||
*/
|
||||
void
|
||||
scm_i_charprint (scm_t_uint32 ch, SCM port)
|
||||
scm_i_charprint (scm_t_wchar ch, SCM port)
|
||||
{
|
||||
scm_t_wchar *wbuf;
|
||||
SCM wstr = scm_i_make_wide_string (1, &wbuf);
|
||||
|
@ -1056,9 +1092,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
|
|||
SCM port, answer = SCM_UNSPECIFIED;
|
||||
int fReturnString = 0;
|
||||
int writingp;
|
||||
const char *start;
|
||||
const char *end;
|
||||
const char *p;
|
||||
size_t start, p, end;
|
||||
|
||||
if (scm_is_eq (destination, SCM_BOOL_T))
|
||||
{
|
||||
|
@ -1081,15 +1115,16 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
|
|||
SCM_VALIDATE_STRING (2, message);
|
||||
SCM_VALIDATE_REST_ARGUMENT (args);
|
||||
|
||||
start = scm_i_string_chars (message);
|
||||
end = start + scm_i_string_length (message);
|
||||
p = 0;
|
||||
start = 0;
|
||||
end = scm_i_string_length (message);
|
||||
for (p = start; p != end; ++p)
|
||||
if (*p == '~')
|
||||
if (scm_i_string_ref (message, p) == '~')
|
||||
{
|
||||
if (++p == end)
|
||||
break;
|
||||
|
||||
switch (*p)
|
||||
switch (scm_i_string_ref (message, p))
|
||||
{
|
||||
case 'A': case 'a':
|
||||
writingp = 0;
|
||||
|
@ -1098,33 +1133,33 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
|
|||
writingp = 1;
|
||||
break;
|
||||
case '~':
|
||||
scm_lfwrite (start, p - start, port);
|
||||
scm_lfwrite_substr (message, start, p, port);
|
||||
start = p + 1;
|
||||
continue;
|
||||
case '%':
|
||||
scm_lfwrite (start, p - start - 1, port);
|
||||
scm_lfwrite_substr (message, start, p - 1, port);
|
||||
scm_newline (port);
|
||||
start = p + 1;
|
||||
continue;
|
||||
default:
|
||||
SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use (ice-9 format) instead",
|
||||
scm_list_1 (SCM_MAKE_CHAR (*p)));
|
||||
scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p))));
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (!scm_is_pair (args))
|
||||
SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A",
|
||||
scm_list_1 (SCM_MAKE_CHAR (*p)));
|
||||
scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p))));
|
||||
|
||||
scm_lfwrite (start, p - start - 1, port);
|
||||
scm_lfwrite_substr (message, start, p - 1, port);
|
||||
/* we pass destination here */
|
||||
scm_prin1 (SCM_CAR (args), destination, writingp);
|
||||
args = SCM_CDR (args);
|
||||
start = p + 1;
|
||||
}
|
||||
|
||||
scm_lfwrite (start, p - start, port);
|
||||
scm_lfwrite_substr (message, start, p, port);
|
||||
if (!scm_is_eq (args, SCM_EOL))
|
||||
SCM_MISC_ERROR ("FORMAT: ~A superfluous arguments",
|
||||
scm_list_1 (scm_length (args)));
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
#include "libguile/__scm.h"
|
||||
|
||||
#include "libguile/chars.h"
|
||||
#include "libguile/options.h"
|
||||
|
||||
|
||||
|
@ -77,7 +78,7 @@ SCM_API SCM scm_print_options (SCM setting);
|
|||
SCM_API SCM scm_make_print_state (void);
|
||||
SCM_API void scm_free_print_state (SCM print_state);
|
||||
SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state);
|
||||
SCM_INTERNAL void scm_i_charprint (scm_t_uint32 c, SCM port);
|
||||
SCM_INTERNAL void scm_i_charprint (scm_t_wchar c, SCM port);
|
||||
SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
|
||||
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
|
||||
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);
|
||||
|
|
|
@ -59,12 +59,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
|
|||
size_t j;
|
||||
size_t cstart;
|
||||
size_t cend;
|
||||
int c;
|
||||
const char *cdelims;
|
||||
scm_t_wchar c;
|
||||
size_t num_delims;
|
||||
|
||||
SCM_VALIDATE_STRING (1, delims);
|
||||
cdelims = scm_i_string_chars (delims);
|
||||
num_delims = scm_i_string_length (delims);
|
||||
|
||||
SCM_VALIDATE_STRING (2, str);
|
||||
|
@ -83,7 +81,7 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
|
|||
c = scm_getc (port);
|
||||
for (k = 0; k < num_delims; k++)
|
||||
{
|
||||
if (cdelims[k] == c)
|
||||
if (scm_i_string_ref (delims, k) == c)
|
||||
{
|
||||
if (scm_is_false (gobble))
|
||||
scm_ungetc (c, port);
|
||||
|
|
385
libguile/read.c
385
libguile/read.c
|
@ -27,6 +27,8 @@
|
|||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <unicase.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/bytevectors.h"
|
||||
|
@ -178,11 +180,6 @@ static SCM *scm_read_hash_procedures;
|
|||
(((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
|
||||
|| ((_chr) == 'd') || ((_chr) == 'l'))
|
||||
|
||||
/* An inlinable version of `scm_c_downcase ()'. */
|
||||
#define CHAR_DOWNCASE(_chr) \
|
||||
(((_chr) <= UCHAR_MAX) ? tolower ((int) (_chr)) : (_chr))
|
||||
|
||||
|
||||
/* Read an SCSH block comment. */
|
||||
static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
|
||||
static SCM scm_read_commented_expression (int chr, SCM port);
|
||||
|
@ -190,41 +187,69 @@ static SCM scm_read_commented_expression (int chr, SCM port);
|
|||
/* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
|
||||
zero if the whole token fits in BUF, non-zero otherwise. */
|
||||
static inline int
|
||||
read_token (SCM port, char *buf, size_t buf_size, size_t *read)
|
||||
read_token (SCM port, SCM buf, size_t *read)
|
||||
{
|
||||
scm_t_wchar chr;
|
||||
*read = 0;
|
||||
|
||||
while (*read < buf_size)
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
while (*read < scm_i_string_length (buf))
|
||||
{
|
||||
int chr;
|
||||
|
||||
chr = scm_getc (port);
|
||||
chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
|
||||
|
||||
if (chr == EOF)
|
||||
return 0;
|
||||
else if (CHAR_IS_DELIMITER (chr))
|
||||
{
|
||||
scm_i_string_stop_writing ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
chr = (SCM_CASE_INSENSITIVE_P ? uc_tolower (chr) : chr);
|
||||
|
||||
if (CHAR_IS_DELIMITER (chr))
|
||||
{
|
||||
scm_i_string_stop_writing ();
|
||||
scm_ungetc (chr, port);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf = (char) chr;
|
||||
buf++, (*read)++;
|
||||
}
|
||||
|
||||
scm_i_string_set_x (buf, *read, chr);
|
||||
(*read)++;
|
||||
}
|
||||
scm_i_string_stop_writing ();
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static SCM
|
||||
read_complete_token (SCM port, size_t *read)
|
||||
{
|
||||
SCM buffer, str = SCM_EOL;
|
||||
size_t len;
|
||||
int overflow;
|
||||
|
||||
buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
|
||||
overflow = read_token (port, buffer, read);
|
||||
if (!overflow)
|
||||
return scm_i_substring (buffer, 0, *read);
|
||||
|
||||
str = scm_string_copy (buffer);
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, &len);
|
||||
str = scm_string_append (scm_list_2 (str, buffer));
|
||||
*read += len;
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
return scm_i_substring (str, 0, *read);
|
||||
}
|
||||
|
||||
/* Skip whitespace from PORT and return the first non-whitespace character
|
||||
read. Raise an error on end-of-file. */
|
||||
static int
|
||||
flush_ws (SCM port, const char *eoferr)
|
||||
{
|
||||
register int c;
|
||||
register scm_t_wchar c;
|
||||
while (1)
|
||||
switch (c = scm_getc (port))
|
||||
{
|
||||
|
@ -293,7 +318,7 @@ static SCM recsexpr (SCM obj, long line, int column, SCM filename);
|
|||
|
||||
|
||||
static SCM
|
||||
scm_read_sexp (int chr, SCM port)
|
||||
scm_read_sexp (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_i_lreadparen"
|
||||
{
|
||||
register int c;
|
||||
|
@ -554,107 +579,52 @@ scm_read_string (int chr, SCM port)
|
|||
|
||||
|
||||
static SCM
|
||||
scm_read_number (int chr, SCM port)
|
||||
scm_read_number (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
SCM result, str = SCM_EOL;
|
||||
char buffer[READER_BUFFER_SIZE];
|
||||
SCM result;
|
||||
SCM buffer;
|
||||
size_t read;
|
||||
int overflow = 0;
|
||||
|
||||
scm_ungetc (chr, port);
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, sizeof (buffer), &read);
|
||||
|
||||
if ((overflow) || (scm_is_pair (str)))
|
||||
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
if (scm_is_pair (str))
|
||||
{
|
||||
/* The slow path. */
|
||||
|
||||
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
|
||||
result = scm_string_to_number (str, SCM_UNDEFINED);
|
||||
if (!scm_is_true (result))
|
||||
/* Return a symbol instead of a number. */
|
||||
result = scm_string_to_symbol (str);
|
||||
}
|
||||
else
|
||||
{
|
||||
result = scm_c_locale_stringn_to_number (buffer, read, 10);
|
||||
if (!scm_is_true (result))
|
||||
/* Return a symbol instead of a number. */
|
||||
result = scm_from_locale_symboln (buffer, read);
|
||||
}
|
||||
buffer = read_complete_token (port, &read);
|
||||
result = scm_string_to_number (buffer, SCM_UNDEFINED);
|
||||
if (!scm_is_true (result))
|
||||
/* Return a symbol instead of a number. */
|
||||
result = scm_string_to_symbol (buffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_mixed_case_symbol (int chr, SCM port)
|
||||
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
SCM result, str = SCM_EOL;
|
||||
int overflow = 0, ends_with_colon = 0;
|
||||
char buffer[READER_BUFFER_SIZE];
|
||||
SCM result;
|
||||
int ends_with_colon = 0;
|
||||
SCM buffer;
|
||||
size_t read = 0;
|
||||
int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
|
||||
|
||||
scm_ungetc (chr, port);
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, sizeof (buffer), &read);
|
||||
buffer = read_complete_token (port, &read);
|
||||
if (read > 0)
|
||||
ends_with_colon = scm_i_string_ref (buffer, read - 1) == ':';
|
||||
|
||||
if (read > 0)
|
||||
ends_with_colon = (buffer[read - 1] == ':');
|
||||
|
||||
if ((overflow) || (scm_is_pair (str)))
|
||||
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
if (scm_is_pair (str))
|
||||
{
|
||||
size_t len;
|
||||
|
||||
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
|
||||
len = scm_c_string_length (str);
|
||||
|
||||
/* Per SRFI-88, `:' alone is an identifier, not a keyword. */
|
||||
if (postfix && ends_with_colon && (len > 1))
|
||||
{
|
||||
/* Strip off colon. */
|
||||
str = scm_c_substring (str, 0, len-1);
|
||||
result = scm_string_to_symbol (str);
|
||||
result = scm_symbol_to_keyword (result);
|
||||
}
|
||||
else
|
||||
result = scm_string_to_symbol (str);
|
||||
}
|
||||
if (postfix && ends_with_colon && (read > 1))
|
||||
result = scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer, 0, read - 1)));
|
||||
else
|
||||
{
|
||||
/* For symbols smaller than `sizeof (buffer)', we don't need to recur
|
||||
to Scheme strings. Therefore, we only create one Scheme object (a
|
||||
symbol) per symbol read. */
|
||||
if (postfix && ends_with_colon && (read > 1))
|
||||
result = scm_from_locale_keywordn (buffer, read - 1);
|
||||
else
|
||||
result = scm_from_locale_symboln (buffer, read);
|
||||
}
|
||||
result = scm_string_to_symbol (buffer);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_read_number_and_radix (int chr, SCM port)
|
||||
scm_read_number_and_radix (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM result, str = SCM_EOL;
|
||||
SCM result;
|
||||
size_t read;
|
||||
char buffer[READER_BUFFER_SIZE];
|
||||
SCM buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
|
||||
unsigned int radix;
|
||||
int overflow = 0;
|
||||
|
||||
switch (chr)
|
||||
{
|
||||
|
@ -684,22 +654,8 @@ scm_read_number_and_radix (int chr, SCM port)
|
|||
radix = 10;
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
overflow = read_token (port, buffer, sizeof (buffer), &read);
|
||||
|
||||
if ((overflow) || (scm_is_pair (str)))
|
||||
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
|
||||
}
|
||||
while (overflow);
|
||||
|
||||
if (scm_is_pair (str))
|
||||
{
|
||||
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
|
||||
result = scm_string_to_number (str, scm_from_uint (radix));
|
||||
}
|
||||
else
|
||||
result = scm_c_locale_stringn_to_number (buffer, read, radix);
|
||||
buffer = read_complete_token (port, &read);
|
||||
result = scm_string_to_number (buffer, scm_from_uint (radix));
|
||||
|
||||
if (scm_is_true (result))
|
||||
return result;
|
||||
|
@ -729,7 +685,7 @@ scm_read_quote (int chr, SCM port)
|
|||
|
||||
case ',':
|
||||
{
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
|
||||
c = scm_getc (port);
|
||||
if ('@' == c)
|
||||
|
@ -828,7 +784,10 @@ scm_read_semicolon_comment (int chr, SCM port)
|
|||
{
|
||||
int c;
|
||||
|
||||
for (c = scm_getc (port);
|
||||
/* We use the get_byte here because there is no need to get the
|
||||
locale correct with comment input. This presumes that newline
|
||||
always represents itself no matter what the encoding is. */
|
||||
for (c = scm_get_byte_or_eof (port);
|
||||
(c != EOF) && (c != '\n');
|
||||
c = scm_getc (port));
|
||||
|
||||
|
@ -856,14 +815,19 @@ scm_read_boolean (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_character (int chr, SCM port)
|
||||
scm_read_character (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM ch;
|
||||
char charname[READER_CHAR_NAME_MAX_SIZE];
|
||||
SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
|
||||
size_t charname_len;
|
||||
scm_t_wchar cp;
|
||||
int overflow;
|
||||
|
||||
if (read_token (port, charname, sizeof (charname), &charname_len))
|
||||
overflow = read_token (port, charname, &charname_len);
|
||||
charname = scm_c_substring (charname, 0, charname_len);
|
||||
|
||||
if (overflow)
|
||||
goto char_error;
|
||||
|
||||
if (charname_len == 0)
|
||||
|
@ -878,28 +842,33 @@ scm_read_character (int chr, SCM port)
|
|||
}
|
||||
|
||||
if (charname_len == 1)
|
||||
return SCM_MAKE_CHAR (charname[0]);
|
||||
return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
|
||||
|
||||
if (*charname >= '0' && *charname < '8')
|
||||
cp = scm_i_string_ref (charname, 0);
|
||||
if (cp >= '0' && cp < '8')
|
||||
{
|
||||
/* Dirk:FIXME:: This type of character syntax is not R5RS
|
||||
* compliant. Further, it should be verified that the constant
|
||||
* does only consist of octal digits. Finally, it should be
|
||||
* checked whether the resulting fixnum is in the range of
|
||||
* characters. */
|
||||
SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
|
||||
SCM p = scm_string_to_number (charname, scm_from_uint (8));
|
||||
if (SCM_I_INUMP (p))
|
||||
return SCM_MAKE_CHAR (SCM_I_INUM (p));
|
||||
}
|
||||
|
||||
ch = scm_i_charname_to_char (charname, charname_len);
|
||||
/* The names of characters should never have non-Latin1
|
||||
characters. */
|
||||
if (scm_i_is_narrow_string (charname)
|
||||
|| scm_i_try_narrow_string (charname))
|
||||
ch = scm_i_charname_to_char (scm_i_string_chars (charname),
|
||||
charname_len);
|
||||
if (scm_is_true (ch))
|
||||
return ch;
|
||||
|
||||
char_error:
|
||||
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
|
||||
scm_list_1 (scm_from_locale_stringn (charname,
|
||||
charname_len)));
|
||||
scm_list_1 (charname));
|
||||
|
||||
return SCM_UNSPECIFIED;
|
||||
}
|
||||
|
@ -941,7 +910,7 @@ scm_read_srfi4_vector (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_bytevector (int chr, SCM port)
|
||||
scm_read_bytevector (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
chr = scm_getc (port);
|
||||
if (chr != 'u')
|
||||
|
@ -965,7 +934,7 @@ scm_read_bytevector (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_guile_bit_vector (int chr, SCM port)
|
||||
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
|
||||
terribly inefficient but who cares? */
|
||||
|
@ -985,13 +954,17 @@ scm_read_guile_bit_vector (int chr, SCM port)
|
|||
}
|
||||
|
||||
static inline SCM
|
||||
scm_read_scsh_block_comment (int chr, SCM port)
|
||||
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
int bang_seen = 0;
|
||||
|
||||
/* We can use the get_byte here because there is no need to get the
|
||||
locale correct when reading comments. This presumes that
|
||||
hash and exclamation points always represent themselves no
|
||||
matter what the source encoding is.*/
|
||||
for (;;)
|
||||
{
|
||||
int c = scm_getc (port);
|
||||
int c = scm_get_byte_or_eof (port);
|
||||
|
||||
if (c == EOF)
|
||||
scm_i_input_error ("skip_block_comment", port,
|
||||
|
@ -1009,9 +982,9 @@ scm_read_scsh_block_comment (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_commented_expression (int chr, SCM port)
|
||||
scm_read_commented_expression (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
int c;
|
||||
scm_t_wchar c;
|
||||
|
||||
c = flush_ws (port, (char *) NULL);
|
||||
if (EOF == c)
|
||||
|
@ -1023,19 +996,18 @@ scm_read_commented_expression (int chr, SCM port)
|
|||
}
|
||||
|
||||
static SCM
|
||||
scm_read_extended_symbol (int chr, SCM port)
|
||||
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
|
||||
{
|
||||
/* Guile's extended symbol read syntax looks like this:
|
||||
|
||||
#{This is all a symbol name}#
|
||||
|
||||
So here, CHR is expected to be `{'. */
|
||||
SCM result;
|
||||
int saw_brace = 0, finished = 0;
|
||||
size_t len = 0;
|
||||
char buf[1024];
|
||||
SCM buf = scm_i_make_string (1024, NULL);
|
||||
|
||||
result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
|
||||
while ((chr = scm_getc (port)) != EOF)
|
||||
{
|
||||
|
@ -1049,32 +1021,30 @@ scm_read_extended_symbol (int chr, SCM port)
|
|||
else
|
||||
{
|
||||
saw_brace = 0;
|
||||
buf[len++] = '}';
|
||||
buf[len++] = chr;
|
||||
scm_i_string_set_x (buf, len++, '}');
|
||||
scm_i_string_set_x (buf, len++, chr);
|
||||
}
|
||||
}
|
||||
else if (chr == '}')
|
||||
saw_brace = 1;
|
||||
else
|
||||
buf[len++] = chr;
|
||||
scm_i_string_set_x (buf, len++, chr);
|
||||
|
||||
if (len >= sizeof (buf) - 2)
|
||||
if (len >= scm_i_string_length (buf) - 2)
|
||||
{
|
||||
scm_string_append (scm_list_2 (result,
|
||||
scm_from_locale_stringn (buf, len)));
|
||||
scm_i_string_stop_writing ();
|
||||
SCM addy = scm_i_make_string (1024, NULL);
|
||||
buf = scm_string_append (scm_list_2 (buf, addy));
|
||||
len = 0;
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
}
|
||||
|
||||
if (finished)
|
||||
break;
|
||||
}
|
||||
scm_i_string_stop_writing ();
|
||||
|
||||
if (len)
|
||||
result = scm_string_append (scm_list_2
|
||||
(result,
|
||||
scm_from_locale_stringn (buf, len)));
|
||||
|
||||
return (scm_string_to_symbol (result));
|
||||
return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
|
||||
}
|
||||
|
||||
|
||||
|
@ -1110,7 +1080,7 @@ scm_read_sharp_extension (int chr, SCM port)
|
|||
/* The reader for the sharp `#' character. It basically dispatches reads
|
||||
among the above token readers. */
|
||||
static SCM
|
||||
scm_read_sharp (int chr, SCM port)
|
||||
scm_read_sharp (scm_t_wchar chr, SCM port)
|
||||
#define FUNC_NAME "scm_lreadr"
|
||||
{
|
||||
SCM result;
|
||||
|
@ -1162,7 +1132,7 @@ scm_read_sharp (int chr, SCM port)
|
|||
{
|
||||
/* When next char is '(', it really is an old-style
|
||||
uniform array. */
|
||||
int next_c = scm_getc (port);
|
||||
scm_t_wchar next_c = scm_getc (port);
|
||||
if (next_c != EOF)
|
||||
scm_ungetc (next_c, port);
|
||||
if (next_c == '(')
|
||||
|
@ -1210,7 +1180,7 @@ scm_read_expression (SCM port)
|
|||
{
|
||||
while (1)
|
||||
{
|
||||
register int chr;
|
||||
register scm_t_wchar chr;
|
||||
|
||||
chr = scm_getc (port);
|
||||
|
||||
|
@ -1421,6 +1391,127 @@ scm_get_hash_procedure (int c)
|
|||
}
|
||||
}
|
||||
|
||||
#define SCM_ENCODING_SEARCH_SIZE (500)
|
||||
|
||||
/* Search the first few hundred characters of a file for
|
||||
an emacs-like coding declaration. */
|
||||
char *
|
||||
scm_scan_for_encoding (SCM port)
|
||||
{
|
||||
char header[SCM_ENCODING_SEARCH_SIZE+1];
|
||||
size_t bytes_read;
|
||||
char *encoding = NULL;
|
||||
int utf8_bom = 0;
|
||||
char *pos;
|
||||
int i;
|
||||
int in_comment;
|
||||
|
||||
bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
|
||||
scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
|
||||
|
||||
if (bytes_read > 3
|
||||
&& header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
|
||||
utf8_bom = 1;
|
||||
|
||||
/* search past "coding[:=]" */
|
||||
pos = header;
|
||||
while (1)
|
||||
{
|
||||
if ((pos = strstr(pos, "coding")) == NULL)
|
||||
return NULL;
|
||||
|
||||
pos += strlen("coding");
|
||||
if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
|
||||
(*pos == ':' || *pos == '='))
|
||||
{
|
||||
pos ++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* skip spaces */
|
||||
while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
|
||||
(*pos == ' ' || *pos == '\t'))
|
||||
pos ++;
|
||||
|
||||
/* grab the next token */
|
||||
i = 0;
|
||||
while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
|
||||
&& (isalnum(pos[i]) || pos[i] == '_' || pos[i] == '-' || pos[i] == '.'))
|
||||
i++;
|
||||
|
||||
if (i == 0)
|
||||
return NULL;
|
||||
|
||||
encoding = scm_malloc (i+1);
|
||||
memcpy (encoding, pos, i);
|
||||
encoding[i] ='\0';
|
||||
for (i = 0; i < strlen(encoding); i++)
|
||||
encoding[i] = toupper(encoding[i]);
|
||||
|
||||
/* push backwards to make sure we were in a comment */
|
||||
in_comment = 0;
|
||||
while (pos - i - header > 0)
|
||||
{
|
||||
if (*(pos - i) == '\n')
|
||||
{
|
||||
/* This wasn't in a semicolon comment. Check for a
|
||||
hash-bang comment. */
|
||||
char *beg = strstr (header, "#!");
|
||||
char *end = strstr (header, "!#");
|
||||
if (beg < pos && pos < end)
|
||||
in_comment = 1;
|
||||
break;
|
||||
}
|
||||
if (*(pos - i) == ';')
|
||||
{
|
||||
in_comment = 1;
|
||||
break;
|
||||
}
|
||||
i ++;
|
||||
}
|
||||
if (!in_comment)
|
||||
{
|
||||
/* This wasn't in a comment */
|
||||
free (encoding);
|
||||
return NULL;
|
||||
}
|
||||
if (utf8_bom && strcmp(encoding, "UTF-8"))
|
||||
scm_misc_error (NULL,
|
||||
"the port input declares the encoding ~s but is encoded as UTF-8",
|
||||
scm_list_1 (scm_from_locale_string (encoding)));
|
||||
|
||||
return encoding;
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
|
||||
(SCM port),
|
||||
"Scans the port for an EMACS-like character coding declaration\n"
|
||||
"near the top of the contents of a port with random-acessible contents.\n"
|
||||
"The coding declaration is of the form\n"
|
||||
"@code{coding: XXXXX} and must appear in a scheme comment.\n"
|
||||
"\n"
|
||||
"Returns a string containing the character encoding of the file\n"
|
||||
"if a declaration was found, or @code{#f} otherwise.\n")
|
||||
#define FUNC_NAME s_scm_file_encoding
|
||||
{
|
||||
char *enc;
|
||||
SCM s_enc;
|
||||
|
||||
enc = scm_scan_for_encoding (port);
|
||||
if (enc == NULL)
|
||||
return SCM_BOOL_F;
|
||||
else
|
||||
{
|
||||
s_enc = scm_from_locale_string (enc);
|
||||
free (enc);
|
||||
return s_enc;
|
||||
}
|
||||
|
||||
return SCM_BOOL_F;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
void
|
||||
scm_init_read ()
|
||||
{
|
||||
|
|
|
@ -56,6 +56,8 @@ SCM_API SCM scm_read_options (SCM setting);
|
|||
SCM_API SCM scm_read (SCM port);
|
||||
SCM_API size_t scm_read_token (int ic, SCM * tok_buf, SCM port, int weird);
|
||||
SCM_API SCM scm_read_hash_extend (SCM chr, SCM proc);
|
||||
SCM_INTERNAL char *scm_scan_for_encoding (SCM port);
|
||||
SCM_API SCM scm_file_encoding (SCM port);
|
||||
|
||||
SCM_INTERNAL void scm_i_input_error (const char *func, SCM port,
|
||||
const char *message, SCM arg)
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "libguile/strings.h"
|
||||
#include "libguile/vectors.h"
|
||||
#include "libguile/dynwind.h"
|
||||
#include "libguile/srfi-13.h"
|
||||
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/socket.h"
|
||||
|
@ -1414,6 +1415,8 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
"protocols, if a packet larger than this limit is encountered\n"
|
||||
"then some data\n"
|
||||
"will be irrevocably lost.\n\n"
|
||||
"The data is assumed to be binary, and there is no decoding of\n"
|
||||
"of locale-encoded strings.\n\n"
|
||||
"The optional @var{flags} argument is a value or\n"
|
||||
"bitwise OR of MSG_OOB, MSG_PEEK, MSG_DONTROUTE etc.\n\n"
|
||||
"The value returned is the number of bytes read from the\n"
|
||||
|
@ -1428,6 +1431,7 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
int flg;
|
||||
char *dest;
|
||||
size_t len;
|
||||
SCM msg;
|
||||
|
||||
SCM_VALIDATE_OPFPORT (1, sock);
|
||||
SCM_VALIDATE_STRING (2, buf);
|
||||
|
@ -1437,16 +1441,16 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
|
|||
flg = scm_to_int (flags);
|
||||
fd = SCM_FPORT_FDES (sock);
|
||||
|
||||
len = scm_i_string_length (buf);
|
||||
buf = scm_i_string_start_writing (buf);
|
||||
dest = scm_i_string_writable_chars (buf);
|
||||
len = scm_i_string_length (buf);
|
||||
msg = scm_i_make_string (len, &dest);
|
||||
SCM_SYSCALL (rv = recv (fd, dest, len, flg));
|
||||
scm_i_string_stop_writing ();
|
||||
scm_string_copy_x (buf, scm_from_int (0),
|
||||
msg, scm_from_int (0), scm_from_size_t (len));
|
||||
|
||||
if (rv == -1)
|
||||
SCM_SYSERROR;
|
||||
|
||||
scm_remember_upto_here_1 (buf);
|
||||
scm_remember_upto_here_2 (buf, msg);
|
||||
return scm_from_int (rv);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
@ -1464,18 +1468,28 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0,
|
|||
"bitwise OR of MSG_OOB, MSG_PEEK, MSG_DONTROUTE etc.\n\n"
|
||||
"Note that the data is written directly to the socket\n"
|
||||
"file descriptor:\n"
|
||||
"any unflushed buffered port data is ignored.")
|
||||
"any unflushed buffered port data is ignored.\n\n"
|
||||
"This operation is defined only for strings containing codepoints\n"
|
||||
"zero to 255.")
|
||||
#define FUNC_NAME s_scm_send
|
||||
{
|
||||
int rv;
|
||||
int fd;
|
||||
int flg;
|
||||
const char *src;
|
||||
char *src;
|
||||
size_t len;
|
||||
|
||||
sock = SCM_COERCE_OUTPORT (sock);
|
||||
SCM_VALIDATE_OPFPORT (1, sock);
|
||||
SCM_VALIDATE_STRING (2, message);
|
||||
|
||||
/* If the string is wide, see if it can be coerced into
|
||||
a narrow string. */
|
||||
if (!scm_i_is_narrow_string (message)
|
||||
|| scm_i_try_narrow_string (message))
|
||||
SCM_MISC_ERROR ("the message string is not 8-bit: ~s",
|
||||
scm_list_1 (message));
|
||||
|
||||
if (SCM_UNBNDP (flags))
|
||||
flg = 0;
|
||||
else
|
||||
|
@ -1592,7 +1606,9 @@ SCM_DEFINE (scm_sendto, "sendto", 3, 1, 1,
|
|||
"set to be non-blocking.\n"
|
||||
"Note that the data is written directly to the socket\n"
|
||||
"file descriptor:\n"
|
||||
"any unflushed buffered port data is ignored.")
|
||||
"any unflushed buffered port data is ignored.\n"
|
||||
"This operation is defined only for strings containing codepoints\n"
|
||||
"zero to 255.")
|
||||
#define FUNC_NAME s_scm_sendto
|
||||
{
|
||||
int rv;
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <strftime.h>
|
||||
#include <unistr.h>
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/async.h"
|
||||
|
@ -53,6 +54,7 @@
|
|||
#include "libguile/strings.h"
|
||||
#include "libguile/vectors.h"
|
||||
#include "libguile/dynwind.h"
|
||||
#include "libguile/strings.h"
|
||||
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/stime.h"
|
||||
|
@ -624,18 +626,20 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
{
|
||||
struct tm t;
|
||||
|
||||
char *tbuf;
|
||||
scm_t_uint8 *tbuf;
|
||||
int size = 50;
|
||||
const char *fmt;
|
||||
char *myfmt;
|
||||
scm_t_uint8 *fmt;
|
||||
scm_t_uint8 *myfmt;
|
||||
int len;
|
||||
SCM result;
|
||||
|
||||
SCM_VALIDATE_STRING (1, format);
|
||||
bdtime2c (stime, &t, SCM_ARG2, FUNC_NAME);
|
||||
|
||||
fmt = scm_i_string_chars (format);
|
||||
len = scm_i_string_length (format);
|
||||
/* Convert string to UTF-8 so that non-ASCII characters in the
|
||||
format are passed through unchanged. */
|
||||
fmt = scm_i_to_utf8_string (format);
|
||||
len = strlen ((const char *) fmt);
|
||||
|
||||
/* Ugly hack: strftime can return 0 if its buffer is too small,
|
||||
but some valid time strings (e.g. "%p") can sometimes produce
|
||||
|
@ -643,9 +647,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
character to the format string, so that valid returns are always
|
||||
nonzero. */
|
||||
myfmt = scm_malloc (len+2);
|
||||
*myfmt = 'x';
|
||||
strncpy(myfmt+1, fmt, len);
|
||||
myfmt[len+1] = 0;
|
||||
*myfmt = (scm_t_uint8) 'x';
|
||||
strncpy ((char *) myfmt + 1, (const char *) fmt, len);
|
||||
myfmt[len + 1] = 0;
|
||||
scm_remember_upto_here_1 (format);
|
||||
free (fmt);
|
||||
|
||||
tbuf = scm_malloc (size);
|
||||
{
|
||||
|
@ -680,7 +686,8 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
|
||||
/* Use `nstrftime ()' from Gnulib, which supports all GNU extensions
|
||||
supported by glibc. */
|
||||
while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0)
|
||||
while ((len = nstrftime ((char *) tbuf, size,
|
||||
(const char *) myfmt, &t, 0, 0)) == 0)
|
||||
{
|
||||
free (tbuf);
|
||||
size *= 2;
|
||||
|
@ -696,7 +703,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
|
|||
#endif
|
||||
}
|
||||
|
||||
result = scm_from_locale_stringn (tbuf + 1, len - 1);
|
||||
result = scm_i_from_utf8_string ((const scm_t_uint8 *) tbuf + 1);
|
||||
free (tbuf);
|
||||
free (myfmt);
|
||||
#if HAVE_STRUCT_TM_TM_ZONE
|
||||
|
@ -722,14 +729,17 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
#define FUNC_NAME s_scm_strptime
|
||||
{
|
||||
struct tm t;
|
||||
const char *fmt, *str, *rest;
|
||||
scm_t_uint8 *fmt, *str, *rest;
|
||||
size_t used_len;
|
||||
long zoff;
|
||||
|
||||
SCM_VALIDATE_STRING (1, format);
|
||||
SCM_VALIDATE_STRING (2, string);
|
||||
|
||||
fmt = scm_i_string_chars (format);
|
||||
str = scm_i_string_chars (string);
|
||||
/* Convert strings to UTF-8 so that non-ASCII characters are passed
|
||||
through unchanged. */
|
||||
fmt = scm_i_to_utf8_string (format);
|
||||
str = scm_i_to_utf8_string (string);
|
||||
|
||||
/* initialize the struct tm */
|
||||
#define tm_init(field) t.field = 0
|
||||
|
@ -751,7 +761,8 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
fields, hence the use of SCM_CRITICAL_SECTION_START. */
|
||||
t.tm_isdst = -1;
|
||||
SCM_CRITICAL_SECTION_START;
|
||||
rest = strptime (str, fmt, &t);
|
||||
rest = (scm_t_uint8 *) strptime ((const char *) str,
|
||||
(const char *) fmt, &t);
|
||||
SCM_CRITICAL_SECTION_END;
|
||||
if (rest == NULL)
|
||||
{
|
||||
|
@ -759,6 +770,9 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
instance it doesn't. Force a sensible value for our error
|
||||
message. */
|
||||
errno = EINVAL;
|
||||
scm_remember_upto_here_2 (format, string);
|
||||
free (str);
|
||||
free (fmt);
|
||||
SCM_SYSERROR;
|
||||
}
|
||||
|
||||
|
@ -770,8 +784,14 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
|
|||
zoff = 0;
|
||||
#endif
|
||||
|
||||
/* Compute the number of UTF-8 characters. */
|
||||
used_len = u8_strnlen (str, rest-str);
|
||||
scm_remember_upto_here_2 (format, string);
|
||||
free (str);
|
||||
free (fmt);
|
||||
|
||||
return scm_cons (filltime (&t, zoff, NULL),
|
||||
scm_from_signed_integer (rest - str));
|
||||
scm_from_signed_integer (used_len));
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
#endif /* HAVE_STRPTIME */
|
||||
|
|
|
@ -28,6 +28,8 @@
|
|||
#include <unistr.h>
|
||||
#include <uniconv.h>
|
||||
|
||||
#include "striconveh.h"
|
||||
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/chars.h"
|
||||
#include "libguile/root.h"
|
||||
|
@ -240,6 +242,36 @@ widen_stringbuf (SCM buf)
|
|||
}
|
||||
}
|
||||
|
||||
/* Convert a stringbuf of 32-bit UCS-4-encoded characters to one
|
||||
containing 8-bit Latin-1-encoded characters, if possible. */
|
||||
static void
|
||||
narrow_stringbuf (SCM buf)
|
||||
{
|
||||
size_t i, len;
|
||||
scm_t_wchar *wmem;
|
||||
char *mem;
|
||||
|
||||
if (!STRINGBUF_WIDE (buf))
|
||||
return;
|
||||
|
||||
len = STRINGBUF_OUTLINE_LENGTH (buf);
|
||||
i = 0;
|
||||
wmem = STRINGBUF_WIDE_CHARS (buf);
|
||||
while (i < len)
|
||||
if (wmem[i++] > 0xFF)
|
||||
return;
|
||||
|
||||
mem = scm_gc_malloc (sizeof (char) * (len + 1), "string");
|
||||
for (i = 0; i < len; i++)
|
||||
mem[i] = (unsigned char) wmem[i];
|
||||
|
||||
scm_gc_free (wmem, sizeof (scm_t_wchar) * (len + 1), "string");
|
||||
|
||||
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_WIDE);
|
||||
SCM_SET_CELL_WORD_1 (buf, mem);
|
||||
SCM_SET_CELL_WORD_2 (buf, len);
|
||||
}
|
||||
|
||||
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
/* Copy-on-write strings.
|
||||
|
@ -460,6 +492,18 @@ scm_i_is_narrow_string (SCM str)
|
|||
return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
|
||||
}
|
||||
|
||||
/* Try to coerce a string to be narrow. It if is narrow already, do
|
||||
nothing. If it is wide, shrink it to narrow if none of its
|
||||
characters are above 0xFF. Return true if the string is narrow or
|
||||
was made to be narrow. */
|
||||
int
|
||||
scm_i_try_narrow_string (SCM str)
|
||||
{
|
||||
narrow_stringbuf (STRING_STRINGBUF (str));
|
||||
|
||||
return scm_i_is_narrow_string (str);
|
||||
}
|
||||
|
||||
/* Returns a pointer to the 8-bit Latin-1 encoded character array of
|
||||
STR. */
|
||||
const char *
|
||||
|
@ -591,6 +635,37 @@ scm_i_string_ref (SCM str, size_t x)
|
|||
return scm_i_string_wide_chars (str)[x];
|
||||
}
|
||||
|
||||
/* Returns index+1 of the first char in STR that matches C, or
|
||||
0 if the char is not found. */
|
||||
int
|
||||
scm_i_string_contains_char (SCM str, char ch)
|
||||
{
|
||||
size_t i;
|
||||
size_t len = scm_i_string_length (str);
|
||||
|
||||
i = 0;
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
while (i < len)
|
||||
{
|
||||
if (scm_i_string_chars (str)[i] == ch)
|
||||
return i+1;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (i < len)
|
||||
{
|
||||
if (scm_i_string_wide_chars (str)[i]
|
||||
== (unsigned char) ch)
|
||||
return i+1;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
|
||||
{
|
||||
|
@ -624,7 +699,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
|
|||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
char *dst = scm_i_string_writable_chars (str);
|
||||
dst[p] = (char) (unsigned char) chr;
|
||||
dst[p] = chr;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -634,7 +709,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
|
|||
}
|
||||
|
||||
/* Symbols.
|
||||
|
||||
|
||||
Basic symbol creation and accessing is done here, the rest is in
|
||||
symbols.[hc]. This has been done to keep stringbufs and the
|
||||
internals of strings and string-like objects confined to this file.
|
||||
|
@ -867,7 +942,7 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
|
|||
else
|
||||
e5 = scm_cons (scm_from_locale_symbol ("read-only"),
|
||||
SCM_BOOL_F);
|
||||
|
||||
|
||||
/* Stringbuf info */
|
||||
if (!STRINGBUF_WIDE (buf))
|
||||
{
|
||||
|
@ -1402,20 +1477,105 @@ scm_is_string (SCM obj)
|
|||
return IS_STRING (obj);
|
||||
}
|
||||
|
||||
static SCM
|
||||
scm_from_stringn (const char *str, size_t len, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
size_t u32len, i;
|
||||
scm_t_wchar *u32;
|
||||
int wide = 0;
|
||||
SCM res;
|
||||
|
||||
if (encoding == NULL)
|
||||
{
|
||||
/* If encoding is null, use Latin-1. */
|
||||
char *buf;
|
||||
res = scm_i_make_string (len, &buf);
|
||||
memcpy (buf, str, len);
|
||||
return res;
|
||||
}
|
||||
|
||||
u32len = 0;
|
||||
u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
|
||||
(enum iconv_ilseq_handler)
|
||||
handler,
|
||||
str, len,
|
||||
NULL,
|
||||
NULL, &u32len);
|
||||
|
||||
if (u32 == NULL)
|
||||
{
|
||||
if (errno == ENOMEM)
|
||||
scm_memory_error ("locale string conversion");
|
||||
else
|
||||
{
|
||||
/* There are invalid sequences in the input string. */
|
||||
SCM errstr;
|
||||
char *dst;
|
||||
errstr = scm_i_make_string (len, &dst);
|
||||
memcpy (dst, str, len);
|
||||
scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
|
||||
scm_list_2 (scm_from_locale_string (encoding),
|
||||
errstr));
|
||||
scm_remember_upto_here_1 (errstr);
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < u32len)
|
||||
if (u32[i++] > 0xFF)
|
||||
{
|
||||
wide = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!wide)
|
||||
{
|
||||
char *dst;
|
||||
res = scm_i_make_string (u32len, &dst);
|
||||
for (i = 0; i < u32len; i ++)
|
||||
dst[i] = (unsigned char) u32[i];
|
||||
dst[u32len] = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
scm_t_wchar *wdst;
|
||||
res = scm_i_make_wide_string (u32len, &wdst);
|
||||
u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
|
||||
wdst[u32len] = 0;
|
||||
}
|
||||
|
||||
free (u32);
|
||||
return res;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_from_locale_stringn (const char *str, size_t len)
|
||||
{
|
||||
SCM res;
|
||||
char *dst;
|
||||
const char *enc;
|
||||
scm_t_string_failed_conversion_handler hndl;
|
||||
SCM inport;
|
||||
scm_t_port *pt;
|
||||
|
||||
if (len == (size_t) -1)
|
||||
len = strlen (str);
|
||||
if (len == 0)
|
||||
return scm_nullstr;
|
||||
|
||||
res = scm_i_make_string (len, &dst);
|
||||
memcpy (dst, str, len);
|
||||
return res;
|
||||
inport = scm_current_input_port ();
|
||||
if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
|
||||
{
|
||||
pt = SCM_PTAB_ENTRY (inport);
|
||||
enc = pt->encoding;
|
||||
hndl = pt->ilseq_handler;
|
||||
}
|
||||
else
|
||||
{
|
||||
enc = NULL;
|
||||
hndl = SCM_FAILED_CONVERSION_ERROR;
|
||||
}
|
||||
|
||||
return scm_from_stringn (str, len, enc, hndl);
|
||||
}
|
||||
|
||||
SCM
|
||||
|
@ -1427,6 +1587,14 @@ scm_from_locale_string (const char *str)
|
|||
return scm_from_locale_stringn (str, -1);
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_i_from_utf8_string (const scm_t_uint8 *str)
|
||||
{
|
||||
return scm_from_stringn ((const char *) str,
|
||||
strlen ((char *) str), "UTF-8",
|
||||
SCM_FAILED_CONVERSION_ERROR);
|
||||
}
|
||||
|
||||
/* Create a new scheme string from the C string STR. The memory of
|
||||
STR may be used directly as storage for the new string. */
|
||||
SCM
|
||||
|
@ -1515,23 +1683,33 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
|
|||
char *
|
||||
scm_to_locale_stringn (SCM str, size_t * lenp)
|
||||
{
|
||||
SCM outport;
|
||||
scm_t_port *pt;
|
||||
const char *enc;
|
||||
|
||||
/* In the future, enc will hold the port's encoding. */
|
||||
enc = NULL;
|
||||
outport = scm_current_output_port ();
|
||||
if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
|
||||
{
|
||||
pt = SCM_PTAB_ENTRY (outport);
|
||||
enc = pt->encoding;
|
||||
}
|
||||
else
|
||||
enc = NULL;
|
||||
|
||||
return scm_to_stringn (str, lenp, enc,
|
||||
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
return scm_to_stringn (str, lenp,
|
||||
enc,
|
||||
scm_i_get_conversion_strategy (SCM_BOOL_F));
|
||||
}
|
||||
|
||||
/* Low-level scheme to C string conversion function. */
|
||||
char *
|
||||
scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
||||
scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
|
||||
scm_t_string_failed_conversion_handler handler)
|
||||
{
|
||||
static const char iso[11] = "ISO-8859-1";
|
||||
char *buf;
|
||||
size_t ilen, len, i;
|
||||
int ret;
|
||||
const char *enc;
|
||||
|
||||
if (!scm_is_string (str))
|
||||
scm_wrong_type_arg_msg (NULL, 0, str, "string");
|
||||
|
@ -1545,7 +1723,7 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
|||
*lenp = 0;
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
if (lenp == NULL)
|
||||
for (i = 0; i < ilen; i++)
|
||||
if (scm_i_string_ref (str, i) == '\0')
|
||||
|
@ -1553,8 +1731,10 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
|||
"string contains #\\nul character: ~S",
|
||||
scm_list_1 (str));
|
||||
|
||||
if (scm_i_is_narrow_string (str))
|
||||
if (scm_i_is_narrow_string (str) && (encoding == NULL))
|
||||
{
|
||||
/* If using native Latin-1 encoding, just copy the string
|
||||
contents. */
|
||||
if (lenp)
|
||||
{
|
||||
buf = scm_malloc (ilen);
|
||||
|
@ -1571,20 +1751,44 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
buf = NULL;
|
||||
len = 0;
|
||||
buf = u32_conv_to_encoding (iso,
|
||||
(enum iconv_ilseq_handler) handler,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (str),
|
||||
ilen, NULL, NULL, &len);
|
||||
if (buf == NULL)
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (iso), str));
|
||||
enc = encoding;
|
||||
if (enc == NULL)
|
||||
enc = "ISO-8859-1";
|
||||
if (scm_i_is_narrow_string (str))
|
||||
{
|
||||
ret = mem_iconveh (scm_i_string_chars (str), ilen,
|
||||
"ISO-8859-1", enc,
|
||||
(enum iconv_ilseq_handler) handler, NULL,
|
||||
&buf, &len);
|
||||
|
||||
if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
unistring_escapes_to_guile_escapes (&buf, &len);
|
||||
if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
|
||||
unistring_escapes_to_guile_escapes (&buf, &len);
|
||||
|
||||
if (ret != 0)
|
||||
{
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (enc),
|
||||
str));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = u32_conv_to_encoding (enc,
|
||||
(enum iconv_ilseq_handler) handler,
|
||||
(scm_t_uint32 *) scm_i_string_wide_chars (str),
|
||||
ilen,
|
||||
NULL,
|
||||
NULL, &len);
|
||||
if (buf == NULL)
|
||||
{
|
||||
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
|
||||
scm_list_2 (scm_from_locale_string (enc),
|
||||
str));
|
||||
}
|
||||
}
|
||||
if (lenp)
|
||||
*lenp = len;
|
||||
else
|
||||
|
@ -1603,6 +1807,14 @@ scm_to_locale_string (SCM str)
|
|||
return scm_to_locale_stringn (str, NULL);
|
||||
}
|
||||
|
||||
scm_t_uint8 *
|
||||
scm_i_to_utf8_string (SCM str)
|
||||
{
|
||||
char *u8str;
|
||||
u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
|
||||
return (scm_t_uint8 *) u8str;
|
||||
}
|
||||
|
||||
size_t
|
||||
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
|
||||
{
|
||||
|
|
|
@ -124,6 +124,7 @@ SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
|
|||
SCM_API int scm_is_string (SCM x);
|
||||
SCM_API SCM scm_from_locale_string (const char *str);
|
||||
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
|
||||
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
|
||||
SCM_API SCM scm_take_locale_string (char *str);
|
||||
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
|
||||
SCM_API char *scm_to_locale_string (SCM str);
|
||||
|
@ -132,6 +133,7 @@ SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
|
|||
const char *encoding,
|
||||
scm_t_string_failed_conversion_handler
|
||||
handler);
|
||||
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
|
||||
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
|
||||
|
||||
SCM_API SCM scm_makfromstrs (int argc, char **argv);
|
||||
|
@ -152,6 +154,7 @@ SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
|
|||
SCM_INTERNAL void scm_i_string_stop_writing (void);
|
||||
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
|
||||
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
|
||||
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
|
||||
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
|
||||
/* internal functions related to symbols. */
|
||||
|
@ -168,6 +171,7 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
|
|||
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
|
||||
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
|
||||
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
|
||||
SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
|
||||
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
|
||||
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include "libguile/modules.h"
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/deprecation.h"
|
||||
#include "libguile/srfi-4.h"
|
||||
|
||||
#include "libguile/strports.h"
|
||||
|
||||
|
@ -289,42 +290,33 @@ st_truncate (SCM port, scm_t_off length)
|
|||
}
|
||||
|
||||
SCM
|
||||
scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
||||
scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, const char *caller)
|
||||
{
|
||||
SCM z;
|
||||
SCM z, str;
|
||||
scm_t_port *pt;
|
||||
size_t str_len, c_pos;
|
||||
size_t c_pos;
|
||||
char *buf;
|
||||
|
||||
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
|
||||
/* Because ports are inherently 8-bit, strings need to be converted
|
||||
to a locale representation for storage. But, since string ports
|
||||
rely on string functionality for their memory management, we need
|
||||
to create a new string that has the 8-bit locale representation
|
||||
of the underlying string. This violates the guideline that the
|
||||
internal encoding of characters in strings is in unicode
|
||||
codepoints. */
|
||||
str = scm_i_make_string (str_len, &buf);
|
||||
memcpy (buf, locale_str, str_len);
|
||||
|
||||
str_len = scm_i_string_length (str);
|
||||
c_pos = scm_to_unsigned_integer (pos, 0, str_len);
|
||||
|
||||
if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
|
||||
scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
|
||||
|
||||
/* XXX
|
||||
|
||||
Make a new string to isolate us from changes to the original.
|
||||
This is done so that we can rely on scm_i_string_chars to stay in
|
||||
place even across SCM_TICKs.
|
||||
|
||||
Additionally, when we are going to write to the string, we make a
|
||||
copy so that we can write to it without having to use
|
||||
scm_i_string_writable_chars.
|
||||
*/
|
||||
|
||||
if (modes & SCM_WRTNG)
|
||||
str = scm_c_substring_copy (str, 0, str_len);
|
||||
else
|
||||
str = scm_c_substring (str, 0, str_len);
|
||||
|
||||
scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
|
||||
z = scm_new_port_table_entry (scm_tc16_strport);
|
||||
pt = SCM_PTAB_ENTRY(z);
|
||||
SCM_SETSTREAM (z, SCM_UNPACK (str));
|
||||
SCM_SET_CELL_TYPE(z, scm_tc16_strport|modes);
|
||||
/* see above why we can use scm_i_string_chars here. */
|
||||
pt->write_buf = pt->read_buf = (unsigned char *) scm_i_string_chars (str);
|
||||
pt->read_pos = pt->write_pos = pt->read_buf + c_pos;
|
||||
pt->write_buf_size = pt->read_buf_size = str_len;
|
||||
|
@ -340,22 +332,60 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
|||
return z;
|
||||
}
|
||||
|
||||
SCM
|
||||
scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
|
||||
{
|
||||
SCM z;
|
||||
size_t str_len;
|
||||
char *buf;
|
||||
|
||||
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
|
||||
|
||||
/* Because ports are inherently 8-bit, strings need to be converted
|
||||
to a locale representation for storage. But, since string ports
|
||||
rely on string functionality for their memory management, we need
|
||||
to create a new string that has the 8-bit locale representation
|
||||
of the underlying string. This violates the guideline that the
|
||||
internal encoding of characters in strings is in unicode
|
||||
codepoints. */
|
||||
buf = scm_to_locale_stringn (str, &str_len);
|
||||
z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
|
||||
free (buf);
|
||||
return z;
|
||||
}
|
||||
|
||||
/* create a new string from a string port's buffer. */
|
||||
SCM scm_strport_to_string (SCM port)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
SCM str;
|
||||
char *dst;
|
||||
|
||||
if (pt->rw_active == SCM_PORT_WRITE)
|
||||
st_flush (port);
|
||||
|
||||
str = scm_i_make_string (pt->read_buf_size, &dst);
|
||||
memcpy (dst, (char *) pt->read_buf, pt->read_buf_size);
|
||||
str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
|
||||
scm_remember_upto_here_1 (port);
|
||||
return str;
|
||||
}
|
||||
|
||||
/* Create a vector containing the locale representation of the string in the
|
||||
port's buffer. */
|
||||
SCM scm_strport_to_locale_u8vector (SCM port)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
SCM vec;
|
||||
char *buf;
|
||||
|
||||
if (pt->rw_active == SCM_PORT_WRITE)
|
||||
st_flush (port);
|
||||
|
||||
buf = scm_malloc (pt->read_buf_size);
|
||||
memcpy (buf, pt->read_buf, pt->read_buf_size);
|
||||
vec = scm_take_u8vector ((unsigned char *) buf, pt->read_buf_size);
|
||||
scm_remember_upto_here_1 (port);
|
||||
return vec;
|
||||
}
|
||||
|
||||
SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
|
||||
(SCM obj, SCM printer),
|
||||
"Return a Scheme string obtained by printing @var{obj}.\n"
|
||||
|
@ -380,6 +410,25 @@ SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_call_with_output_locale_u8vector, "call-with-output-locale-u8vector", 1, 0, 0,
|
||||
(SCM proc),
|
||||
"Calls the one-argument procedure @var{proc} with a newly created output\n"
|
||||
"port. When the function returns, a vector containing the bytes of a\n"
|
||||
"locale representation of the characters written into the port is returned\n")
|
||||
#define FUNC_NAME s_scm_call_with_output_locale_u8vector
|
||||
{
|
||||
SCM p;
|
||||
|
||||
p = scm_mkstrport (SCM_INUM0,
|
||||
scm_make_string (SCM_INUM0, SCM_UNDEFINED),
|
||||
SCM_OPN | SCM_WRTNG,
|
||||
FUNC_NAME);
|
||||
scm_call_1 (proc, p);
|
||||
|
||||
return scm_get_output_locale_u8vector (p);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0,
|
||||
(SCM proc),
|
||||
"Calls the one-argument procedure @var{proc} with a newly created output\n"
|
||||
|
@ -424,6 +473,27 @@ SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 0, 0,
|
|||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_open_input_locale_u8vector, "open-input-locale-u8vector", 1, 0, 0,
|
||||
(SCM vec),
|
||||
"Take a u8vector containing the bytes of a string encoded in the\n"
|
||||
"current locale and return an input port that delivers characters\n"
|
||||
"from the string. The port can be closed by\n"
|
||||
"@code{close-input-port}, though its storage will be reclaimed\n"
|
||||
"by the garbage collector if it becomes inaccessible.")
|
||||
#define FUNC_NAME s_scm_open_input_locale_u8vector
|
||||
{
|
||||
scm_t_array_handle hnd;
|
||||
ssize_t inc;
|
||||
size_t len;
|
||||
const scm_t_uint8 *buf;
|
||||
|
||||
buf = scm_u8vector_elements (vec, &hnd, &len, &inc);
|
||||
SCM p = scm_i_mkstrport(SCM_INUM0, (const char *) buf, len, SCM_OPN | SCM_RDNG, FUNC_NAME);
|
||||
scm_array_handle_release (&hnd);
|
||||
return p;
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0,
|
||||
(void),
|
||||
"Return an output port that will accumulate characters for\n"
|
||||
|
@ -456,11 +526,26 @@ SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 0, 0,
|
|||
#undef FUNC_NAME
|
||||
|
||||
|
||||
SCM_DEFINE (scm_get_output_locale_u8vector, "get-output-locale-u8vector", 1, 0, 0,
|
||||
(SCM port),
|
||||
"Given an output port created by @code{open-output-string},\n"
|
||||
"return a u8 vector containing the characters of the string\n"
|
||||
"encoded in the current locale.")
|
||||
#define FUNC_NAME s_scm_get_output_locale_u8vector
|
||||
{
|
||||
SCM_VALIDATE_OPOUTSTRPORT (1, port);
|
||||
return scm_strport_to_locale_u8vector (port);
|
||||
}
|
||||
#undef FUNC_NAME
|
||||
|
||||
|
||||
/* Given a null-terminated string EXPR containing a Scheme expression
|
||||
read it, and return it as an SCM value. */
|
||||
SCM
|
||||
scm_c_read_string (const char *expr)
|
||||
{
|
||||
/* FIXME: the c string gets packed into a string, only to get
|
||||
immediately unpacked in scm_mkstrport. */
|
||||
SCM port = scm_mkstrport (SCM_INUM0,
|
||||
scm_from_locale_string (expr),
|
||||
SCM_OPN | SCM_RDNG,
|
||||
|
|
|
@ -44,13 +44,19 @@ SCM_API scm_t_bits scm_tc16_strport;
|
|||
|
||||
|
||||
SCM_API SCM scm_mkstrport (SCM pos, SCM str, long modes, const char * caller);
|
||||
SCM_INTERNAL SCM scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len,
|
||||
long modes, const char *caller);
|
||||
SCM_API SCM scm_strport_to_string (SCM port);
|
||||
SCM_API SCM scm_strport_to_locale_u8vector (SCM port);
|
||||
SCM_API SCM scm_object_to_string (SCM obj, SCM printer);
|
||||
SCM_API SCM scm_call_with_output_string (SCM proc);
|
||||
SCM_API SCM scm_call_with_output_locale_u8vector (SCM proc);
|
||||
SCM_API SCM scm_call_with_input_string (SCM str, SCM proc);
|
||||
SCM_API SCM scm_open_input_string (SCM str);
|
||||
SCM_API SCM scm_open_input_locale_u8vector (SCM str);
|
||||
SCM_API SCM scm_open_output_string (void);
|
||||
SCM_API SCM scm_get_output_string (SCM port);
|
||||
SCM_API SCM scm_get_output_locale_u8vector (SCM port);
|
||||
SCM_API SCM scm_c_read_string (const char *expr);
|
||||
SCM_API SCM scm_c_eval_string (const char *expr);
|
||||
SCM_API SCM scm_c_eval_string_in_module (const char *expr, SCM module);
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "libguile/hashtab.h"
|
||||
#include "libguile/ports.h"
|
||||
#include "libguile/strings.h"
|
||||
#include "libguile/srfi-13.h"
|
||||
|
||||
#include "libguile/validate.h"
|
||||
#include "libguile/struct.h"
|
||||
|
@ -61,9 +62,9 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
|
|||
{
|
||||
SCM new_sym;
|
||||
SCM_VALIDATE_STRING (1, fields);
|
||||
scm_t_wchar c;
|
||||
|
||||
{ /* scope */
|
||||
const char * field_desc;
|
||||
size_t len;
|
||||
int x;
|
||||
|
||||
|
@ -72,11 +73,9 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
|
|||
SCM_MISC_ERROR ("odd length field specification: ~S",
|
||||
scm_list_1 (fields));
|
||||
|
||||
field_desc = scm_i_string_chars (fields);
|
||||
|
||||
for (x = 0; x < len; x += 2)
|
||||
{
|
||||
switch (field_desc[x])
|
||||
switch (c = scm_i_string_ref (fields, x))
|
||||
{
|
||||
case 'u':
|
||||
case 'p':
|
||||
|
@ -88,13 +87,13 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
|
|||
break;
|
||||
default:
|
||||
SCM_MISC_ERROR ("unrecognized field type: ~S",
|
||||
scm_list_1 (SCM_MAKE_CHAR (field_desc[x])));
|
||||
scm_list_1 (SCM_MAKE_CHAR (c)));
|
||||
}
|
||||
|
||||
switch (field_desc[x + 1])
|
||||
switch (c = scm_i_string_ref (fields, x + 1))
|
||||
{
|
||||
case 'w':
|
||||
if (field_desc[x] == 's')
|
||||
if (scm_i_string_ref (fields, x) == 's')
|
||||
SCM_MISC_ERROR ("self fields not writable", SCM_EOL);
|
||||
case 'r':
|
||||
case 'o':
|
||||
|
@ -102,7 +101,7 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
|
|||
case 'R':
|
||||
case 'W':
|
||||
case 'O':
|
||||
if (field_desc[x] == 's')
|
||||
if (scm_i_string_ref (fields, x) == 's')
|
||||
SCM_MISC_ERROR ("self fields not allowed in tail array",
|
||||
SCM_EOL);
|
||||
if (x != len - 2)
|
||||
|
@ -111,12 +110,12 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
|
|||
break;
|
||||
default:
|
||||
SCM_MISC_ERROR ("unrecognized ref specification: ~S",
|
||||
scm_list_1 (SCM_MAKE_CHAR (field_desc[x + 1])));
|
||||
scm_list_1 (SCM_MAKE_CHAR (c)));
|
||||
}
|
||||
#if 0
|
||||
if (field_desc[x] == 'd')
|
||||
if (scm_i_string_ref (fields, x, 'd'))
|
||||
{
|
||||
if (field_desc[x + 2] != '-')
|
||||
if (!scm_i_string_ref (fields, x+2, '-'))
|
||||
SCM_MISC_ERROR ("missing dash field at position ~A",
|
||||
scm_list_1 (scm_from_int (x / 2)));
|
||||
x += 2;
|
||||
|
@ -138,18 +137,18 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
|
|||
static void
|
||||
scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM inits)
|
||||
{
|
||||
unsigned const char *fields_desc =
|
||||
(unsigned const char *) scm_i_symbol_chars (layout) - 2;
|
||||
unsigned char prot = 0;
|
||||
scm_t_wchar prot = 0;
|
||||
int n_fields = scm_i_symbol_length (layout) / 2;
|
||||
int tailp = 0;
|
||||
int i;
|
||||
|
||||
i = -2;
|
||||
while (n_fields)
|
||||
{
|
||||
if (!tailp)
|
||||
{
|
||||
fields_desc += 2;
|
||||
prot = fields_desc[1];
|
||||
i += 2;
|
||||
prot = scm_i_symbol_ref (layout, i+1);
|
||||
if (SCM_LAYOUT_TAILP (prot))
|
||||
{
|
||||
tailp = 1;
|
||||
|
@ -160,8 +159,7 @@ scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM in
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
switch (*fields_desc)
|
||||
switch (scm_i_symbol_ref (layout, i))
|
||||
{
|
||||
#if 0
|
||||
case 'i':
|
||||
|
@ -237,7 +235,8 @@ SCM_DEFINE (scm_struct_vtable_p, "struct-vtable?", 1, 0, 0,
|
|||
{
|
||||
SCM layout;
|
||||
scm_t_bits * mem;
|
||||
int tmp;
|
||||
SCM tmp;
|
||||
size_t len;
|
||||
|
||||
if (!SCM_STRUCTP (x))
|
||||
return SCM_BOOL_F;
|
||||
|
@ -248,11 +247,14 @@ SCM_DEFINE (scm_struct_vtable_p, "struct-vtable?", 1, 0, 0,
|
|||
< scm_i_string_length (required_vtable_fields))
|
||||
return SCM_BOOL_F;
|
||||
|
||||
tmp = strncmp (scm_i_symbol_chars (layout),
|
||||
scm_i_string_chars (required_vtable_fields),
|
||||
scm_i_string_length (required_vtable_fields));
|
||||
scm_remember_upto_here_1 (required_vtable_fields);
|
||||
if (tmp)
|
||||
len = scm_i_string_length (required_vtable_fields);
|
||||
tmp = scm_string_eq (scm_symbol_to_string (layout),
|
||||
required_vtable_fields,
|
||||
scm_from_size_t (0),
|
||||
scm_from_size_t (len),
|
||||
scm_from_size_t (0),
|
||||
scm_from_size_t (len));
|
||||
if (scm_is_false (tmp))
|
||||
return SCM_BOOL_F;
|
||||
|
||||
mem = SCM_STRUCT_DATA (x);
|
||||
|
@ -646,8 +648,7 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
|
|||
size_t layout_len;
|
||||
size_t p;
|
||||
scm_t_bits n_fields;
|
||||
const char *fields_desc;
|
||||
char field_type = 0;
|
||||
scm_t_wchar field_type = 0;
|
||||
|
||||
|
||||
SCM_VALIDATE_STRUCT (1, handle);
|
||||
|
@ -656,7 +657,6 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
|
|||
data = SCM_STRUCT_DATA (handle);
|
||||
p = scm_to_size_t (pos);
|
||||
|
||||
fields_desc = scm_i_symbol_chars (layout);
|
||||
layout_len = scm_i_symbol_length (layout);
|
||||
if (SCM_STRUCT_VTABLE_FLAGS (handle) & SCM_STRUCTF_LIGHT)
|
||||
/* no extra words */
|
||||
|
@ -668,9 +668,9 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
|
|||
|
||||
if (p * 2 < layout_len)
|
||||
{
|
||||
char ref;
|
||||
field_type = fields_desc[p * 2];
|
||||
ref = fields_desc[p * 2 + 1];
|
||||
scm_t_wchar ref;
|
||||
field_type = scm_i_symbol_ref (layout, p * 2);
|
||||
ref = scm_i_symbol_ref (layout, p * 2 + 1);
|
||||
if ((ref != 'r') && (ref != 'w'))
|
||||
{
|
||||
if ((ref == 'R') || (ref == 'W'))
|
||||
|
@ -679,8 +679,8 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
|
|||
SCM_MISC_ERROR ("ref denied for field ~A", scm_list_1 (pos));
|
||||
}
|
||||
}
|
||||
else if (fields_desc[layout_len - 1] != 'O')
|
||||
field_type = fields_desc[layout_len - 2];
|
||||
else if (scm_i_symbol_ref (layout, layout_len - 1) != 'O')
|
||||
field_type = scm_i_symbol_ref(layout, layout_len - 2);
|
||||
else
|
||||
SCM_MISC_ERROR ("ref denied for field ~A", scm_list_1 (pos));
|
||||
|
||||
|
@ -728,8 +728,7 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
|
|||
size_t layout_len;
|
||||
size_t p;
|
||||
int n_fields;
|
||||
const char *fields_desc;
|
||||
char field_type = 0;
|
||||
scm_t_wchar field_type = 0;
|
||||
|
||||
SCM_VALIDATE_STRUCT (1, handle);
|
||||
|
||||
|
@ -737,7 +736,6 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
|
|||
data = SCM_STRUCT_DATA (handle);
|
||||
p = scm_to_size_t (pos);
|
||||
|
||||
fields_desc = scm_i_symbol_chars (layout);
|
||||
layout_len = scm_i_symbol_length (layout);
|
||||
if (SCM_STRUCT_VTABLE_FLAGS (handle) & SCM_STRUCTF_LIGHT)
|
||||
/* no extra words */
|
||||
|
@ -750,13 +748,13 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
|
|||
if (p * 2 < layout_len)
|
||||
{
|
||||
char set_x;
|
||||
field_type = fields_desc[p * 2];
|
||||
set_x = fields_desc [p * 2 + 1];
|
||||
field_type = scm_i_symbol_ref (layout, p * 2);
|
||||
set_x = scm_i_symbol_ref (layout, p * 2 + 1);
|
||||
if (set_x != 'w')
|
||||
SCM_MISC_ERROR ("set! denied for field ~A", scm_list_1 (pos));
|
||||
}
|
||||
else if (fields_desc[layout_len - 1] == 'W')
|
||||
field_type = fields_desc[layout_len - 2];
|
||||
else if (scm_i_symbol_ref (layout, layout_len - 1) == 'W')
|
||||
field_type = scm_i_symbol_ref (layout, layout_len - 2);
|
||||
else
|
||||
SCM_MISC_ERROR ("set! denied for field ~A", scm_list_1 (pos));
|
||||
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistdio.h>
|
||||
#include "libguile/_scm.h"
|
||||
#include "libguile/async.h"
|
||||
#include "libguile/smob.h"
|
||||
|
@ -744,8 +745,12 @@ scm_ithrow (SCM key, SCM args, int noreturn SCM_UNUSED)
|
|||
*/
|
||||
fprintf (stderr, "throw from within critical section.\n");
|
||||
if (scm_is_symbol (key))
|
||||
fprintf (stderr, "error key: %s\n", scm_i_symbol_chars (key));
|
||||
|
||||
{
|
||||
if (scm_i_is_narrow_symbol (key))
|
||||
fprintf (stderr, "error key: %s\n", scm_i_symbol_chars (key));
|
||||
else
|
||||
ulc_fprintf (stderr, "error key: %llU\n", scm_i_symbol_wide_chars (key));
|
||||
}
|
||||
|
||||
for (; scm_is_pair (s); s = scm_cdr (s), i++)
|
||||
{
|
||||
|
|
|
@ -145,8 +145,11 @@
|
|||
(from (current-language))
|
||||
(to 'objcode)
|
||||
(opts '()))
|
||||
(let ((comp (or output-file (compiled-file-name file)))
|
||||
(in (open-input-file file)))
|
||||
(let* ((comp (or output-file (compiled-file-name file)))
|
||||
(in (open-input-file file))
|
||||
(enc (file-encoding in)))
|
||||
(if enc
|
||||
(set-port-encoding! in enc))
|
||||
(ensure-writable-dir (dirname comp))
|
||||
(call-with-output-file/atomic comp
|
||||
(lambda (port)
|
||||
|
|
139
test-suite/tests/encoding-escapes.test
Normal file
139
test-suite/tests/encoding-escapes.test
Normal file
|
@ -0,0 +1,139 @@
|
|||
;;;; encoding-escapes.test --- test suite for Guile's string encodings -*- mode: scheme; coding: utf-8 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(define s1 "última")
|
||||
(define s2 "cédula")
|
||||
(define s3 "años")
|
||||
(define s4 "羅生門")
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "ultima"
|
||||
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
|
||||
|
||||
(pass-if "cedula"
|
||||
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
|
||||
|
||||
(pass-if "anos"
|
||||
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
|
||||
|
||||
(pass-if "Rashomon"
|
||||
(string=? s4 (string-ints #x7f85 #x751f #x9580))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "ultima"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\372 #\l #\t #\i #\m #\a)))
|
||||
|
||||
(pass-if "cedula"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\c #\351 #\d #\u #\l #\a)))
|
||||
|
||||
(pass-if "anos"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\a #\361 #\o #\s)))
|
||||
|
||||
(pass-if "Rashomon"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\77605 #\72437 #\112600))))
|
||||
|
||||
|
||||
;; Check that an error is flagged on display output when the output
|
||||
;; error strategy is 'error
|
||||
|
||||
(with-test-prefix "display output errors"
|
||||
|
||||
(pass-if-exception "ultima"
|
||||
exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display s1 pt)))
|
||||
|
||||
(pass-if-exception "Rashomon"
|
||||
exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display s4 pt))))
|
||||
|
||||
;; Check that questions marks or substitutions appear when the conversion
|
||||
;; mode is substitute
|
||||
(with-test-prefix "display output substitutions"
|
||||
|
||||
(pass-if "ultima"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'substitute)
|
||||
(display s1 pt)
|
||||
(string=? "?ltima"
|
||||
(get-output-string pt))))
|
||||
|
||||
(pass-if "Rashomon"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'substitute)
|
||||
(display s4 pt)
|
||||
(string=? "???"
|
||||
(get-output-string pt)))))
|
||||
|
||||
|
||||
;; Check that hex escapes appear in the write output and that no error
|
||||
;; is thrown. The output error strategy should be irrelevant here.
|
||||
(with-test-prefix "display output escapes"
|
||||
|
||||
(pass-if "ultima"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'escape)
|
||||
(display s1 pt)
|
||||
(string=? "\\xfaltima"
|
||||
(get-output-string pt))))
|
||||
(pass-if "Rashomon"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ASCII")
|
||||
(set-port-conversion-strategy! pt 'escape)
|
||||
(display s4 pt)
|
||||
(string=? "\\u7F85\\u751F\\u9580"
|
||||
(get-output-string pt)))))
|
||||
|
||||
(setlocale LC_ALL "en_US.utf8")
|
||||
|
||||
(with-test-prefix "input escapes"
|
||||
|
||||
(pass-if "última"
|
||||
(string=? "última"
|
||||
(with-input-from-string "\"\\xfaltima\"" read)))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(string=? "羅生門"
|
||||
(with-input-from-string "\"\\u7F85\\u751F\\u9580\"" read))))
|
||||
|
135
test-suite/tests/encoding-iso88591.test
Normal file
135
test-suite/tests/encoding-iso88591.test
Normal file
|
@ -0,0 +1,135 @@
|
|||
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: iso-8859-1 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(setlocale LC_ALL "")
|
||||
|
||||
(define s1 "última")
|
||||
(define s2 "cédula")
|
||||
(define s3 "años")
|
||||
(define s4 "¿Cómo?")
|
||||
|
||||
(with-test-prefix "string length"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string-length s1) 6))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string-length s2) 6))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string-length s3) 4))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(eq? (string-length s4) 6)))
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "última"
|
||||
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
|
||||
|
||||
(pass-if "años"
|
||||
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(string=? s4 (string-ints #xbf #x43 #xf3 #x6d #x6f #x3f))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "última"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\ú #\l #\t #\i #\m #\a)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\c #\é #\d #\u #\l #\a)))
|
||||
|
||||
(pass-if "años"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\a #\ñ #\o #\s)))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\¿ #\C #\ó #\m #\o #\?))))
|
||||
|
||||
;; Check that the output is in ISO-8859-1 encoding
|
||||
(with-test-prefix "display"
|
||||
|
||||
(pass-if "s1"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-1")
|
||||
(display s1 pt)
|
||||
(list= eqv?
|
||||
(list #xfa #x6c #x74 #x69 #x6d #x61)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt)))))
|
||||
|
||||
(pass-if "s2"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-1")
|
||||
(display s2 pt)
|
||||
(list= eqv?
|
||||
(list #x63 #xe9 #x64 #x75 #x6c #x61)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt))))))
|
||||
|
||||
(with-test-prefix "symbols == strings"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string->symbol s1) 'última))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string->symbol s2) 'cédula))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string->symbol s3) 'años))
|
||||
|
||||
(pass-if "¿Cómo?"
|
||||
(eq? (string->symbol s4) '¿Cómo?)))
|
||||
|
||||
(with-test-prefix "non-ascii variable names"
|
||||
|
||||
(pass-if "1"
|
||||
(let ((á 1)
|
||||
(ñ 2))
|
||||
(eq? (+ á ñ) 3))))
|
||||
|
||||
(with-test-prefix "output errors"
|
||||
|
||||
(pass-if-exception "char 256" exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-1")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display (string-ints 256) pt))))
|
||||
|
||||
;; Reset locales
|
||||
(setlocale LC_ALL "C")
|
136
test-suite/tests/encoding-iso88597.test
Normal file
136
test-suite/tests/encoding-iso88597.test
Normal file
|
@ -0,0 +1,136 @@
|
|||
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: iso-8859-7 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(setlocale LC_ALL "")
|
||||
|
||||
(define s1 "Ðåñß")
|
||||
(define s2 "ôçò")
|
||||
(define s3 "êñéôéêÞò")
|
||||
(define s4 "êáé")
|
||||
|
||||
(with-test-prefix "string length"
|
||||
|
||||
(pass-if "s1"
|
||||
(eq? (string-length s1) 4))
|
||||
|
||||
(pass-if "s2"
|
||||
(eq? (string-length s2) 3))
|
||||
|
||||
(pass-if "s3"
|
||||
(eq? (string-length s3) 8))
|
||||
|
||||
(pass-if "s4"
|
||||
(eq? (string-length s4) 3)))
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "s1"
|
||||
(string=? s1 (string-ints #x03a0 #x03b5 #x03c1 #x03af)))
|
||||
|
||||
(pass-if "s2"
|
||||
(string=? s2 (string-ints #x03c4 #x03b7 #x03c2)))
|
||||
|
||||
(pass-if "s3"
|
||||
(string=? s3 (string-ints #x03ba #x03c1 #x03b9 #x03c4 #x03b9 #x03ba #x03ae #x03c2)))
|
||||
|
||||
(pass-if "s4"
|
||||
(string=? s4 (string-ints #x03ba #x03b1 #x03b9))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "s1"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\Ð #\å #\ñ #\ß)))
|
||||
|
||||
(pass-if "s2"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\ô #\ç #\ò)))
|
||||
|
||||
(pass-if "s3"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\ê #\ñ #\é #\ô #\é #\ê #\Þ #\ò)))
|
||||
|
||||
(pass-if "s4"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\ê #\á #\é))))
|
||||
|
||||
;; Testing that the display of the string is output in the ISO-8859-7
|
||||
;; encoding
|
||||
(with-test-prefix "display"
|
||||
|
||||
(pass-if "s1"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-7")
|
||||
(display s1 pt)
|
||||
(list= eqv?
|
||||
(list #xd0 #xe5 #xf1 #xdf)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt)))))
|
||||
(pass-if "s2"
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-7")
|
||||
(display s2 pt)
|
||||
(list= eqv?
|
||||
(list #xf4 #xe7 #xf2)
|
||||
(u8vector->list
|
||||
(get-output-locale-u8vector pt))))))
|
||||
|
||||
(with-test-prefix "symbols == strings"
|
||||
|
||||
(pass-if "Ðåñß"
|
||||
(eq? (string->symbol s1) 'Ðåñß))
|
||||
|
||||
(pass-if "ôçò"
|
||||
(eq? (string->symbol s2) 'ôçò))
|
||||
|
||||
(pass-if "êñéôéêÞò"
|
||||
(eq? (string->symbol s3) 'êñéôéêÞò))
|
||||
|
||||
(pass-if "êáé"
|
||||
(eq? (string->symbol s4) 'êáé)))
|
||||
|
||||
(with-test-prefix "non-ascii variable names"
|
||||
|
||||
(pass-if "1"
|
||||
(let ((á 1)
|
||||
(ñ 2))
|
||||
(eq? (+ á ñ) 3))))
|
||||
|
||||
(with-test-prefix "output errors"
|
||||
|
||||
(pass-if-exception "char #x0400"
|
||||
exception:conversion
|
||||
(let ((pt (open-output-string)))
|
||||
(set-port-encoding! pt "ISO-8859-7")
|
||||
(set-port-conversion-strategy! pt 'error)
|
||||
(display (string-ints #x0400) pt))))
|
||||
|
||||
;; Reset locale
|
||||
(setlocale LC_ALL "C")
|
105
test-suite/tests/encoding-utf8.test
Normal file
105
test-suite/tests/encoding-utf8.test
Normal file
|
@ -0,0 +1,105 @@
|
|||
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: utf-8 -*-
|
||||
;;;;
|
||||
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
;;;;
|
||||
;;;; This program is free software; you can redistribute it and/or modify
|
||||
;;;; it under the terms of the GNU General Public License as published by
|
||||
;;;; the Free Software Foundation; either version 2, or (at your option)
|
||||
;;;; any later version.
|
||||
;;;;
|
||||
;;;; This program is distributed in the hope that it will be useful,
|
||||
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;;;; GNU General Public License for more details.
|
||||
;;;;
|
||||
;;;; You should have received a copy of the GNU General Public License
|
||||
;;;; along with this software; see the file COPYING. If not, write to
|
||||
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
;;;; Boston, MA 02110-1301 USA
|
||||
|
||||
(define-module (test-strings)
|
||||
#:use-module (test-suite lib)
|
||||
#:use-module (srfi srfi-1))
|
||||
|
||||
(define exception:conversion
|
||||
(cons 'misc-error "^cannot convert to output locale"))
|
||||
|
||||
;; Create a string from integer char values, eg. (string-ints 65) => "A"
|
||||
(define (string-ints . args)
|
||||
(apply string (map integer->char args)))
|
||||
|
||||
(setlocale LC_ALL "")
|
||||
|
||||
(define s1 "última")
|
||||
(define s2 "cédula")
|
||||
(define s3 "años")
|
||||
(define s4 "羅生門")
|
||||
|
||||
(with-test-prefix "string length"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string-length s1) 6))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string-length s2) 6))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string-length s3) 4))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(eq? (string-length s4) 3)))
|
||||
|
||||
(with-test-prefix "internal encoding"
|
||||
|
||||
(pass-if "última"
|
||||
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
|
||||
|
||||
(pass-if "años"
|
||||
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(string=? s4 (string-ints #x7f85 #x751f #x9580))))
|
||||
|
||||
(with-test-prefix "chars"
|
||||
|
||||
(pass-if "última"
|
||||
(list= eqv? (string->list s1)
|
||||
(list #\ú #\l #\t #\i #\m #\a)))
|
||||
|
||||
(pass-if "cédula"
|
||||
(list= eqv? (string->list s2)
|
||||
(list #\c #\é #\d #\u #\l #\a)))
|
||||
|
||||
(pass-if "años"
|
||||
(list= eqv? (string->list s3)
|
||||
(list #\a #\ñ #\o #\s)))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(list= eqv? (string->list s4)
|
||||
(list #\羅 #\生 #\門))))
|
||||
|
||||
(with-test-prefix "symbols == strings"
|
||||
|
||||
(pass-if "última"
|
||||
(eq? (string->symbol s1) 'última))
|
||||
|
||||
(pass-if "cédula"
|
||||
(eq? (string->symbol s2) 'cédula))
|
||||
|
||||
(pass-if "años"
|
||||
(eq? (string->symbol s3) 'años))
|
||||
|
||||
(pass-if "羅生門"
|
||||
(eq? (string->symbol s4) '羅生門)))
|
||||
|
||||
(with-test-prefix "non-ascii variable names"
|
||||
|
||||
(pass-if "1"
|
||||
(let ((芥川龍之介 1)
|
||||
(ñ 2))
|
||||
(eq? (+ 芥川龍之介 ñ) 3))))
|
||||
|
||||
|
|
@ -22,6 +22,7 @@
|
|||
;;;
|
||||
;;; miscellaneous
|
||||
;;;
|
||||
(setbinary)
|
||||
|
||||
(define exception:numerical-overflow
|
||||
(cons 'numerical-overflow "^Numerical overflow"))
|
||||
|
|
|
@ -33,6 +33,9 @@
|
|||
|
||||
;;;; Some general utilities for testing ports.
|
||||
|
||||
;;; Make sure we are set up for 8-bit data
|
||||
(setbinary)
|
||||
|
||||
;;; Read from PORT until EOF, and return the result as a string.
|
||||
(define (read-all port)
|
||||
(let loop ((chars '()))
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
;;; All these tests assume Guile 1.8's port system, where characters are
|
||||
;;; treated as octets.
|
||||
|
||||
;;; Set the default encoding of future ports to be binary
|
||||
(setbinary)
|
||||
|
||||
|
||||
(with-test-prefix "7.2.5 End-of-File Object"
|
||||
|
||||
|
|
|
@ -202,6 +202,11 @@
|
|||
(string=? (strftime "%Z" t)
|
||||
"ZOW")))
|
||||
|
||||
(pass-if "strftime passes wide characters"
|
||||
(let ((t (localtime (current-time))))
|
||||
(string=? (substring (strftime "\u0100%Z" t) 0 1)
|
||||
"\u0100")))
|
||||
|
||||
(with-test-prefix "C99 %z format"
|
||||
|
||||
;; %z here is quite possibly affected by the same tm:gmtoff vs current
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue