1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-20 11:40:18 +02:00

Merge commit 'origin/master'

Conflicts:
	libguile/unif.c
This commit is contained in:
Andy Wingo 2009-08-25 21:43:00 +02:00
commit c6a1380bde
33 changed files with 1980 additions and 375 deletions

View file

@ -24,7 +24,11 @@
#include "libguile/__scm.h"
#include "libguile/numbers.h"
#ifndef SCM_T_WCHAR_DEFINED
typedef scm_t_int32 scm_t_wchar;
#define SCM_T_WCHAR_DEFINED
#endif /* SCM_T_WCHAR_DEFINED */
/* Immediate Characters

View file

@ -1076,7 +1076,8 @@ SCM_DEFINE (scm_gentemp, "gentemp", 0, 2, 0,
{
char buf[MAX_PREFIX_LENGTH + SCM_INTBUFLEN];
char *name = buf;
int len, n_digits;
int n_digits;
size_t len;
scm_c_issue_deprecation_warning ("`gentemp' is deprecated. "
"Use `gensym' instead.");
@ -1090,7 +1091,7 @@ SCM_DEFINE (scm_gentemp, "gentemp", 0, 2, 0,
{
SCM_VALIDATE_STRING (1, prefix);
len = scm_i_string_length (prefix);
name = scm_to_locale_stringn (prefix, (size_t *)(&len));
name = scm_to_locale_stringn (prefix, &len);
name = scm_realloc (name, len + SCM_INTBUFLEN);
}

View file

@ -587,7 +587,7 @@ static void fport_flush (SCM port);
/* fill a port's read-buffer with a single read. returns the first
char or EOF if end of file. */
static int
static scm_t_wchar
fport_fill_input (SCM port)
{
long count;
@ -601,7 +601,7 @@ fport_fill_input (SCM port)
if (count == -1)
scm_syserror ("fport_fill_input");
if (count == 0)
return EOF;
return (scm_t_wchar) EOF;
else
{
pt->read_pos = pt->read_buf;

View file

@ -176,6 +176,8 @@ static SCM scm_unbound_p (SCM obj);
static SCM scm_assert_bound (SCM value, SCM obj);
static SCM scm_at_assert_bound_ref (SCM obj, SCM index);
static SCM scm_sys_goops_loaded (void);
static SCM scm_make_extended_class_from_symbol (SCM type_name_sym,
int applicablep);
/* This function is used for efficient type dispatch. */
SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0,
@ -281,9 +283,9 @@ SCM_DEFINE (scm_class_of, "class-of", 1, 0, 0,
else
{
SCM name = SCM_STRUCT_TABLE_NAME (SCM_CDR (handle));
SCM class = scm_make_extended_class (scm_is_true (name)
? scm_i_symbol_chars (name)
: 0,
SCM class = scm_make_extended_class_from_symbol (scm_is_true (name)
? name
: scm_nullstr,
SCM_I_OPERATORP (x));
SCM_SET_STRUCT_TABLE_CLASS (SCM_CDR (handle), class);
return class;
@ -1526,11 +1528,11 @@ wrap_init (SCM class, SCM *m, long n)
{
long i;
scm_t_bits slayout = SCM_STRUCT_DATA (class)[scm_vtable_index_layout];
const char *layout = scm_i_symbol_chars (SCM_PACK (slayout));
SCM layout = SCM_PACK (slayout);
/* Set all SCM-holding slots to unbound */
for (i = 0; i < n; i++)
if (layout[i*2] == 'p')
if (scm_i_symbol_ref (layout, i*2) == 'p')
m[i] = SCM_GOOPS_UNBOUND;
else
m[i] = 0;
@ -2680,6 +2682,34 @@ make_class_from_template (char const *template, char const *type_name, SCM super
return class;
}
static SCM
make_class_from_symbol (SCM type_name_sym, SCM supers, int applicablep)
{
SCM class, name;
if (type_name_sym != SCM_BOOL_F)
{
name = scm_string_append (scm_list_3 (scm_from_locale_string ("<"),
scm_symbol_to_string (type_name_sym),
scm_from_locale_string (">")));
name = scm_string_to_symbol (name);
}
else
name = SCM_GOOPS_UNBOUND;
class = scm_permanent_object (scm_basic_make_class (applicablep
? scm_class_procedure_class
: scm_class_class,
name,
supers,
SCM_EOL));
/* Only define name if doesn't already exist. */
if (!SCM_GOOPS_UNBOUNDP (name)
&& scm_is_false (scm_module_variable (scm_module_goops, name)))
DEFVAR (name, class);
return class;
}
SCM
scm_make_extended_class (char const *type_name, int applicablep)
{
@ -2691,6 +2721,16 @@ scm_make_extended_class (char const *type_name, int applicablep)
applicablep);
}
static SCM
scm_make_extended_class_from_symbol (SCM type_name_sym, int applicablep)
{
return make_class_from_symbol (type_name_sym,
scm_list_1 (applicablep
? scm_class_applicable
: scm_class_top),
applicablep);
}
void
scm_i_inherit_applicable (SCM c)
{
@ -2783,11 +2823,16 @@ static SCM
make_struct_class (void *closure SCM_UNUSED,
SCM vtable, SCM data, SCM prev SCM_UNUSED)
{
if (scm_is_true (SCM_STRUCT_TABLE_NAME (data)))
SCM_SET_STRUCT_TABLE_CLASS (data,
scm_make_extended_class
(scm_i_symbol_chars (SCM_STRUCT_TABLE_NAME (data)),
SCM_CLASS_FLAGS (vtable) & SCM_CLASSF_OPERATOR));
SCM sym = SCM_STRUCT_TABLE_NAME (data);
if (scm_is_true (sym))
{
int applicablep = SCM_CLASS_FLAGS (vtable) & SCM_CLASSF_OPERATOR;
SCM_SET_STRUCT_TABLE_CLASS (data,
scm_make_extended_class_from_symbol (sym, applicablep));
}
scm_remember_upto_here_2 (data, vtable);
return SCM_UNSPECIFIED;
}

View file

@ -87,7 +87,7 @@ SCM_API void scm_array_handle_set (scm_t_array_handle *h, ssize_t pos, SCM val);
SCM_API int scm_is_pair (SCM x);
SCM_API int scm_getc (SCM port);
SCM_API int scm_get_byte_or_eof (SCM port);
SCM_API void scm_putc (char c, SCM port);
SCM_API void scm_puts (const char *str_data, SCM port);
@ -299,7 +299,7 @@ scm_is_pair (SCM x)
SCM_C_EXTERN_INLINE
#endif
int
scm_getc (SCM port)
scm_get_byte_or_eof (SCM port)
{
int c;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
@ -319,27 +319,6 @@ scm_getc (SCM port)
c = *(pt->read_pos++);
switch (c)
{
case '\a':
break;
case '\b':
SCM_DECCOL (port);
break;
case '\n':
SCM_INCLINE (port);
break;
case '\r':
SCM_ZEROCOL (port);
break;
case '\t':
SCM_TABCOL (port);
break;
default:
SCM_INCCOL (port);
break;
}
return c;
}

View file

@ -85,6 +85,7 @@ SCM_DEFINE (scm_primitive_load, "primitive-load", 1, 0, 0,
#define FUNC_NAME s_scm_primitive_load
{
SCM hook = *scm_loc_load_hook;
char *encoding;
SCM_VALIDATE_STRING (1, filename);
if (scm_is_true (hook) && scm_is_false (scm_procedure_p (hook)))
SCM_MISC_ERROR ("value of %load-hook is neither a procedure nor #f",
@ -97,7 +98,15 @@ SCM_DEFINE (scm_primitive_load, "primitive-load", 1, 0, 0,
SCM port = scm_open_file (filename, scm_from_locale_string ("r"));
scm_dynwind_begin (SCM_F_DYNWIND_REWINDABLE);
scm_i_dynwind_current_load_port (port);
encoding = scm_scan_for_encoding (port);
if (encoding)
{
scm_i_set_port_encoding_x (port, encoding);
free (encoding);
}
else
/* The file has no encoding declaraed. We'll presume Latin-1. */
scm_i_set_port_encoding_x (port, NULL);
while (1)
{
SCM reader, form;

View file

@ -28,6 +28,11 @@
#include "libguile/__scm.h"
#include "libguile/print.h"
#ifndef SCM_T_WCHAR_DEFINED
typedef scm_t_int32 scm_t_wchar;
#define SCM_T_WCHAR_DEFINED
#endif /* SCM_T_WCHAR_DEFINED */
#if SCM_HAVE_FLOATINGPOINT_H
# include <floatingpoint.h>
#endif
@ -174,7 +179,6 @@ typedef struct scm_t_complex
double imag;
} scm_t_complex;
typedef scm_t_int32 scm_t_wchar;

View file

@ -30,6 +30,9 @@
#include <errno.h>
#include <fcntl.h> /* for chsize on mingw */
#include <assert.h>
#include <uniconv.h>
#include <unistr.h>
#include <striconveh.h>
#include "libguile/_scm.h"
#include "libguile/async.h"
@ -51,6 +54,7 @@
#include "libguile/vectors.h"
#include "libguile/weaks.h"
#include "libguile/fluids.h"
#include "libguile/eq.h"
#ifdef HAVE_STRING_H
#include <string.h>
@ -358,10 +362,10 @@ SCM_DEFINE (scm_drain_input, "drain-input", 1, 0, 0,
/* Standard ports --- current input, output, error, and more(!). */
static SCM cur_inport_fluid;
static SCM cur_outport_fluid;
static SCM cur_errport_fluid;
static SCM cur_loadport_fluid;
static SCM cur_inport_fluid = 0;
static SCM cur_outport_fluid = 0;
static SCM cur_errport_fluid = 0;
static SCM cur_loadport_fluid = 0;
SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
(),
@ -370,7 +374,10 @@ SCM_DEFINE (scm_current_input_port, "current-input-port", 0, 0, 0,
"returns the @dfn{standard input} in Unix and C terminology.")
#define FUNC_NAME s_scm_current_input_port
{
return scm_fluid_ref (cur_inport_fluid);
if (cur_inport_fluid)
return scm_fluid_ref (cur_inport_fluid);
else
return SCM_BOOL_F;
}
#undef FUNC_NAME
@ -382,7 +389,10 @@ SCM_DEFINE (scm_current_output_port, "current-output-port", 0, 0, 0,
"Unix and C terminology.")
#define FUNC_NAME s_scm_current_output_port
{
return scm_fluid_ref (cur_outport_fluid);
if (cur_outport_fluid)
return scm_fluid_ref (cur_outport_fluid);
else
return SCM_BOOL_F;
}
#undef FUNC_NAME
@ -392,7 +402,10 @@ SCM_DEFINE (scm_current_error_port, "current-error-port", 0, 0, 0,
"@dfn{standard error} in Unix and C terminology).")
#define FUNC_NAME s_scm_current_error_port
{
return scm_fluid_ref (cur_errport_fluid);
if (cur_errport_fluid)
return scm_fluid_ref (cur_errport_fluid);
else
return SCM_BOOL_F;
}
#undef FUNC_NAME
@ -509,10 +522,18 @@ scm_new_port_table_entry (scm_t_bits tag)
SCM z = scm_cons (SCM_EOL, SCM_EOL);
scm_t_port *entry = (scm_t_port *) scm_gc_calloc (sizeof (scm_t_port), "port");
const char *enc;
entry->file_name = SCM_BOOL_F;
entry->rw_active = SCM_PORT_NEITHER;
entry->port = z;
/* Initialize this port with the thread's current default
encoding. */
if ((enc = scm_i_get_port_encoding (SCM_BOOL_F)) == NULL)
entry->encoding = NULL;
else
entry->encoding = strdup (enc);
entry->ilseq_handler = scm_i_get_conversion_strategy (SCM_BOOL_F);
SCM_SET_CELL_TYPE (z, tag);
SCM_SETPTAB_ENTRY (z, entry);
@ -549,6 +570,11 @@ scm_i_remove_port (SCM port)
scm_t_port *p = SCM_PTAB_ENTRY (port);
if (p->putback_buf)
scm_gc_free (p->putback_buf, p->putback_buf_size, "putback buffer");
if (p->encoding)
{
free (p->encoding);
p->encoding = NULL;
}
scm_gc_free (p, sizeof (scm_t_port), "port");
SCM_SETPTAB_ENTRY (port, 0);
@ -632,21 +658,22 @@ SCM_DEFINE (scm_set_port_revealed_x, "set-port-revealed!", 2, 0, 0,
*/
static long
scm_i_mode_bits_n (const char *modes, size_t n)
scm_i_mode_bits_n (SCM modes)
{
return (SCM_OPN
| (memchr (modes, 'r', n) || memchr (modes, '+', n) ? SCM_RDNG : 0)
| ( memchr (modes, 'w', n)
|| memchr (modes, 'a', n)
|| memchr (modes, '+', n) ? SCM_WRTNG : 0)
| (memchr (modes, '0', n) ? SCM_BUF0 : 0)
| (memchr (modes, 'l', n) ? SCM_BUFLINE : 0));
| (scm_i_string_contains_char (modes, 'r')
|| scm_i_string_contains_char (modes, '+') ? SCM_RDNG : 0)
| (scm_i_string_contains_char (modes, 'w')
|| scm_i_string_contains_char (modes, 'a')
|| scm_i_string_contains_char (modes, '+') ? SCM_WRTNG : 0)
| (scm_i_string_contains_char (modes, '0') ? SCM_BUF0 : 0)
| (scm_i_string_contains_char (modes, 'l') ? SCM_BUFLINE : 0));
}
long
scm_mode_bits (char *modes)
{
return scm_i_mode_bits_n (modes, strlen (modes));
return scm_i_mode_bits (scm_from_locale_string (modes));
}
long
@ -657,8 +684,7 @@ scm_i_mode_bits (SCM modes)
if (!scm_is_string (modes))
scm_wrong_type_arg_msg (NULL, 0, modes, "string");
bits = scm_i_mode_bits_n (scm_i_string_chars (modes),
scm_i_string_length (modes));
bits = scm_i_mode_bits_n (modes);
scm_remember_upto_here_1 (modes);
return bits;
}
@ -929,7 +955,7 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
"characters are available, the end-of-file object is returned.")
#define FUNC_NAME s_scm_read_char
{
int c;
scm_t_wchar c;
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
SCM_VALIDATE_OPINPORT (1, port);
@ -940,6 +966,133 @@ SCM_DEFINE (scm_read_char, "read-char", 0, 1, 0,
}
#undef FUNC_NAME
#define SCM_MBCHAR_BUF_SIZE (4)
/* Get one codepoint from a file, using the port's encoding. */
scm_t_wchar
scm_getc (SCM port)
{
int c;
unsigned int bufcount = 0;
char buf[SCM_MBCHAR_BUF_SIZE];
scm_t_wchar codepoint = 0;
scm_t_uint32 *u32;
size_t u32len;
scm_t_port *pt = SCM_PTAB_ENTRY (port);
c = scm_get_byte_or_eof (port);
if (c == EOF)
return (scm_t_wchar) EOF;
buf[0] = c;
bufcount++;
if (pt->encoding == NULL)
{
/* The encoding is Latin-1: bytes are characters. */
codepoint = buf[0];
goto success;
}
for (;;)
{
u32 = u32_conv_from_encoding (pt->encoding,
(enum iconv_ilseq_handler) pt->ilseq_handler,
buf, bufcount, NULL, NULL, &u32len);
if (u32 == NULL || u32len == 0)
{
if (errno == ENOMEM)
scm_memory_error ("Input decoding");
/* Otherwise errno is EILSEQ or EINVAL, so perhaps more
bytes are needed. Keep looping. */
}
else
{
/* Complete codepoint found. */
codepoint = u32[0];
free (u32);
goto success;
}
if (bufcount == SCM_MBCHAR_BUF_SIZE)
{
/* We've read several bytes and didn't find a good
codepoint. Give up. */
goto failure;
}
c = scm_get_byte_or_eof (port);
if (c == EOF)
{
/* EOF before a complete character was read. Push it all
back and return EOF. */
while (bufcount > 0)
{
/* FIXME: this will probably cause errors in the port column. */
scm_unget_byte (buf[bufcount-1], port);
bufcount --;
}
return EOF;
}
if (c == '\n')
{
/* It is always invalid to have EOL in the middle of a
multibyte character. */
scm_unget_byte ('\n', port);
goto failure;
}
buf[bufcount++] = c;
}
success:
switch (codepoint)
{
case '\a':
break;
case '\b':
SCM_DECCOL (port);
break;
case '\n':
SCM_INCLINE (port);
break;
case '\r':
SCM_ZEROCOL (port);
break;
case '\t':
SCM_TABCOL (port);
break;
default:
SCM_INCCOL (port);
break;
}
return codepoint;
failure:
{
char *err_buf;
SCM err_str = scm_i_make_string (bufcount, &err_buf);
memcpy (err_buf, buf, bufcount);
if (errno == EILSEQ)
scm_misc_error (NULL, "input encoding error for ~s: ~s",
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
err_str));
else
scm_misc_error (NULL, "input encoding error (invalid) for ~s: ~s\n",
scm_list_2 (scm_from_locale_string (scm_i_get_port_encoding (port)),
err_str));
}
/* Never gets here. */
return 0;
}
/* this should only be called when the read buffer is empty. it
tries to refill the read buffer. it returns the first char from
the port, which is either EOF or *(pt->read_pos). */
@ -1027,7 +1180,7 @@ scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM port)
stringbuf_write_mutex. So, one shouldn't use scm_lfwrite_substr
if the stringbuf write mutex may still be held elsewhere. */
buf = scm_to_stringn (scm_c_substring (str, start, end), &len,
NULL, SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
pt->encoding, pt->ilseq_handler);
ptob->write (port, buf, len);
free (buf);
@ -1056,7 +1209,7 @@ scm_lfwrite_str (SCM str, SCM port)
scm_end_input (port);
buf = scm_to_stringn (str, &len,
NULL, SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
pt->encoding, pt->ilseq_handler);
ptob->write (port, buf, len);
free (buf);
@ -1257,8 +1410,8 @@ scm_end_input (SCM port)
void
scm_ungetc (int c, SCM port)
#define FUNC_NAME "scm_ungetc"
scm_unget_byte (int c, SCM port)
#define FUNC_NAME "scm_unget_byte"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
@ -1317,6 +1470,25 @@ scm_ungetc (int c, SCM port)
if (pt->rw_random)
pt->rw_active = SCM_PORT_READ;
}
#undef FUNC_NAME
void
scm_ungetc (scm_t_wchar c, SCM port)
#define FUNC_NAME "scm_ungetc"
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
scm_t_wchar *wbuf;
SCM str = scm_i_make_wide_string (1, &wbuf);
char *buf;
size_t len;
int i;
wbuf[0] = c;
buf = scm_to_stringn (str, &len, pt->encoding, pt->ilseq_handler);
for (i = len - 1; i >= 0; i--)
scm_unget_byte (buf[i], port);
if (c == '\n')
{
@ -1363,7 +1535,7 @@ SCM_DEFINE (scm_peek_char, "peek-char", 0, 1, 0,
"to @code{read-char} would have hung.")
#define FUNC_NAME s_scm_peek_char
{
int c, column;
scm_t_wchar c, column;
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
else
@ -1409,13 +1581,17 @@ SCM_DEFINE (scm_unread_string, "unread-string", 2, 0, 0,
"@var{port} is not supplied, the current-input-port is used.")
#define FUNC_NAME s_scm_unread_string
{
int n;
SCM_VALIDATE_STRING (1, str);
if (SCM_UNBNDP (port))
port = scm_current_input_port ();
else
SCM_VALIDATE_OPINPORT (2, port);
scm_ungets (scm_i_string_chars (str), scm_i_string_length (str), port);
n = scm_i_string_length (str);
while (n--)
scm_ungetc (scm_i_string_ref (str, n), port);
return str;
}
@ -1670,6 +1846,328 @@ SCM_DEFINE (scm_set_port_filename_x, "set-port-filename!", 2, 0, 0,
}
#undef FUNC_NAME
/* The default port encoding for this locale. New ports will have this
encoding. If it is a string, that is the encoding. If it #f, it
is in the native (Latin-1) encoding. */
SCM_GLOBAL_VARIABLE (scm_port_encoding_var, "%port-encoding");
static int scm_port_encoding_init = 0;
/* Return a C string representation of the current encoding. */
const char *
scm_i_get_port_encoding (SCM port)
{
SCM encoding;
if (scm_is_false (port))
{
if (!scm_port_encoding_init)
return NULL;
else if (!scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
return NULL;
else
{
encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_port_encoding_var));
if (!scm_is_string (encoding))
return NULL;
else
return scm_i_string_chars (encoding);
}
}
else
{
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
if (pt->encoding)
return pt->encoding;
else
return NULL;
}
}
/* Returns ENC is if is a recognized encoding. If it isn't, it tries
to find an alias of ENC that is valid. Otherwise, it returns
NULL. */
static const char *
find_valid_encoding (const char *enc)
{
int isvalid = 0;
const char str[] = " ";
scm_t_uint32 *u32;
size_t u32len;
u32 = u32_conv_from_encoding (enc, iconveh_error, str, 1,
NULL, NULL, &u32len);
isvalid = (u32 != NULL);
free (u32);
if (isvalid)
return enc;
return NULL;
}
void
scm_i_set_port_encoding_x (SCM port, const char *enc)
{
const char *valid_enc;
scm_t_port *pt;
/* Null is shorthand for the native, Latin-1 encoding. */
if (enc == NULL)
valid_enc = NULL;
else
{
valid_enc = find_valid_encoding (enc);
if (valid_enc == NULL)
{
SCM err;
err = scm_from_locale_string (enc);
scm_misc_error (NULL, "invalid or unknown character encoding ~s",
scm_list_1 (err));
}
}
if (scm_is_false (port))
{
/* Set the default encoding for future ports. */
if (!scm_port_encoding_init
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_port_encoding_var)))
scm_misc_error (NULL, "tried to set port encoding fluid before it is initialized",
SCM_EOL);
if (valid_enc == NULL
|| !strcmp (valid_enc, "ASCII")
|| !strcmp (valid_enc, "ANSI_X3.4-1968")
|| !strcmp (valid_enc, "ISO-8859-1"))
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
else
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var),
scm_from_locale_string (valid_enc));
}
else
{
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
if (pt->encoding)
free (pt->encoding);
if (valid_enc == NULL)
pt->encoding = NULL;
else
pt->encoding = strdup (valid_enc);
}
}
SCM_DEFINE (scm_port_encoding, "port-encoding", 1, 0, 0,
(SCM port),
"Returns, as a string, the character encoding that @var{port}\n"
"uses to interpret its input and output.\n")
#define FUNC_NAME s_scm_port_encoding
{
scm_t_port *pt;
const char *enc;
SCM_VALIDATE_PORT (1, port);
pt = SCM_PTAB_ENTRY (port);
enc = scm_i_get_port_encoding (port);
if (enc)
return scm_from_locale_string (pt->encoding);
else
return scm_from_locale_string ("NONE");
}
#undef FUNC_NAME
SCM_DEFINE (scm_set_port_encoding_x, "set-port-encoding!", 2, 0, 0,
(SCM port, SCM enc),
"Sets the character encoding that will be used to interpret all\n"
"port I/O. New ports are created with the encoding\n"
"appropriate for the current locale if @code{setlocale} has \n"
"been called or ISO-8859-1 otherwise\n"
"and this procedure can be used to modify that encoding.\n")
#define FUNC_NAME s_scm_set_port_encoding_x
{
char *enc_str;
const char *valid_enc_str;
SCM_VALIDATE_PORT (1, port);
SCM_VALIDATE_STRING (2, enc);
enc_str = scm_to_locale_string (enc);
valid_enc_str = find_valid_encoding (enc_str);
if (valid_enc_str == NULL)
{
free (enc_str);
scm_misc_error (FUNC_NAME, "invalid or unknown character encoding ~s",
scm_list_1 (enc));
}
else
{
scm_i_set_port_encoding_x (port, valid_enc_str);
free (enc_str);
}
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
/* This determines how conversions handle unconvertible characters. */
SCM_GLOBAL_VARIABLE (scm_conversion_strategy, "%port-conversion-strategy");
static int scm_conversion_strategy_init = 0;
scm_t_string_failed_conversion_handler
scm_i_get_conversion_strategy (SCM port)
{
SCM encoding;
if (scm_is_false (port))
{
if (!scm_conversion_strategy_init
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
return SCM_FAILED_CONVERSION_QUESTION_MARK;
else
{
encoding = scm_fluid_ref (SCM_VARIABLE_REF (scm_conversion_strategy));
if (scm_is_false (encoding))
return SCM_FAILED_CONVERSION_QUESTION_MARK;
else
return (scm_t_string_failed_conversion_handler) scm_to_int (encoding);
}
}
else
{
scm_t_port *pt;
pt = SCM_PTAB_ENTRY (port);
return pt->ilseq_handler;
}
}
void
scm_i_set_conversion_strategy_x (SCM port,
scm_t_string_failed_conversion_handler handler)
{
SCM strategy;
scm_t_port *pt;
strategy = scm_from_int ((int) handler);
if (scm_is_false (port))
{
/* Set the default encoding for future ports. */
if (!scm_conversion_strategy
|| !scm_is_fluid (SCM_VARIABLE_REF (scm_conversion_strategy)))
scm_misc_error (NULL, "tried to set conversion strategy fluid before it is initialized",
SCM_EOL);
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy), strategy);
}
else
{
/* Set the character encoding for this port. */
pt = SCM_PTAB_ENTRY (port);
pt->ilseq_handler = handler;
}
}
SCM_DEFINE (scm_port_conversion_strategy, "port-conversion-strategy",
1, 0, 0, (SCM port),
"Returns the behavior of the port when handling a character that\n"
"is not representable in the port's current encoding.\n"
"It returns the symbol @code{error} if unrepresentable characters\n"
"should cause exceptions, @code{substitute} if the port should\n"
"try to replace unrepresentable characters with question marks or\n"
"approximate characters, or @code{escape} if unrepresentable\n"
"characters should be converted to string escapes.\n"
"\n"
"If @var{port} is @code{#f}, then the current default behavior\n"
"will be returned. New ports will have this default behavior\n"
"when they are created.\n")
#define FUNC_NAME s_scm_port_conversion_strategy
{
scm_t_string_failed_conversion_handler h;
SCM_VALIDATE_OPPORT (1, port);
if (!scm_is_false (port))
{
SCM_VALIDATE_OPPORT (1, port);
}
h = scm_i_get_conversion_strategy (port);
if (h == SCM_FAILED_CONVERSION_ERROR)
return scm_from_locale_symbol ("error");
else if (h == SCM_FAILED_CONVERSION_QUESTION_MARK)
return scm_from_locale_symbol ("substitute");
else if (h == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
return scm_from_locale_symbol ("escape");
else
abort ();
/* Never gets here. */
return SCM_UNDEFINED;
}
#undef FUNC_NAME
SCM_DEFINE (scm_set_port_conversion_strategy_x, "set-port-conversion-strategy!",
2, 0, 0,
(SCM port, SCM sym),
"Sets the behavior of the interpreter when outputting a character\n"
"that is not representable in the port's current encoding.\n"
"@var{sym} can be either @code{'error}, @code{'substitute}, or\n"
"@code{'escape}. If it is @code{'error}, an error will be thrown\n"
"when an unconvertible character is encountered. If it is\n"
"@code{'substitute}, then unconvertible characters will \n"
"be replaced with approximate characters, or with question marks\n"
"if no approximately correct character is available.\n"
"If it is @code{'escape},\n"
"it will appear as a hex escape when output.\n"
"\n"
"If @var{port} is an open port, the conversion error behavior\n"
"is set for that port. If it is @code{#f}, it is set as the\n"
"default behavior for any future ports that get created in\n"
"this thread.\n")
#define FUNC_NAME s_scm_set_port_conversion_strategy_x
{
SCM err;
SCM qm;
SCM esc;
if (!scm_is_false (port))
{
SCM_VALIDATE_OPPORT (1, port);
}
err = scm_from_locale_symbol ("error");
if (scm_is_true (scm_eqv_p (sym, err)))
{
scm_i_set_conversion_strategy_x (port, SCM_FAILED_CONVERSION_ERROR);
return SCM_UNSPECIFIED;
}
qm = scm_from_locale_symbol ("substitute");
if (scm_is_true (scm_eqv_p (sym, qm)))
{
scm_i_set_conversion_strategy_x (port,
SCM_FAILED_CONVERSION_QUESTION_MARK);
return SCM_UNSPECIFIED;
}
esc = scm_from_locale_symbol ("escape");
if (scm_is_true (scm_eqv_p (sym, esc)))
{
scm_i_set_conversion_strategy_x (port,
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
return SCM_UNSPECIFIED;
}
SCM_MISC_ERROR ("unknown conversion behavior ~s", scm_list_1 (sym));
return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
void
scm_print_port_mode (SCM exp, SCM port)
{
@ -1780,8 +2278,17 @@ scm_init_ports ()
cur_loadport_fluid = scm_permanent_object (scm_make_fluid ());
scm_i_port_weak_hash = scm_permanent_object (scm_make_weak_key_hash_table (SCM_I_MAKINUM(31)));
#include "libguile/ports.x"
SCM_VARIABLE_SET (scm_port_encoding_var, scm_make_fluid ());
scm_fluid_set_x (SCM_VARIABLE_REF (scm_port_encoding_var), SCM_BOOL_F);
scm_port_encoding_init = 1;
SCM_VARIABLE_SET (scm_conversion_strategy, scm_make_fluid ());
scm_fluid_set_x (SCM_VARIABLE_REF (scm_conversion_strategy),
scm_from_int ((int) SCM_FAILED_CONVERSION_QUESTION_MARK));
scm_conversion_strategy_init = 1;
}
/*

View file

@ -28,7 +28,7 @@
#include "libguile/print.h"
#include "libguile/struct.h"
#include "libguile/threads.h"
#include "libguile/strings.h"
@ -56,6 +56,10 @@ typedef struct
long line_number; /* debugging support. */
int column_number; /* debugging support. */
/* Character encoding support */
char *encoding;
scm_t_string_failed_conversion_handler ilseq_handler;
/* port buffers. the buffer(s) are set up for all ports.
in the case of string ports, the buffer is the string itself.
in the case of unbuffered file ports, the buffer is a
@ -266,6 +270,7 @@ SCM_API SCM scm_eof_object_p (SCM x);
SCM_API SCM scm_force_output (SCM port);
SCM_API SCM scm_flush_all_ports (void);
SCM_API SCM scm_read_char (SCM port);
SCM_API scm_t_wchar scm_getc (SCM port);
SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
@ -275,7 +280,8 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end,
SCM_API void scm_flush (SCM port);
SCM_API void scm_end_input (SCM port);
SCM_API int scm_fill_input (SCM port);
SCM_API void scm_ungetc (int c, SCM port);
SCM_INTERNAL void scm_unget_byte (int c, SCM port);
SCM_API void scm_ungetc (scm_t_wchar c, SCM port);
SCM_API void scm_ungets (const char *s, int n, SCM port);
SCM_API SCM scm_peek_char (SCM port);
SCM_API SCM scm_unread_char (SCM cobj, SCM port);
@ -288,6 +294,15 @@ SCM_API SCM scm_port_column (SCM port);
SCM_API SCM scm_set_port_column_x (SCM port, SCM line);
SCM_API SCM scm_port_filename (SCM port);
SCM_API SCM scm_set_port_filename_x (SCM port, SCM filename);
SCM_INTERNAL const char *scm_i_get_port_encoding (SCM port);
SCM_INTERNAL void scm_i_set_port_encoding_x (SCM port, const char *str);
SCM_API SCM scm_port_encoding (SCM port);
SCM_API SCM scm_set_port_encoding_x (SCM port, SCM encoding);
SCM_INTERNAL scm_t_string_failed_conversion_handler scm_i_get_conversion_strategy (SCM port);
SCM_INTERNAL void scm_i_set_conversion_strategy_x (SCM port,
scm_t_string_failed_conversion_handler h);
SCM_API SCM scm_port_conversion_strategy (SCM port);
SCM_API SCM scm_set_port_conversion_strategy_x (SCM port, SCM behavior);
SCM_API int scm_port_print (SCM exp, SCM port, scm_print_state *);
SCM_API void scm_print_port_mode (SCM exp, SCM port);
SCM_API void scm_ports_prehistory (void);
@ -295,7 +310,6 @@ SCM_API SCM scm_void_port (char * mode_str);
SCM_API SCM scm_sys_make_void_port (SCM mode);
SCM_INTERNAL void scm_init_ports (void);
#if SCM_ENABLE_DEPRECATED==1
SCM_API scm_t_port * scm_add_to_port_table (SCM port);
#endif

View file

@ -25,6 +25,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <uniconv.h>
#include "libguile/_scm.h"
#include "libguile/dynwind.h"
@ -1528,12 +1529,17 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
"Otherwise the specified locale category is set to the string\n"
"@var{locale} and the new value is returned as a\n"
"system-dependent string. If @var{locale} is an empty string,\n"
"the locale will be set using environment variables.")
"the locale will be set using environment variables.\n"
"\n"
"When the locale is changed, the character encoding of the new\n"
"locale (UTF-8, ISO-8859-1, etc.) is used for the current\n"
"input, output, and error ports\n")
#define FUNC_NAME s_scm_setlocale
{
int c_category;
char *clocale;
char *rv;
const char *enc;
scm_dynwind_begin (0);
@ -1562,15 +1568,47 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
SCM_SYSERROR;
}
/* Recompute the standard SRFI-14 character sets in a locale-dependent
(actually charset-dependent) way. */
scm_srfi_14_compute_char_sets ();
enc = locale_charset ();
/* Set the default encoding for new ports. */
scm_i_set_port_encoding_x (SCM_BOOL_F, enc);
/* Set the encoding for the stdio ports. */
scm_i_set_port_encoding_x (scm_current_input_port (), enc);
scm_i_set_port_encoding_x (scm_current_output_port (), enc);
scm_i_set_port_encoding_x (scm_current_error_port (), enc);
scm_dynwind_end ();
return scm_from_locale_string (rv);
}
#undef FUNC_NAME
#endif /* HAVE_SETLOCALE */
SCM_DEFINE (scm_setbinary, "setbinary", 0, 0, 0,
(void),
"Sets the encoding for the current input, output, and error\n"
"ports to ISO-8859-1. That character encoding allows\n"
"ports to operate on binary data.\n"
"\n"
"It also sets the default encoding for newly created ports\n"
"to ISO-8859-1.\n"
"\n"
"The previous default encoding for new ports is returned\n")
#define FUNC_NAME s_scm_setbinary
{
const char *enc = scm_i_get_port_encoding (SCM_BOOL_F);
/* Set the default encoding for new ports. */
scm_i_set_port_encoding_x (SCM_BOOL_F, NULL);
/* Set the encoding for the stdio ports. */
scm_i_set_port_encoding_x (scm_current_input_port (), NULL);
scm_i_set_port_encoding_x (scm_current_output_port (), NULL);
scm_i_set_port_encoding_x (scm_current_error_port (), NULL);
if (enc)
return scm_from_locale_string (enc);
return scm_from_locale_string ("ISO-8859-1");
}
#undef FUNC_NAME
#ifdef HAVE_MKNOD
SCM_DEFINE (scm_mknod, "mknod", 4, 0, 0,

View file

@ -74,6 +74,7 @@ SCM_API SCM scm_access (SCM path, SCM how);
SCM_API SCM scm_getpid (void);
SCM_API SCM scm_putenv (SCM str);
SCM_API SCM scm_setlocale (SCM category, SCM locale);
SCM_API SCM scm_setbinary (void);
SCM_API SCM scm_mknod (SCM path, SCM type, SCM perms, SCM dev);
SCM_API SCM scm_nice (SCM incr);
SCM_API SCM scm_sync (void);

View file

@ -462,20 +462,45 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
| UC_CATEGORY_MASK_S))
/* Print the character if is graphic character. */
{
if (i<256)
/* Character is graphic. Print it. */
scm_putc (i, port);
scm_t_wchar *wbuf;
SCM wstr = scm_i_make_wide_string (1, &wbuf);
char *buf;
size_t len;
const char *enc;
enc = scm_i_get_port_encoding (port);
wbuf[0] = i;
if (enc == NULL && i <= 0xFF)
{
/* Character is graphic and Latin-1. Print it */
scm_lfwrite_str (wstr, port);
}
else
/* Character is graphic but unrepresentable in
this port's encoding. */
scm_intprint (i, 8, port);
{
buf = u32_conv_to_encoding (enc,
iconveh_error,
(scm_t_uint32 *) wbuf,
1,
NULL,
NULL, &len);
if (buf != NULL)
{
/* Character is graphic. Print it. */
scm_lfwrite_str (wstr, port);
free (buf);
}
else
/* Character is graphic but unrepresentable in
this port's encoding. */
scm_intprint (i, 8, port);
}
}
else
/* Character is a non-graphical character. */
scm_intprint (i, 8, port);
}
else
scm_putc (i, port);
scm_i_charprint (i, port);
}
else if (SCM_IFLAGP (exp)
&& ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
@ -607,21 +632,32 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
SCM wstr = scm_i_make_wide_string (1, &wbuf);
char *buf;
size_t len;
wbuf[0] = ch;
buf = u32_conv_to_encoding ("ISO-8859-1",
iconveh_error,
(scm_t_uint32 *) wbuf,
1, NULL, NULL, &len);
if (buf != NULL)
if (scm_i_get_port_encoding (port))
{
/* Character is graphic and representable in
this encoding. Print it. */
scm_lfwrite_str (wstr, port);
free (buf);
printed = 1;
wstr = scm_i_make_wide_string (1, &wbuf);
wbuf[0] = ch;
buf = u32_conv_to_encoding (scm_i_get_port_encoding (port),
iconveh_error,
(scm_t_uint32 *) wbuf,
1 ,
NULL,
NULL, &len);
if (buf != NULL)
{
/* Character is graphic and representable in
this encoding. Print it. */
scm_lfwrite_str (wstr, port);
free (buf);
printed = 1;
}
}
else
if (ch <= 0xFF)
{
scm_putc (ch, port);
printed = 1;
}
}
if (!printed)
@ -834,7 +870,7 @@ scm_prin1 (SCM exp, SCM port, int writingp)
/* Print a character.
*/
void
scm_i_charprint (scm_t_uint32 ch, SCM port)
scm_i_charprint (scm_t_wchar ch, SCM port)
{
scm_t_wchar *wbuf;
SCM wstr = scm_i_make_wide_string (1, &wbuf);
@ -1056,9 +1092,7 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
SCM port, answer = SCM_UNSPECIFIED;
int fReturnString = 0;
int writingp;
const char *start;
const char *end;
const char *p;
size_t start, p, end;
if (scm_is_eq (destination, SCM_BOOL_T))
{
@ -1081,15 +1115,16 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
SCM_VALIDATE_STRING (2, message);
SCM_VALIDATE_REST_ARGUMENT (args);
start = scm_i_string_chars (message);
end = start + scm_i_string_length (message);
p = 0;
start = 0;
end = scm_i_string_length (message);
for (p = start; p != end; ++p)
if (*p == '~')
if (scm_i_string_ref (message, p) == '~')
{
if (++p == end)
break;
switch (*p)
switch (scm_i_string_ref (message, p))
{
case 'A': case 'a':
writingp = 0;
@ -1098,33 +1133,33 @@ SCM_DEFINE (scm_simple_format, "simple-format", 2, 0, 1,
writingp = 1;
break;
case '~':
scm_lfwrite (start, p - start, port);
scm_lfwrite_substr (message, start, p, port);
start = p + 1;
continue;
case '%':
scm_lfwrite (start, p - start - 1, port);
scm_lfwrite_substr (message, start, p - 1, port);
scm_newline (port);
start = p + 1;
continue;
default:
SCM_MISC_ERROR ("FORMAT: Unsupported format option ~~~A - use (ice-9 format) instead",
scm_list_1 (SCM_MAKE_CHAR (*p)));
scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p))));
}
if (!scm_is_pair (args))
SCM_MISC_ERROR ("FORMAT: Missing argument for ~~~A",
scm_list_1 (SCM_MAKE_CHAR (*p)));
scm_list_1 (SCM_MAKE_CHAR (scm_i_string_ref (message, p))));
scm_lfwrite (start, p - start - 1, port);
scm_lfwrite_substr (message, start, p - 1, port);
/* we pass destination here */
scm_prin1 (SCM_CAR (args), destination, writingp);
args = SCM_CDR (args);
start = p + 1;
}
scm_lfwrite (start, p - start, port);
scm_lfwrite_substr (message, start, p, port);
if (!scm_is_eq (args, SCM_EOL))
SCM_MISC_ERROR ("FORMAT: ~A superfluous arguments",
scm_list_1 (scm_length (args)));

View file

@ -25,6 +25,7 @@
#include "libguile/__scm.h"
#include "libguile/chars.h"
#include "libguile/options.h"
@ -77,7 +78,7 @@ SCM_API SCM scm_print_options (SCM setting);
SCM_API SCM scm_make_print_state (void);
SCM_API void scm_free_print_state (SCM print_state);
SCM_INTERNAL SCM scm_i_port_with_print_state (SCM port, SCM print_state);
SCM_INTERNAL void scm_i_charprint (scm_t_uint32 c, SCM port);
SCM_INTERNAL void scm_i_charprint (scm_t_wchar c, SCM port);
SCM_API void scm_intprint (scm_t_intmax n, int radix, SCM port);
SCM_API void scm_uintprint (scm_t_uintmax n, int radix, SCM port);
SCM_API void scm_ipruk (char *hdr, SCM ptr, SCM port);

View file

@ -59,12 +59,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
size_t j;
size_t cstart;
size_t cend;
int c;
const char *cdelims;
scm_t_wchar c;
size_t num_delims;
SCM_VALIDATE_STRING (1, delims);
cdelims = scm_i_string_chars (delims);
num_delims = scm_i_string_length (delims);
SCM_VALIDATE_STRING (2, str);
@ -83,7 +81,7 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
c = scm_getc (port);
for (k = 0; k < num_delims; k++)
{
if (cdelims[k] == c)
if (scm_i_string_ref (delims, k) == c)
{
if (scm_is_false (gobble))
scm_ungetc (c, port);

View file

@ -27,6 +27,8 @@
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include <unicase.h>
#include "libguile/_scm.h"
#include "libguile/bytevectors.h"
@ -178,11 +180,6 @@ static SCM *scm_read_hash_procedures;
(((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
|| ((_chr) == 'd') || ((_chr) == 'l'))
/* An inlinable version of `scm_c_downcase ()'. */
#define CHAR_DOWNCASE(_chr) \
(((_chr) <= UCHAR_MAX) ? tolower ((int) (_chr)) : (_chr))
/* Read an SCSH block comment. */
static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
static SCM scm_read_commented_expression (int chr, SCM port);
@ -190,41 +187,69 @@ static SCM scm_read_commented_expression (int chr, SCM port);
/* Read from PORT until a delimiter (e.g., a whitespace) is read. Return
zero if the whole token fits in BUF, non-zero otherwise. */
static inline int
read_token (SCM port, char *buf, size_t buf_size, size_t *read)
read_token (SCM port, SCM buf, size_t *read)
{
scm_t_wchar chr;
*read = 0;
while (*read < buf_size)
buf = scm_i_string_start_writing (buf);
while (*read < scm_i_string_length (buf))
{
int chr;
chr = scm_getc (port);
chr = (SCM_CASE_INSENSITIVE_P ? CHAR_DOWNCASE (chr) : chr);
if (chr == EOF)
return 0;
else if (CHAR_IS_DELIMITER (chr))
{
scm_i_string_stop_writing ();
return 0;
}
chr = (SCM_CASE_INSENSITIVE_P ? uc_tolower (chr) : chr);
if (CHAR_IS_DELIMITER (chr))
{
scm_i_string_stop_writing ();
scm_ungetc (chr, port);
return 0;
}
else
{
*buf = (char) chr;
buf++, (*read)++;
}
scm_i_string_set_x (buf, *read, chr);
(*read)++;
}
scm_i_string_stop_writing ();
return 1;
}
static SCM
read_complete_token (SCM port, size_t *read)
{
SCM buffer, str = SCM_EOL;
size_t len;
int overflow;
buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
overflow = read_token (port, buffer, read);
if (!overflow)
return scm_i_substring (buffer, 0, *read);
str = scm_string_copy (buffer);
do
{
overflow = read_token (port, buffer, &len);
str = scm_string_append (scm_list_2 (str, buffer));
*read += len;
}
while (overflow);
return scm_i_substring (str, 0, *read);
}
/* Skip whitespace from PORT and return the first non-whitespace character
read. Raise an error on end-of-file. */
static int
flush_ws (SCM port, const char *eoferr)
{
register int c;
register scm_t_wchar c;
while (1)
switch (c = scm_getc (port))
{
@ -293,7 +318,7 @@ static SCM recsexpr (SCM obj, long line, int column, SCM filename);
static SCM
scm_read_sexp (int chr, SCM port)
scm_read_sexp (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_i_lreadparen"
{
register int c;
@ -554,107 +579,52 @@ scm_read_string (int chr, SCM port)
static SCM
scm_read_number (int chr, SCM port)
scm_read_number (scm_t_wchar chr, SCM port)
{
SCM result, str = SCM_EOL;
char buffer[READER_BUFFER_SIZE];
SCM result;
SCM buffer;
size_t read;
int overflow = 0;
scm_ungetc (chr, port);
do
{
overflow = read_token (port, buffer, sizeof (buffer), &read);
if ((overflow) || (scm_is_pair (str)))
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
}
while (overflow);
if (scm_is_pair (str))
{
/* The slow path. */
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
result = scm_string_to_number (str, SCM_UNDEFINED);
if (!scm_is_true (result))
/* Return a symbol instead of a number. */
result = scm_string_to_symbol (str);
}
else
{
result = scm_c_locale_stringn_to_number (buffer, read, 10);
if (!scm_is_true (result))
/* Return a symbol instead of a number. */
result = scm_from_locale_symboln (buffer, read);
}
buffer = read_complete_token (port, &read);
result = scm_string_to_number (buffer, SCM_UNDEFINED);
if (!scm_is_true (result))
/* Return a symbol instead of a number. */
result = scm_string_to_symbol (buffer);
return result;
}
static SCM
scm_read_mixed_case_symbol (int chr, SCM port)
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
{
SCM result, str = SCM_EOL;
int overflow = 0, ends_with_colon = 0;
char buffer[READER_BUFFER_SIZE];
SCM result;
int ends_with_colon = 0;
SCM buffer;
size_t read = 0;
int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
scm_ungetc (chr, port);
do
{
overflow = read_token (port, buffer, sizeof (buffer), &read);
buffer = read_complete_token (port, &read);
if (read > 0)
ends_with_colon = scm_i_string_ref (buffer, read - 1) == ':';
if (read > 0)
ends_with_colon = (buffer[read - 1] == ':');
if ((overflow) || (scm_is_pair (str)))
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
}
while (overflow);
if (scm_is_pair (str))
{
size_t len;
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
len = scm_c_string_length (str);
/* Per SRFI-88, `:' alone is an identifier, not a keyword. */
if (postfix && ends_with_colon && (len > 1))
{
/* Strip off colon. */
str = scm_c_substring (str, 0, len-1);
result = scm_string_to_symbol (str);
result = scm_symbol_to_keyword (result);
}
else
result = scm_string_to_symbol (str);
}
if (postfix && ends_with_colon && (read > 1))
result = scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer, 0, read - 1)));
else
{
/* For symbols smaller than `sizeof (buffer)', we don't need to recur
to Scheme strings. Therefore, we only create one Scheme object (a
symbol) per symbol read. */
if (postfix && ends_with_colon && (read > 1))
result = scm_from_locale_keywordn (buffer, read - 1);
else
result = scm_from_locale_symboln (buffer, read);
}
result = scm_string_to_symbol (buffer);
return result;
}
static SCM
scm_read_number_and_radix (int chr, SCM port)
scm_read_number_and_radix (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
SCM result, str = SCM_EOL;
SCM result;
size_t read;
char buffer[READER_BUFFER_SIZE];
SCM buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
unsigned int radix;
int overflow = 0;
switch (chr)
{
@ -684,22 +654,8 @@ scm_read_number_and_radix (int chr, SCM port)
radix = 10;
}
do
{
overflow = read_token (port, buffer, sizeof (buffer), &read);
if ((overflow) || (scm_is_pair (str)))
str = scm_cons (scm_from_locale_stringn (buffer, read), str);
}
while (overflow);
if (scm_is_pair (str))
{
str = scm_string_concatenate (scm_reverse_x (str, SCM_EOL));
result = scm_string_to_number (str, scm_from_uint (radix));
}
else
result = scm_c_locale_stringn_to_number (buffer, read, radix);
buffer = read_complete_token (port, &read);
result = scm_string_to_number (buffer, scm_from_uint (radix));
if (scm_is_true (result))
return result;
@ -729,7 +685,7 @@ scm_read_quote (int chr, SCM port)
case ',':
{
int c;
scm_t_wchar c;
c = scm_getc (port);
if ('@' == c)
@ -828,7 +784,10 @@ scm_read_semicolon_comment (int chr, SCM port)
{
int c;
for (c = scm_getc (port);
/* We use the get_byte here because there is no need to get the
locale correct with comment input. This presumes that newline
always represents itself no matter what the encoding is. */
for (c = scm_get_byte_or_eof (port);
(c != EOF) && (c != '\n');
c = scm_getc (port));
@ -856,14 +815,19 @@ scm_read_boolean (int chr, SCM port)
}
static SCM
scm_read_character (int chr, SCM port)
scm_read_character (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
SCM ch;
char charname[READER_CHAR_NAME_MAX_SIZE];
SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
size_t charname_len;
scm_t_wchar cp;
int overflow;
if (read_token (port, charname, sizeof (charname), &charname_len))
overflow = read_token (port, charname, &charname_len);
charname = scm_c_substring (charname, 0, charname_len);
if (overflow)
goto char_error;
if (charname_len == 0)
@ -878,28 +842,33 @@ scm_read_character (int chr, SCM port)
}
if (charname_len == 1)
return SCM_MAKE_CHAR (charname[0]);
return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
if (*charname >= '0' && *charname < '8')
cp = scm_i_string_ref (charname, 0);
if (cp >= '0' && cp < '8')
{
/* Dirk:FIXME:: This type of character syntax is not R5RS
* compliant. Further, it should be verified that the constant
* does only consist of octal digits. Finally, it should be
* checked whether the resulting fixnum is in the range of
* characters. */
SCM p = scm_c_locale_stringn_to_number (charname, charname_len, 8);
SCM p = scm_string_to_number (charname, scm_from_uint (8));
if (SCM_I_INUMP (p))
return SCM_MAKE_CHAR (SCM_I_INUM (p));
}
ch = scm_i_charname_to_char (charname, charname_len);
/* The names of characters should never have non-Latin1
characters. */
if (scm_i_is_narrow_string (charname)
|| scm_i_try_narrow_string (charname))
ch = scm_i_charname_to_char (scm_i_string_chars (charname),
charname_len);
if (scm_is_true (ch))
return ch;
char_error:
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
scm_list_1 (scm_from_locale_stringn (charname,
charname_len)));
scm_list_1 (charname));
return SCM_UNSPECIFIED;
}
@ -941,7 +910,7 @@ scm_read_srfi4_vector (int chr, SCM port)
}
static SCM
scm_read_bytevector (int chr, SCM port)
scm_read_bytevector (scm_t_wchar chr, SCM port)
{
chr = scm_getc (port);
if (chr != 'u')
@ -965,7 +934,7 @@ scm_read_bytevector (int chr, SCM port)
}
static SCM
scm_read_guile_bit_vector (int chr, SCM port)
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
{
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
terribly inefficient but who cares? */
@ -985,13 +954,17 @@ scm_read_guile_bit_vector (int chr, SCM port)
}
static inline SCM
scm_read_scsh_block_comment (int chr, SCM port)
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
{
int bang_seen = 0;
/* We can use the get_byte here because there is no need to get the
locale correct when reading comments. This presumes that
hash and exclamation points always represent themselves no
matter what the source encoding is.*/
for (;;)
{
int c = scm_getc (port);
int c = scm_get_byte_or_eof (port);
if (c == EOF)
scm_i_input_error ("skip_block_comment", port,
@ -1009,9 +982,9 @@ scm_read_scsh_block_comment (int chr, SCM port)
}
static SCM
scm_read_commented_expression (int chr, SCM port)
scm_read_commented_expression (scm_t_wchar chr, SCM port)
{
int c;
scm_t_wchar c;
c = flush_ws (port, (char *) NULL);
if (EOF == c)
@ -1023,19 +996,18 @@ scm_read_commented_expression (int chr, SCM port)
}
static SCM
scm_read_extended_symbol (int chr, SCM port)
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
{
/* Guile's extended symbol read syntax looks like this:
#{This is all a symbol name}#
So here, CHR is expected to be `{'. */
SCM result;
int saw_brace = 0, finished = 0;
size_t len = 0;
char buf[1024];
SCM buf = scm_i_make_string (1024, NULL);
result = scm_c_make_string (0, SCM_MAKE_CHAR ('X'));
buf = scm_i_string_start_writing (buf);
while ((chr = scm_getc (port)) != EOF)
{
@ -1049,32 +1021,30 @@ scm_read_extended_symbol (int chr, SCM port)
else
{
saw_brace = 0;
buf[len++] = '}';
buf[len++] = chr;
scm_i_string_set_x (buf, len++, '}');
scm_i_string_set_x (buf, len++, chr);
}
}
else if (chr == '}')
saw_brace = 1;
else
buf[len++] = chr;
scm_i_string_set_x (buf, len++, chr);
if (len >= sizeof (buf) - 2)
if (len >= scm_i_string_length (buf) - 2)
{
scm_string_append (scm_list_2 (result,
scm_from_locale_stringn (buf, len)));
scm_i_string_stop_writing ();
SCM addy = scm_i_make_string (1024, NULL);
buf = scm_string_append (scm_list_2 (buf, addy));
len = 0;
buf = scm_i_string_start_writing (buf);
}
if (finished)
break;
}
scm_i_string_stop_writing ();
if (len)
result = scm_string_append (scm_list_2
(result,
scm_from_locale_stringn (buf, len)));
return (scm_string_to_symbol (result));
return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
}
@ -1110,7 +1080,7 @@ scm_read_sharp_extension (int chr, SCM port)
/* The reader for the sharp `#' character. It basically dispatches reads
among the above token readers. */
static SCM
scm_read_sharp (int chr, SCM port)
scm_read_sharp (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
SCM result;
@ -1162,7 +1132,7 @@ scm_read_sharp (int chr, SCM port)
{
/* When next char is '(', it really is an old-style
uniform array. */
int next_c = scm_getc (port);
scm_t_wchar next_c = scm_getc (port);
if (next_c != EOF)
scm_ungetc (next_c, port);
if (next_c == '(')
@ -1210,7 +1180,7 @@ scm_read_expression (SCM port)
{
while (1)
{
register int chr;
register scm_t_wchar chr;
chr = scm_getc (port);
@ -1421,6 +1391,127 @@ scm_get_hash_procedure (int c)
}
}
#define SCM_ENCODING_SEARCH_SIZE (500)
/* Search the first few hundred characters of a file for
an emacs-like coding declaration. */
char *
scm_scan_for_encoding (SCM port)
{
char header[SCM_ENCODING_SEARCH_SIZE+1];
size_t bytes_read;
char *encoding = NULL;
int utf8_bom = 0;
char *pos;
int i;
int in_comment;
bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
if (bytes_read > 3
&& header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
utf8_bom = 1;
/* search past "coding[:=]" */
pos = header;
while (1)
{
if ((pos = strstr(pos, "coding")) == NULL)
return NULL;
pos += strlen("coding");
if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
(*pos == ':' || *pos == '='))
{
pos ++;
break;
}
}
/* skip spaces */
while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
(*pos == ' ' || *pos == '\t'))
pos ++;
/* grab the next token */
i = 0;
while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
&& (isalnum(pos[i]) || pos[i] == '_' || pos[i] == '-' || pos[i] == '.'))
i++;
if (i == 0)
return NULL;
encoding = scm_malloc (i+1);
memcpy (encoding, pos, i);
encoding[i] ='\0';
for (i = 0; i < strlen(encoding); i++)
encoding[i] = toupper(encoding[i]);
/* push backwards to make sure we were in a comment */
in_comment = 0;
while (pos - i - header > 0)
{
if (*(pos - i) == '\n')
{
/* This wasn't in a semicolon comment. Check for a
hash-bang comment. */
char *beg = strstr (header, "#!");
char *end = strstr (header, "!#");
if (beg < pos && pos < end)
in_comment = 1;
break;
}
if (*(pos - i) == ';')
{
in_comment = 1;
break;
}
i ++;
}
if (!in_comment)
{
/* This wasn't in a comment */
free (encoding);
return NULL;
}
if (utf8_bom && strcmp(encoding, "UTF-8"))
scm_misc_error (NULL,
"the port input declares the encoding ~s but is encoded as UTF-8",
scm_list_1 (scm_from_locale_string (encoding)));
return encoding;
}
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
(SCM port),
"Scans the port for an EMACS-like character coding declaration\n"
"near the top of the contents of a port with random-acessible contents.\n"
"The coding declaration is of the form\n"
"@code{coding: XXXXX} and must appear in a scheme comment.\n"
"\n"
"Returns a string containing the character encoding of the file\n"
"if a declaration was found, or @code{#f} otherwise.\n")
#define FUNC_NAME s_scm_file_encoding
{
char *enc;
SCM s_enc;
enc = scm_scan_for_encoding (port);
if (enc == NULL)
return SCM_BOOL_F;
else
{
s_enc = scm_from_locale_string (enc);
free (enc);
return s_enc;
}
return SCM_BOOL_F;
}
#undef FUNC_NAME
void
scm_init_read ()
{

View file

@ -56,6 +56,8 @@ SCM_API SCM scm_read_options (SCM setting);
SCM_API SCM scm_read (SCM port);
SCM_API size_t scm_read_token (int ic, SCM * tok_buf, SCM port, int weird);
SCM_API SCM scm_read_hash_extend (SCM chr, SCM proc);
SCM_INTERNAL char *scm_scan_for_encoding (SCM port);
SCM_API SCM scm_file_encoding (SCM port);
SCM_INTERNAL void scm_i_input_error (const char *func, SCM port,
const char *message, SCM arg)

View file

@ -33,6 +33,7 @@
#include "libguile/strings.h"
#include "libguile/vectors.h"
#include "libguile/dynwind.h"
#include "libguile/srfi-13.h"
#include "libguile/validate.h"
#include "libguile/socket.h"
@ -1414,6 +1415,8 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
"protocols, if a packet larger than this limit is encountered\n"
"then some data\n"
"will be irrevocably lost.\n\n"
"The data is assumed to be binary, and there is no decoding of\n"
"of locale-encoded strings.\n\n"
"The optional @var{flags} argument is a value or\n"
"bitwise OR of MSG_OOB, MSG_PEEK, MSG_DONTROUTE etc.\n\n"
"The value returned is the number of bytes read from the\n"
@ -1428,6 +1431,7 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
int flg;
char *dest;
size_t len;
SCM msg;
SCM_VALIDATE_OPFPORT (1, sock);
SCM_VALIDATE_STRING (2, buf);
@ -1437,16 +1441,16 @@ SCM_DEFINE (scm_recv, "recv!", 2, 1, 0,
flg = scm_to_int (flags);
fd = SCM_FPORT_FDES (sock);
len = scm_i_string_length (buf);
buf = scm_i_string_start_writing (buf);
dest = scm_i_string_writable_chars (buf);
len = scm_i_string_length (buf);
msg = scm_i_make_string (len, &dest);
SCM_SYSCALL (rv = recv (fd, dest, len, flg));
scm_i_string_stop_writing ();
scm_string_copy_x (buf, scm_from_int (0),
msg, scm_from_int (0), scm_from_size_t (len));
if (rv == -1)
SCM_SYSERROR;
scm_remember_upto_here_1 (buf);
scm_remember_upto_here_2 (buf, msg);
return scm_from_int (rv);
}
#undef FUNC_NAME
@ -1464,18 +1468,28 @@ SCM_DEFINE (scm_send, "send", 2, 1, 0,
"bitwise OR of MSG_OOB, MSG_PEEK, MSG_DONTROUTE etc.\n\n"
"Note that the data is written directly to the socket\n"
"file descriptor:\n"
"any unflushed buffered port data is ignored.")
"any unflushed buffered port data is ignored.\n\n"
"This operation is defined only for strings containing codepoints\n"
"zero to 255.")
#define FUNC_NAME s_scm_send
{
int rv;
int fd;
int flg;
const char *src;
char *src;
size_t len;
sock = SCM_COERCE_OUTPORT (sock);
SCM_VALIDATE_OPFPORT (1, sock);
SCM_VALIDATE_STRING (2, message);
/* If the string is wide, see if it can be coerced into
a narrow string. */
if (!scm_i_is_narrow_string (message)
|| scm_i_try_narrow_string (message))
SCM_MISC_ERROR ("the message string is not 8-bit: ~s",
scm_list_1 (message));
if (SCM_UNBNDP (flags))
flg = 0;
else
@ -1592,7 +1606,9 @@ SCM_DEFINE (scm_sendto, "sendto", 3, 1, 1,
"set to be non-blocking.\n"
"Note that the data is written directly to the socket\n"
"file descriptor:\n"
"any unflushed buffered port data is ignored.")
"any unflushed buffered port data is ignored.\n"
"This operation is defined only for strings containing codepoints\n"
"zero to 255.")
#define FUNC_NAME s_scm_sendto
{
int rv;

View file

@ -46,6 +46,7 @@
#include <stdio.h>
#include <errno.h>
#include <strftime.h>
#include <unistr.h>
#include "libguile/_scm.h"
#include "libguile/async.h"
@ -53,6 +54,7 @@
#include "libguile/strings.h"
#include "libguile/vectors.h"
#include "libguile/dynwind.h"
#include "libguile/strings.h"
#include "libguile/validate.h"
#include "libguile/stime.h"
@ -624,18 +626,20 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
{
struct tm t;
char *tbuf;
scm_t_uint8 *tbuf;
int size = 50;
const char *fmt;
char *myfmt;
scm_t_uint8 *fmt;
scm_t_uint8 *myfmt;
int len;
SCM result;
SCM_VALIDATE_STRING (1, format);
bdtime2c (stime, &t, SCM_ARG2, FUNC_NAME);
fmt = scm_i_string_chars (format);
len = scm_i_string_length (format);
/* Convert string to UTF-8 so that non-ASCII characters in the
format are passed through unchanged. */
fmt = scm_i_to_utf8_string (format);
len = strlen ((const char *) fmt);
/* Ugly hack: strftime can return 0 if its buffer is too small,
but some valid time strings (e.g. "%p") can sometimes produce
@ -643,9 +647,11 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
character to the format string, so that valid returns are always
nonzero. */
myfmt = scm_malloc (len+2);
*myfmt = 'x';
strncpy(myfmt+1, fmt, len);
myfmt[len+1] = 0;
*myfmt = (scm_t_uint8) 'x';
strncpy ((char *) myfmt + 1, (const char *) fmt, len);
myfmt[len + 1] = 0;
scm_remember_upto_here_1 (format);
free (fmt);
tbuf = scm_malloc (size);
{
@ -680,7 +686,8 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
/* Use `nstrftime ()' from Gnulib, which supports all GNU extensions
supported by glibc. */
while ((len = nstrftime (tbuf, size, myfmt, &t, 0, 0)) == 0)
while ((len = nstrftime ((char *) tbuf, size,
(const char *) myfmt, &t, 0, 0)) == 0)
{
free (tbuf);
size *= 2;
@ -696,7 +703,7 @@ SCM_DEFINE (scm_strftime, "strftime", 2, 0, 0,
#endif
}
result = scm_from_locale_stringn (tbuf + 1, len - 1);
result = scm_i_from_utf8_string ((const scm_t_uint8 *) tbuf + 1);
free (tbuf);
free (myfmt);
#if HAVE_STRUCT_TM_TM_ZONE
@ -722,14 +729,17 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
#define FUNC_NAME s_scm_strptime
{
struct tm t;
const char *fmt, *str, *rest;
scm_t_uint8 *fmt, *str, *rest;
size_t used_len;
long zoff;
SCM_VALIDATE_STRING (1, format);
SCM_VALIDATE_STRING (2, string);
fmt = scm_i_string_chars (format);
str = scm_i_string_chars (string);
/* Convert strings to UTF-8 so that non-ASCII characters are passed
through unchanged. */
fmt = scm_i_to_utf8_string (format);
str = scm_i_to_utf8_string (string);
/* initialize the struct tm */
#define tm_init(field) t.field = 0
@ -751,7 +761,8 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
fields, hence the use of SCM_CRITICAL_SECTION_START. */
t.tm_isdst = -1;
SCM_CRITICAL_SECTION_START;
rest = strptime (str, fmt, &t);
rest = (scm_t_uint8 *) strptime ((const char *) str,
(const char *) fmt, &t);
SCM_CRITICAL_SECTION_END;
if (rest == NULL)
{
@ -759,6 +770,9 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
instance it doesn't. Force a sensible value for our error
message. */
errno = EINVAL;
scm_remember_upto_here_2 (format, string);
free (str);
free (fmt);
SCM_SYSERROR;
}
@ -770,8 +784,14 @@ SCM_DEFINE (scm_strptime, "strptime", 2, 0, 0,
zoff = 0;
#endif
/* Compute the number of UTF-8 characters. */
used_len = u8_strnlen (str, rest-str);
scm_remember_upto_here_2 (format, string);
free (str);
free (fmt);
return scm_cons (filltime (&t, zoff, NULL),
scm_from_signed_integer (rest - str));
scm_from_signed_integer (used_len));
}
#undef FUNC_NAME
#endif /* HAVE_STRPTIME */

View file

@ -28,6 +28,8 @@
#include <unistr.h>
#include <uniconv.h>
#include "striconveh.h"
#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/root.h"
@ -240,6 +242,36 @@ widen_stringbuf (SCM buf)
}
}
/* Convert a stringbuf of 32-bit UCS-4-encoded characters to one
containing 8-bit Latin-1-encoded characters, if possible. */
static void
narrow_stringbuf (SCM buf)
{
size_t i, len;
scm_t_wchar *wmem;
char *mem;
if (!STRINGBUF_WIDE (buf))
return;
len = STRINGBUF_OUTLINE_LENGTH (buf);
i = 0;
wmem = STRINGBUF_WIDE_CHARS (buf);
while (i < len)
if (wmem[i++] > 0xFF)
return;
mem = scm_gc_malloc (sizeof (char) * (len + 1), "string");
for (i = 0; i < len; i++)
mem[i] = (unsigned char) wmem[i];
scm_gc_free (wmem, sizeof (scm_t_wchar) * (len + 1), "string");
SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_WIDE);
SCM_SET_CELL_WORD_1 (buf, mem);
SCM_SET_CELL_WORD_2 (buf, len);
}
scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
/* Copy-on-write strings.
@ -460,6 +492,18 @@ scm_i_is_narrow_string (SCM str)
return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
}
/* Try to coerce a string to be narrow. It if is narrow already, do
nothing. If it is wide, shrink it to narrow if none of its
characters are above 0xFF. Return true if the string is narrow or
was made to be narrow. */
int
scm_i_try_narrow_string (SCM str)
{
narrow_stringbuf (STRING_STRINGBUF (str));
return scm_i_is_narrow_string (str);
}
/* Returns a pointer to the 8-bit Latin-1 encoded character array of
STR. */
const char *
@ -591,6 +635,37 @@ scm_i_string_ref (SCM str, size_t x)
return scm_i_string_wide_chars (str)[x];
}
/* Returns index+1 of the first char in STR that matches C, or
0 if the char is not found. */
int
scm_i_string_contains_char (SCM str, char ch)
{
size_t i;
size_t len = scm_i_string_length (str);
i = 0;
if (scm_i_is_narrow_string (str))
{
while (i < len)
{
if (scm_i_string_chars (str)[i] == ch)
return i+1;
i++;
}
}
else
{
while (i < len)
{
if (scm_i_string_wide_chars (str)[i]
== (unsigned char) ch)
return i+1;
i++;
}
}
return 0;
}
int
scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
{
@ -624,7 +699,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
if (scm_i_is_narrow_string (str))
{
char *dst = scm_i_string_writable_chars (str);
dst[p] = (char) (unsigned char) chr;
dst[p] = chr;
}
else
{
@ -634,7 +709,7 @@ scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
}
/* Symbols.
Basic symbol creation and accessing is done here, the rest is in
symbols.[hc]. This has been done to keep stringbufs and the
internals of strings and string-like objects confined to this file.
@ -867,7 +942,7 @@ SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
else
e5 = scm_cons (scm_from_locale_symbol ("read-only"),
SCM_BOOL_F);
/* Stringbuf info */
if (!STRINGBUF_WIDE (buf))
{
@ -1402,20 +1477,105 @@ scm_is_string (SCM obj)
return IS_STRING (obj);
}
static SCM
scm_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler)
{
size_t u32len, i;
scm_t_wchar *u32;
int wide = 0;
SCM res;
if (encoding == NULL)
{
/* If encoding is null, use Latin-1. */
char *buf;
res = scm_i_make_string (len, &buf);
memcpy (buf, str, len);
return res;
}
u32len = 0;
u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
(enum iconv_ilseq_handler)
handler,
str, len,
NULL,
NULL, &u32len);
if (u32 == NULL)
{
if (errno == ENOMEM)
scm_memory_error ("locale string conversion");
else
{
/* There are invalid sequences in the input string. */
SCM errstr;
char *dst;
errstr = scm_i_make_string (len, &dst);
memcpy (dst, str, len);
scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
scm_list_2 (scm_from_locale_string (encoding),
errstr));
scm_remember_upto_here_1 (errstr);
}
}
i = 0;
while (i < u32len)
if (u32[i++] > 0xFF)
{
wide = 1;
break;
}
if (!wide)
{
char *dst;
res = scm_i_make_string (u32len, &dst);
for (i = 0; i < u32len; i ++)
dst[i] = (unsigned char) u32[i];
dst[u32len] = '\0';
}
else
{
scm_t_wchar *wdst;
res = scm_i_make_wide_string (u32len, &wdst);
u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
wdst[u32len] = 0;
}
free (u32);
return res;
}
SCM
scm_from_locale_stringn (const char *str, size_t len)
{
SCM res;
char *dst;
const char *enc;
scm_t_string_failed_conversion_handler hndl;
SCM inport;
scm_t_port *pt;
if (len == (size_t) -1)
len = strlen (str);
if (len == 0)
return scm_nullstr;
res = scm_i_make_string (len, &dst);
memcpy (dst, str, len);
return res;
inport = scm_current_input_port ();
if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
{
pt = SCM_PTAB_ENTRY (inport);
enc = pt->encoding;
hndl = pt->ilseq_handler;
}
else
{
enc = NULL;
hndl = SCM_FAILED_CONVERSION_ERROR;
}
return scm_from_stringn (str, len, enc, hndl);
}
SCM
@ -1427,6 +1587,14 @@ scm_from_locale_string (const char *str)
return scm_from_locale_stringn (str, -1);
}
SCM
scm_i_from_utf8_string (const scm_t_uint8 *str)
{
return scm_from_stringn ((const char *) str,
strlen ((char *) str), "UTF-8",
SCM_FAILED_CONVERSION_ERROR);
}
/* Create a new scheme string from the C string STR. The memory of
STR may be used directly as storage for the new string. */
SCM
@ -1515,23 +1683,33 @@ unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
char *
scm_to_locale_stringn (SCM str, size_t * lenp)
{
SCM outport;
scm_t_port *pt;
const char *enc;
/* In the future, enc will hold the port's encoding. */
enc = NULL;
outport = scm_current_output_port ();
if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
{
pt = SCM_PTAB_ENTRY (outport);
enc = pt->encoding;
}
else
enc = NULL;
return scm_to_stringn (str, lenp, enc,
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
return scm_to_stringn (str, lenp,
enc,
scm_i_get_conversion_strategy (SCM_BOOL_F));
}
/* Low-level scheme to C string conversion function. */
char *
scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
scm_t_string_failed_conversion_handler handler)
{
static const char iso[11] = "ISO-8859-1";
char *buf;
size_t ilen, len, i;
int ret;
const char *enc;
if (!scm_is_string (str))
scm_wrong_type_arg_msg (NULL, 0, str, "string");
@ -1545,7 +1723,7 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
*lenp = 0;
return buf;
}
if (lenp == NULL)
for (i = 0; i < ilen; i++)
if (scm_i_string_ref (str, i) == '\0')
@ -1553,8 +1731,10 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
"string contains #\\nul character: ~S",
scm_list_1 (str));
if (scm_i_is_narrow_string (str))
if (scm_i_is_narrow_string (str) && (encoding == NULL))
{
/* If using native Latin-1 encoding, just copy the string
contents. */
if (lenp)
{
buf = scm_malloc (ilen);
@ -1571,20 +1751,44 @@ scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
}
}
buf = NULL;
len = 0;
buf = u32_conv_to_encoding (iso,
(enum iconv_ilseq_handler) handler,
(scm_t_uint32 *) scm_i_string_wide_chars (str),
ilen, NULL, NULL, &len);
if (buf == NULL)
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
scm_list_2 (scm_from_locale_string (iso), str));
enc = encoding;
if (enc == NULL)
enc = "ISO-8859-1";
if (scm_i_is_narrow_string (str))
{
ret = mem_iconveh (scm_i_string_chars (str), ilen,
"ISO-8859-1", enc,
(enum iconv_ilseq_handler) handler, NULL,
&buf, &len);
if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
unistring_escapes_to_guile_escapes (&buf, &len);
if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
unistring_escapes_to_guile_escapes (&buf, &len);
if (ret != 0)
{
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
scm_list_2 (scm_from_locale_string (enc),
str));
}
}
else
{
buf = u32_conv_to_encoding (enc,
(enum iconv_ilseq_handler) handler,
(scm_t_uint32 *) scm_i_string_wide_chars (str),
ilen,
NULL,
NULL, &len);
if (buf == NULL)
{
scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
scm_list_2 (scm_from_locale_string (enc),
str));
}
}
if (lenp)
*lenp = len;
else
@ -1603,6 +1807,14 @@ scm_to_locale_string (SCM str)
return scm_to_locale_stringn (str, NULL);
}
scm_t_uint8 *
scm_i_to_utf8_string (SCM str)
{
char *u8str;
u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
return (scm_t_uint8 *) u8str;
}
size_t
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
{

View file

@ -124,6 +124,7 @@ SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
SCM_API int scm_is_string (SCM x);
SCM_API SCM scm_from_locale_string (const char *str);
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
SCM_INTERNAL SCM scm_i_from_utf8_string (const scm_t_uint8 *str);
SCM_API SCM scm_take_locale_string (char *str);
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
SCM_API char *scm_to_locale_string (SCM str);
@ -132,6 +133,7 @@ SCM_INTERNAL char *scm_to_stringn (SCM str, size_t *lenp,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
SCM_INTERNAL scm_t_uint8 *scm_i_to_utf8_string (SCM str);
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);
SCM_API SCM scm_makfromstrs (int argc, char **argv);
@ -152,6 +154,7 @@ SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void);
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
/* internal functions related to symbols. */
@ -168,6 +171,7 @@ SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);

View file

@ -39,6 +39,7 @@
#include "libguile/modules.h"
#include "libguile/validate.h"
#include "libguile/deprecation.h"
#include "libguile/srfi-4.h"
#include "libguile/strports.h"
@ -289,42 +290,33 @@ st_truncate (SCM port, scm_t_off length)
}
SCM
scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len, long modes, const char *caller)
{
SCM z;
SCM z, str;
scm_t_port *pt;
size_t str_len, c_pos;
size_t c_pos;
char *buf;
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
/* Because ports are inherently 8-bit, strings need to be converted
to a locale representation for storage. But, since string ports
rely on string functionality for their memory management, we need
to create a new string that has the 8-bit locale representation
of the underlying string. This violates the guideline that the
internal encoding of characters in strings is in unicode
codepoints. */
str = scm_i_make_string (str_len, &buf);
memcpy (buf, locale_str, str_len);
str_len = scm_i_string_length (str);
c_pos = scm_to_unsigned_integer (pos, 0, str_len);
if (!((modes & SCM_WRTNG) || (modes & SCM_RDNG)))
scm_misc_error ("scm_mkstrport", "port must read or write", SCM_EOL);
/* XXX
Make a new string to isolate us from changes to the original.
This is done so that we can rely on scm_i_string_chars to stay in
place even across SCM_TICKs.
Additionally, when we are going to write to the string, we make a
copy so that we can write to it without having to use
scm_i_string_writable_chars.
*/
if (modes & SCM_WRTNG)
str = scm_c_substring_copy (str, 0, str_len);
else
str = scm_c_substring (str, 0, str_len);
scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
z = scm_new_port_table_entry (scm_tc16_strport);
pt = SCM_PTAB_ENTRY(z);
SCM_SETSTREAM (z, SCM_UNPACK (str));
SCM_SET_CELL_TYPE(z, scm_tc16_strport|modes);
/* see above why we can use scm_i_string_chars here. */
pt->write_buf = pt->read_buf = (unsigned char *) scm_i_string_chars (str);
pt->read_pos = pt->write_pos = pt->read_buf + c_pos;
pt->write_buf_size = pt->read_buf_size = str_len;
@ -340,22 +332,60 @@ scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
return z;
}
SCM
scm_mkstrport (SCM pos, SCM str, long modes, const char *caller)
{
SCM z;
size_t str_len;
char *buf;
SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, caller);
/* Because ports are inherently 8-bit, strings need to be converted
to a locale representation for storage. But, since string ports
rely on string functionality for their memory management, we need
to create a new string that has the 8-bit locale representation
of the underlying string. This violates the guideline that the
internal encoding of characters in strings is in unicode
codepoints. */
buf = scm_to_locale_stringn (str, &str_len);
z = scm_i_mkstrport (pos, buf, str_len, modes, caller);
free (buf);
return z;
}
/* create a new string from a string port's buffer. */
SCM scm_strport_to_string (SCM port)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
SCM str;
char *dst;
if (pt->rw_active == SCM_PORT_WRITE)
st_flush (port);
str = scm_i_make_string (pt->read_buf_size, &dst);
memcpy (dst, (char *) pt->read_buf, pt->read_buf_size);
str = scm_from_locale_stringn ((char *)pt->read_buf, pt->read_buf_size);
scm_remember_upto_here_1 (port);
return str;
}
/* Create a vector containing the locale representation of the string in the
port's buffer. */
SCM scm_strport_to_locale_u8vector (SCM port)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
SCM vec;
char *buf;
if (pt->rw_active == SCM_PORT_WRITE)
st_flush (port);
buf = scm_malloc (pt->read_buf_size);
memcpy (buf, pt->read_buf, pt->read_buf_size);
vec = scm_take_u8vector ((unsigned char *) buf, pt->read_buf_size);
scm_remember_upto_here_1 (port);
return vec;
}
SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
(SCM obj, SCM printer),
"Return a Scheme string obtained by printing @var{obj}.\n"
@ -380,6 +410,25 @@ SCM_DEFINE (scm_object_to_string, "object->string", 1, 1, 0,
}
#undef FUNC_NAME
SCM_DEFINE (scm_call_with_output_locale_u8vector, "call-with-output-locale-u8vector", 1, 0, 0,
(SCM proc),
"Calls the one-argument procedure @var{proc} with a newly created output\n"
"port. When the function returns, a vector containing the bytes of a\n"
"locale representation of the characters written into the port is returned\n")
#define FUNC_NAME s_scm_call_with_output_locale_u8vector
{
SCM p;
p = scm_mkstrport (SCM_INUM0,
scm_make_string (SCM_INUM0, SCM_UNDEFINED),
SCM_OPN | SCM_WRTNG,
FUNC_NAME);
scm_call_1 (proc, p);
return scm_get_output_locale_u8vector (p);
}
#undef FUNC_NAME
SCM_DEFINE (scm_call_with_output_string, "call-with-output-string", 1, 0, 0,
(SCM proc),
"Calls the one-argument procedure @var{proc} with a newly created output\n"
@ -424,6 +473,27 @@ SCM_DEFINE (scm_open_input_string, "open-input-string", 1, 0, 0,
}
#undef FUNC_NAME
SCM_DEFINE (scm_open_input_locale_u8vector, "open-input-locale-u8vector", 1, 0, 0,
(SCM vec),
"Take a u8vector containing the bytes of a string encoded in the\n"
"current locale and return an input port that delivers characters\n"
"from the string. The port can be closed by\n"
"@code{close-input-port}, though its storage will be reclaimed\n"
"by the garbage collector if it becomes inaccessible.")
#define FUNC_NAME s_scm_open_input_locale_u8vector
{
scm_t_array_handle hnd;
ssize_t inc;
size_t len;
const scm_t_uint8 *buf;
buf = scm_u8vector_elements (vec, &hnd, &len, &inc);
SCM p = scm_i_mkstrport(SCM_INUM0, (const char *) buf, len, SCM_OPN | SCM_RDNG, FUNC_NAME);
scm_array_handle_release (&hnd);
return p;
}
#undef FUNC_NAME
SCM_DEFINE (scm_open_output_string, "open-output-string", 0, 0, 0,
(void),
"Return an output port that will accumulate characters for\n"
@ -456,11 +526,26 @@ SCM_DEFINE (scm_get_output_string, "get-output-string", 1, 0, 0,
#undef FUNC_NAME
SCM_DEFINE (scm_get_output_locale_u8vector, "get-output-locale-u8vector", 1, 0, 0,
(SCM port),
"Given an output port created by @code{open-output-string},\n"
"return a u8 vector containing the characters of the string\n"
"encoded in the current locale.")
#define FUNC_NAME s_scm_get_output_locale_u8vector
{
SCM_VALIDATE_OPOUTSTRPORT (1, port);
return scm_strport_to_locale_u8vector (port);
}
#undef FUNC_NAME
/* Given a null-terminated string EXPR containing a Scheme expression
read it, and return it as an SCM value. */
SCM
scm_c_read_string (const char *expr)
{
/* FIXME: the c string gets packed into a string, only to get
immediately unpacked in scm_mkstrport. */
SCM port = scm_mkstrport (SCM_INUM0,
scm_from_locale_string (expr),
SCM_OPN | SCM_RDNG,

View file

@ -44,13 +44,19 @@ SCM_API scm_t_bits scm_tc16_strport;
SCM_API SCM scm_mkstrport (SCM pos, SCM str, long modes, const char * caller);
SCM_INTERNAL SCM scm_i_mkstrport (SCM pos, const char *locale_str, size_t str_len,
long modes, const char *caller);
SCM_API SCM scm_strport_to_string (SCM port);
SCM_API SCM scm_strport_to_locale_u8vector (SCM port);
SCM_API SCM scm_object_to_string (SCM obj, SCM printer);
SCM_API SCM scm_call_with_output_string (SCM proc);
SCM_API SCM scm_call_with_output_locale_u8vector (SCM proc);
SCM_API SCM scm_call_with_input_string (SCM str, SCM proc);
SCM_API SCM scm_open_input_string (SCM str);
SCM_API SCM scm_open_input_locale_u8vector (SCM str);
SCM_API SCM scm_open_output_string (void);
SCM_API SCM scm_get_output_string (SCM port);
SCM_API SCM scm_get_output_locale_u8vector (SCM port);
SCM_API SCM scm_c_read_string (const char *expr);
SCM_API SCM scm_c_eval_string (const char *expr);
SCM_API SCM scm_c_eval_string_in_module (const char *expr, SCM module);

View file

@ -30,6 +30,7 @@
#include "libguile/hashtab.h"
#include "libguile/ports.h"
#include "libguile/strings.h"
#include "libguile/srfi-13.h"
#include "libguile/validate.h"
#include "libguile/struct.h"
@ -61,9 +62,9 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
{
SCM new_sym;
SCM_VALIDATE_STRING (1, fields);
scm_t_wchar c;
{ /* scope */
const char * field_desc;
size_t len;
int x;
@ -72,11 +73,9 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
SCM_MISC_ERROR ("odd length field specification: ~S",
scm_list_1 (fields));
field_desc = scm_i_string_chars (fields);
for (x = 0; x < len; x += 2)
{
switch (field_desc[x])
switch (c = scm_i_string_ref (fields, x))
{
case 'u':
case 'p':
@ -88,13 +87,13 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
break;
default:
SCM_MISC_ERROR ("unrecognized field type: ~S",
scm_list_1 (SCM_MAKE_CHAR (field_desc[x])));
scm_list_1 (SCM_MAKE_CHAR (c)));
}
switch (field_desc[x + 1])
switch (c = scm_i_string_ref (fields, x + 1))
{
case 'w':
if (field_desc[x] == 's')
if (scm_i_string_ref (fields, x) == 's')
SCM_MISC_ERROR ("self fields not writable", SCM_EOL);
case 'r':
case 'o':
@ -102,7 +101,7 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
case 'R':
case 'W':
case 'O':
if (field_desc[x] == 's')
if (scm_i_string_ref (fields, x) == 's')
SCM_MISC_ERROR ("self fields not allowed in tail array",
SCM_EOL);
if (x != len - 2)
@ -111,12 +110,12 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
break;
default:
SCM_MISC_ERROR ("unrecognized ref specification: ~S",
scm_list_1 (SCM_MAKE_CHAR (field_desc[x + 1])));
scm_list_1 (SCM_MAKE_CHAR (c)));
}
#if 0
if (field_desc[x] == 'd')
if (scm_i_string_ref (fields, x, 'd'))
{
if (field_desc[x + 2] != '-')
if (!scm_i_string_ref (fields, x+2, '-'))
SCM_MISC_ERROR ("missing dash field at position ~A",
scm_list_1 (scm_from_int (x / 2)));
x += 2;
@ -138,18 +137,18 @@ SCM_DEFINE (scm_make_struct_layout, "make-struct-layout", 1, 0, 0,
static void
scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM inits)
{
unsigned const char *fields_desc =
(unsigned const char *) scm_i_symbol_chars (layout) - 2;
unsigned char prot = 0;
scm_t_wchar prot = 0;
int n_fields = scm_i_symbol_length (layout) / 2;
int tailp = 0;
int i;
i = -2;
while (n_fields)
{
if (!tailp)
{
fields_desc += 2;
prot = fields_desc[1];
i += 2;
prot = scm_i_symbol_ref (layout, i+1);
if (SCM_LAYOUT_TAILP (prot))
{
tailp = 1;
@ -160,8 +159,7 @@ scm_struct_init (SCM handle, SCM layout, scm_t_bits * mem, int tail_elts, SCM in
break;
}
}
switch (*fields_desc)
switch (scm_i_symbol_ref (layout, i))
{
#if 0
case 'i':
@ -237,7 +235,8 @@ SCM_DEFINE (scm_struct_vtable_p, "struct-vtable?", 1, 0, 0,
{
SCM layout;
scm_t_bits * mem;
int tmp;
SCM tmp;
size_t len;
if (!SCM_STRUCTP (x))
return SCM_BOOL_F;
@ -248,11 +247,14 @@ SCM_DEFINE (scm_struct_vtable_p, "struct-vtable?", 1, 0, 0,
< scm_i_string_length (required_vtable_fields))
return SCM_BOOL_F;
tmp = strncmp (scm_i_symbol_chars (layout),
scm_i_string_chars (required_vtable_fields),
scm_i_string_length (required_vtable_fields));
scm_remember_upto_here_1 (required_vtable_fields);
if (tmp)
len = scm_i_string_length (required_vtable_fields);
tmp = scm_string_eq (scm_symbol_to_string (layout),
required_vtable_fields,
scm_from_size_t (0),
scm_from_size_t (len),
scm_from_size_t (0),
scm_from_size_t (len));
if (scm_is_false (tmp))
return SCM_BOOL_F;
mem = SCM_STRUCT_DATA (x);
@ -646,8 +648,7 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
size_t layout_len;
size_t p;
scm_t_bits n_fields;
const char *fields_desc;
char field_type = 0;
scm_t_wchar field_type = 0;
SCM_VALIDATE_STRUCT (1, handle);
@ -656,7 +657,6 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
data = SCM_STRUCT_DATA (handle);
p = scm_to_size_t (pos);
fields_desc = scm_i_symbol_chars (layout);
layout_len = scm_i_symbol_length (layout);
if (SCM_STRUCT_VTABLE_FLAGS (handle) & SCM_STRUCTF_LIGHT)
/* no extra words */
@ -668,9 +668,9 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
if (p * 2 < layout_len)
{
char ref;
field_type = fields_desc[p * 2];
ref = fields_desc[p * 2 + 1];
scm_t_wchar ref;
field_type = scm_i_symbol_ref (layout, p * 2);
ref = scm_i_symbol_ref (layout, p * 2 + 1);
if ((ref != 'r') && (ref != 'w'))
{
if ((ref == 'R') || (ref == 'W'))
@ -679,8 +679,8 @@ SCM_DEFINE (scm_struct_ref, "struct-ref", 2, 0, 0,
SCM_MISC_ERROR ("ref denied for field ~A", scm_list_1 (pos));
}
}
else if (fields_desc[layout_len - 1] != 'O')
field_type = fields_desc[layout_len - 2];
else if (scm_i_symbol_ref (layout, layout_len - 1) != 'O')
field_type = scm_i_symbol_ref(layout, layout_len - 2);
else
SCM_MISC_ERROR ("ref denied for field ~A", scm_list_1 (pos));
@ -728,8 +728,7 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
size_t layout_len;
size_t p;
int n_fields;
const char *fields_desc;
char field_type = 0;
scm_t_wchar field_type = 0;
SCM_VALIDATE_STRUCT (1, handle);
@ -737,7 +736,6 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
data = SCM_STRUCT_DATA (handle);
p = scm_to_size_t (pos);
fields_desc = scm_i_symbol_chars (layout);
layout_len = scm_i_symbol_length (layout);
if (SCM_STRUCT_VTABLE_FLAGS (handle) & SCM_STRUCTF_LIGHT)
/* no extra words */
@ -750,13 +748,13 @@ SCM_DEFINE (scm_struct_set_x, "struct-set!", 3, 0, 0,
if (p * 2 < layout_len)
{
char set_x;
field_type = fields_desc[p * 2];
set_x = fields_desc [p * 2 + 1];
field_type = scm_i_symbol_ref (layout, p * 2);
set_x = scm_i_symbol_ref (layout, p * 2 + 1);
if (set_x != 'w')
SCM_MISC_ERROR ("set! denied for field ~A", scm_list_1 (pos));
}
else if (fields_desc[layout_len - 1] == 'W')
field_type = fields_desc[layout_len - 2];
else if (scm_i_symbol_ref (layout, layout_len - 1) == 'W')
field_type = scm_i_symbol_ref (layout, layout_len - 2);
else
SCM_MISC_ERROR ("set! denied for field ~A", scm_list_1 (pos));

View file

@ -23,6 +23,7 @@
#endif
#include <stdio.h>
#include <unistdio.h>
#include "libguile/_scm.h"
#include "libguile/async.h"
#include "libguile/smob.h"
@ -744,8 +745,12 @@ scm_ithrow (SCM key, SCM args, int noreturn SCM_UNUSED)
*/
fprintf (stderr, "throw from within critical section.\n");
if (scm_is_symbol (key))
fprintf (stderr, "error key: %s\n", scm_i_symbol_chars (key));
{
if (scm_i_is_narrow_symbol (key))
fprintf (stderr, "error key: %s\n", scm_i_symbol_chars (key));
else
ulc_fprintf (stderr, "error key: %llU\n", scm_i_symbol_wide_chars (key));
}
for (; scm_is_pair (s); s = scm_cdr (s), i++)
{

View file

@ -145,8 +145,11 @@
(from (current-language))
(to 'objcode)
(opts '()))
(let ((comp (or output-file (compiled-file-name file)))
(in (open-input-file file)))
(let* ((comp (or output-file (compiled-file-name file)))
(in (open-input-file file))
(enc (file-encoding in)))
(if enc
(set-port-encoding! in enc))
(ensure-writable-dir (dirname comp))
(call-with-output-file/atomic comp
(lambda (port)

View file

@ -0,0 +1,139 @@
;;;; encoding-escapes.test --- test suite for Guile's string encodings -*- mode: scheme; coding: utf-8 -*-
;;;;
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
;;;;
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 2, or (at your option)
;;;; any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this software; see the file COPYING. If not, write to
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;;;; Boston, MA 02110-1301 USA
(define-module (test-strings)
#:use-module (test-suite lib)
#:use-module (srfi srfi-1))
(define exception:conversion
(cons 'misc-error "^cannot convert to output locale"))
;; Create a string from integer char values, eg. (string-ints 65) => "A"
(define (string-ints . args)
(apply string (map integer->char args)))
(define s1 "última")
(define s2 "cédula")
(define s3 "años")
(define s4 "羅生門")
(with-test-prefix "internal encoding"
(pass-if "ultima"
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
(pass-if "cedula"
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
(pass-if "anos"
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
(pass-if "Rashomon"
(string=? s4 (string-ints #x7f85 #x751f #x9580))))
(with-test-prefix "chars"
(pass-if "ultima"
(list= eqv? (string->list s1)
(list #\372 #\l #\t #\i #\m #\a)))
(pass-if "cedula"
(list= eqv? (string->list s2)
(list #\c #\351 #\d #\u #\l #\a)))
(pass-if "anos"
(list= eqv? (string->list s3)
(list #\a #\361 #\o #\s)))
(pass-if "Rashomon"
(list= eqv? (string->list s4)
(list #\77605 #\72437 #\112600))))
;; Check that an error is flagged on display output when the output
;; error strategy is 'error
(with-test-prefix "display output errors"
(pass-if-exception "ultima"
exception:conversion
(let ((pt (open-output-string)))
(set-port-encoding! pt "ASCII")
(set-port-conversion-strategy! pt 'error)
(display s1 pt)))
(pass-if-exception "Rashomon"
exception:conversion
(let ((pt (open-output-string)))
(set-port-encoding! pt "ASCII")
(set-port-conversion-strategy! pt 'error)
(display s4 pt))))
;; Check that questions marks or substitutions appear when the conversion
;; mode is substitute
(with-test-prefix "display output substitutions"
(pass-if "ultima"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ASCII")
(set-port-conversion-strategy! pt 'substitute)
(display s1 pt)
(string=? "?ltima"
(get-output-string pt))))
(pass-if "Rashomon"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ASCII")
(set-port-conversion-strategy! pt 'substitute)
(display s4 pt)
(string=? "???"
(get-output-string pt)))))
;; Check that hex escapes appear in the write output and that no error
;; is thrown. The output error strategy should be irrelevant here.
(with-test-prefix "display output escapes"
(pass-if "ultima"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ASCII")
(set-port-conversion-strategy! pt 'escape)
(display s1 pt)
(string=? "\\xfaltima"
(get-output-string pt))))
(pass-if "Rashomon"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ASCII")
(set-port-conversion-strategy! pt 'escape)
(display s4 pt)
(string=? "\\u7F85\\u751F\\u9580"
(get-output-string pt)))))
(setlocale LC_ALL "en_US.utf8")
(with-test-prefix "input escapes"
(pass-if "última"
(string=? "última"
(with-input-from-string "\"\\xfaltima\"" read)))
(pass-if "羅生門"
(string=? "羅生門"
(with-input-from-string "\"\\u7F85\\u751F\\u9580\"" read))))

View file

@ -0,0 +1,135 @@
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: iso-8859-1 -*-
;;;;
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
;;;;
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 2, or (at your option)
;;;; any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this software; see the file COPYING. If not, write to
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;;;; Boston, MA 02110-1301 USA
(define-module (test-strings)
#:use-module (test-suite lib)
#:use-module (srfi srfi-1))
(define exception:conversion
(cons 'misc-error "^cannot convert to output locale"))
;; Create a string from integer char values, eg. (string-ints 65) => "A"
(define (string-ints . args)
(apply string (map integer->char args)))
(setlocale LC_ALL "")
(define s1 "última")
(define s2 "cédula")
(define s3 "años")
(define s4 "¿Cómo?")
(with-test-prefix "string length"
(pass-if "última"
(eq? (string-length s1) 6))
(pass-if "cédula"
(eq? (string-length s2) 6))
(pass-if "años"
(eq? (string-length s3) 4))
(pass-if "¿Cómo?"
(eq? (string-length s4) 6)))
(with-test-prefix "internal encoding"
(pass-if "última"
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
(pass-if "cédula"
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
(pass-if "años"
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
(pass-if "¿Cómo?"
(string=? s4 (string-ints #xbf #x43 #xf3 #x6d #x6f #x3f))))
(with-test-prefix "chars"
(pass-if "última"
(list= eqv? (string->list s1)
(list #\ú #\l #\t #\i #\m #\a)))
(pass-if "cédula"
(list= eqv? (string->list s2)
(list #\c #\é #\d #\u #\l #\a)))
(pass-if "años"
(list= eqv? (string->list s3)
(list #\a #\ñ #\o #\s)))
(pass-if "¿Cómo?"
(list= eqv? (string->list s4)
(list #\¿ #\C #\ó #\m #\o #\?))))
;; Check that the output is in ISO-8859-1 encoding
(with-test-prefix "display"
(pass-if "s1"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ISO-8859-1")
(display s1 pt)
(list= eqv?
(list #xfa #x6c #x74 #x69 #x6d #x61)
(u8vector->list
(get-output-locale-u8vector pt)))))
(pass-if "s2"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ISO-8859-1")
(display s2 pt)
(list= eqv?
(list #x63 #xe9 #x64 #x75 #x6c #x61)
(u8vector->list
(get-output-locale-u8vector pt))))))
(with-test-prefix "symbols == strings"
(pass-if "última"
(eq? (string->symbol s1) 'última))
(pass-if "cédula"
(eq? (string->symbol s2) 'cédula))
(pass-if "años"
(eq? (string->symbol s3) 'años))
(pass-if "¿Cómo?"
(eq? (string->symbol s4) '¿Cómo?)))
(with-test-prefix "non-ascii variable names"
(pass-if "1"
(let ((á 1)
(ñ 2))
(eq? (+ á ñ) 3))))
(with-test-prefix "output errors"
(pass-if-exception "char 256" exception:conversion
(let ((pt (open-output-string)))
(set-port-encoding! pt "ISO-8859-1")
(set-port-conversion-strategy! pt 'error)
(display (string-ints 256) pt))))
;; Reset locales
(setlocale LC_ALL "C")

View file

@ -0,0 +1,136 @@
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: iso-8859-7 -*-
;;;;
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
;;;;
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 2, or (at your option)
;;;; any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this software; see the file COPYING. If not, write to
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;;;; Boston, MA 02110-1301 USA
(define-module (test-strings)
#:use-module (test-suite lib)
#:use-module (srfi srfi-1))
(define exception:conversion
(cons 'misc-error "^cannot convert to output locale"))
;; Create a string from integer char values, eg. (string-ints 65) => "A"
(define (string-ints . args)
(apply string (map integer->char args)))
(setlocale LC_ALL "")
(define s1 "Ðåñß")
(define s2 "ôçò")
(define s3 "êñéôéêÞò")
(define s4 "êáé")
(with-test-prefix "string length"
(pass-if "s1"
(eq? (string-length s1) 4))
(pass-if "s2"
(eq? (string-length s2) 3))
(pass-if "s3"
(eq? (string-length s3) 8))
(pass-if "s4"
(eq? (string-length s4) 3)))
(with-test-prefix "internal encoding"
(pass-if "s1"
(string=? s1 (string-ints #x03a0 #x03b5 #x03c1 #x03af)))
(pass-if "s2"
(string=? s2 (string-ints #x03c4 #x03b7 #x03c2)))
(pass-if "s3"
(string=? s3 (string-ints #x03ba #x03c1 #x03b9 #x03c4 #x03b9 #x03ba #x03ae #x03c2)))
(pass-if "s4"
(string=? s4 (string-ints #x03ba #x03b1 #x03b9))))
(with-test-prefix "chars"
(pass-if "s1"
(list= eqv? (string->list s1)
(list #\Ð #\å #\ñ #\ß)))
(pass-if "s2"
(list= eqv? (string->list s2)
(list #\ô #\ç #\ò)))
(pass-if "s3"
(list= eqv? (string->list s3)
(list #\ê #\ñ #\é #\ô #\é #\ê #\Þ #\ò)))
(pass-if "s4"
(list= eqv? (string->list s4)
(list #\ê #\á #\é))))
;; Testing that the display of the string is output in the ISO-8859-7
;; encoding
(with-test-prefix "display"
(pass-if "s1"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ISO-8859-7")
(display s1 pt)
(list= eqv?
(list #xd0 #xe5 #xf1 #xdf)
(u8vector->list
(get-output-locale-u8vector pt)))))
(pass-if "s2"
(let ((pt (open-output-string)))
(set-port-encoding! pt "ISO-8859-7")
(display s2 pt)
(list= eqv?
(list #xf4 #xe7 #xf2)
(u8vector->list
(get-output-locale-u8vector pt))))))
(with-test-prefix "symbols == strings"
(pass-if "Ðåñß"
(eq? (string->symbol s1) 'Ðåñß))
(pass-if "ôçò"
(eq? (string->symbol s2) 'ôçò))
(pass-if "êñéôéêÞò"
(eq? (string->symbol s3) 'êñéôéêÞò))
(pass-if "êáé"
(eq? (string->symbol s4) 'êáé)))
(with-test-prefix "non-ascii variable names"
(pass-if "1"
(let ((á 1)
(ñ 2))
(eq? (+ á ñ) 3))))
(with-test-prefix "output errors"
(pass-if-exception "char #x0400"
exception:conversion
(let ((pt (open-output-string)))
(set-port-encoding! pt "ISO-8859-7")
(set-port-conversion-strategy! pt 'error)
(display (string-ints #x0400) pt))))
;; Reset locale
(setlocale LC_ALL "C")

View file

@ -0,0 +1,105 @@
;;;; strings.test --- test suite for Guile's string functions -*- mode: scheme; coding: utf-8 -*-
;;;;
;;;; Copyright (C) 2009 Free Software Foundation, Inc.
;;;;
;;;; This program is free software; you can redistribute it and/or modify
;;;; it under the terms of the GNU General Public License as published by
;;;; the Free Software Foundation; either version 2, or (at your option)
;;;; any later version.
;;;;
;;;; This program is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;;; GNU General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU General Public License
;;;; along with this software; see the file COPYING. If not, write to
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;;;; Boston, MA 02110-1301 USA
(define-module (test-strings)
#:use-module (test-suite lib)
#:use-module (srfi srfi-1))
(define exception:conversion
(cons 'misc-error "^cannot convert to output locale"))
;; Create a string from integer char values, eg. (string-ints 65) => "A"
(define (string-ints . args)
(apply string (map integer->char args)))
(setlocale LC_ALL "")
(define s1 "última")
(define s2 "cédula")
(define s3 "años")
(define s4 "羅生門")
(with-test-prefix "string length"
(pass-if "última"
(eq? (string-length s1) 6))
(pass-if "cédula"
(eq? (string-length s2) 6))
(pass-if "años"
(eq? (string-length s3) 4))
(pass-if "羅生門"
(eq? (string-length s4) 3)))
(with-test-prefix "internal encoding"
(pass-if "última"
(string=? s1 (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
(pass-if "cédula"
(string=? s2 (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
(pass-if "años"
(string=? s3 (string-ints #x61 #xf1 #x6f #x73)))
(pass-if "羅生門"
(string=? s4 (string-ints #x7f85 #x751f #x9580))))
(with-test-prefix "chars"
(pass-if "última"
(list= eqv? (string->list s1)
(list #\ú #\l #\t #\i #\m #\a)))
(pass-if "cédula"
(list= eqv? (string->list s2)
(list #\c #\é #\d #\u #\l #\a)))
(pass-if "años"
(list= eqv? (string->list s3)
(list #\a #\ñ #\o #\s)))
(pass-if "羅生門"
(list= eqv? (string->list s4)
(list #\羅 #\生 #\門))))
(with-test-prefix "symbols == strings"
(pass-if "última"
(eq? (string->symbol s1) 'última))
(pass-if "cédula"
(eq? (string->symbol s2) 'cédula))
(pass-if "años"
(eq? (string->symbol s3) 'años))
(pass-if "羅生門"
(eq? (string->symbol s4) '羅生門)))
(with-test-prefix "non-ascii variable names"
(pass-if "1"
(let ((芥川龍之介 1)
(ñ 2))
(eq? (+ 芥川龍之介 ñ) 3))))

View file

@ -22,6 +22,7 @@
;;;
;;; miscellaneous
;;;
(setbinary)
(define exception:numerical-overflow
(cons 'numerical-overflow "^Numerical overflow"))

View file

@ -33,6 +33,9 @@
;;;; Some general utilities for testing ports.
;;; Make sure we are set up for 8-bit data
(setbinary)
;;; Read from PORT until EOF, and return the result as a string.
(define (read-all port)
(let loop ((chars '()))

View file

@ -27,6 +27,9 @@
;;; All these tests assume Guile 1.8's port system, where characters are
;;; treated as octets.
;;; Set the default encoding of future ports to be binary
(setbinary)
(with-test-prefix "7.2.5 End-of-File Object"

View file

@ -202,6 +202,11 @@
(string=? (strftime "%Z" t)
"ZOW")))
(pass-if "strftime passes wide characters"
(let ((t (localtime (current-time))))
(string=? (substring (strftime "\u0100%Z" t) 0 1)
"\u0100")))
(with-test-prefix "C99 %z format"
;; %z here is quite possibly affected by the same tm:gmtoff vs current