1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-30 03:40:34 +02:00
guile/libguile/rw.c
Michael Gran 9c44cd4559 Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00

290 lines
8.5 KiB
C
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Copyright (C) 2001, 2006, 2009 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
/* This is the C part of the (ice-9 rw) module. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <errno.h>
#include <string.h>
#include "libguile/_scm.h"
#include "libguile/fports.h"
#include "libguile/ports.h"
#include "libguile/root.h"
#include "libguile/rw.h"
#include "libguile/strings.h"
#include "libguile/validate.h"
#include "libguile/modules.h"
#include "libguile/strports.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_IO_H
#include <io.h>
#endif
#if defined (EAGAIN)
#define SCM_MAYBE_EAGAIN || errno == EAGAIN
#else
#define SCM_MAYBE_EAGAIN
#endif
#if defined (EWOULDBLOCK)
#define SCM_MAYBE_EWOULDBLOCK || errno == EWOULDBLOCK
#else
#define SCM_MAYBE_EWOULDBLOCK
#endif
/* MAYBE there is EAGAIN way of defining this macro but now I EWOULDBLOCK. */
#define SCM_EBLOCK(errno) \
(0 SCM_MAYBE_EAGAIN SCM_MAYBE_EWOULDBLOCK)
SCM_DEFINE (scm_read_string_x_partial, "read-string!/partial", 1, 3, 0,
(SCM str, SCM port_or_fdes, SCM start, SCM end),
"Read characters from a port or file descriptor into a\n"
"string @var{str}. A port must have an underlying file\n"
"descriptor --- a so-called fport. This procedure is\n"
"scsh-compatible and can efficiently read large strings.\n"
"It will:\n\n"
"@itemize\n"
"@item\n"
"attempt to fill the entire string, unless the @var{start}\n"
"and/or @var{end} arguments are supplied. i.e., @var{start}\n"
"defaults to 0 and @var{end} defaults to\n"
"@code{(string-length str)}\n"
"@item\n"
"use the current input port if @var{port_or_fdes} is not\n"
"supplied.\n"
"@item\n"
"return fewer than the requested number of characters in some\n"
"cases, e.g., on end of file, if interrupted by a signal, or if\n"
"not all the characters are immediately available.\n"
"@item\n"
"wait indefinitely for some input if no characters are\n"
"currently available,\n"
"unless the port is in non-blocking mode.\n"
"@item\n"
"read characters from the port's input buffers if available,\n"
"instead from the underlying file descriptor.\n"
"@item\n"
"return @code{#f} if end-of-file is encountered before reading\n"
"any characters, otherwise return the number of characters\n"
"read.\n"
"@item\n"
"return 0 if the port is in non-blocking mode and no characters\n"
"are immediately available.\n"
"@item\n"
"return 0 if the request is for 0 bytes, with no\n"
"end-of-file check.\n"
"@end itemize")
#define FUNC_NAME s_scm_read_string_x_partial
{
char *dest;
size_t offset;
long read_len;
long chars_read = 0;
int fdes;
{
size_t last;
SCM_VALIDATE_STRING (1, str);
scm_i_get_substring_spec (scm_i_string_length (str),
start, &offset, end, &last);
read_len = last - offset;
}
if (scm_is_integer (port_or_fdes))
fdes = scm_to_int (port_or_fdes);
else
{
SCM port = (SCM_UNBNDP (port_or_fdes)?
scm_current_input_port () : port_or_fdes);
SCM_VALIDATE_OPFPORT (2, port);
SCM_VALIDATE_INPUT_PORT (2, port);
/* if there's anything in the port buffers, use it, but then
don't touch the file descriptor. otherwise the
"return immediately if something is available" rule may
be violated. */
str = scm_i_string_start_writing (str);
dest = scm_i_string_writable_chars (str) + offset;
chars_read = scm_take_from_input_buffers (port, dest, read_len);
scm_i_string_stop_writing ();
fdes = SCM_FPORT_FDES (port);
}
if (chars_read == 0 && read_len > 0) /* don't confuse read_len == 0 with
EOF. */
{
str = scm_i_string_start_writing (str);
dest = scm_i_string_writable_chars (str) + offset;
SCM_SYSCALL (chars_read = read (fdes, dest, read_len));
scm_i_string_stop_writing ();
if (chars_read == -1)
{
if (SCM_EBLOCK (errno))
chars_read = 0;
else
SCM_SYSERROR;
}
else if (chars_read == 0)
{
scm_remember_upto_here_1 (str);
return SCM_BOOL_F;
}
}
scm_remember_upto_here_1 (str);
return scm_from_long (chars_read);
}
#undef FUNC_NAME
SCM_DEFINE (scm_write_string_partial, "write-string/partial", 1, 3, 0,
(SCM str, SCM port_or_fdes, SCM start, SCM end),
"Write characters from a string @var{str} to a port or file\n"
"descriptor. A port must have an underlying file descriptor\n"
"--- a so-called fport. This procedure is\n"
"scsh-compatible and can efficiently write large strings.\n"
"It will:\n\n"
"@itemize\n"
"@item\n"
"attempt to write the entire string, unless the @var{start}\n"
"and/or @var{end} arguments are supplied. i.e., @var{start}\n"
"defaults to 0 and @var{end} defaults to\n"
"@code{(string-length str)}\n"
"@item\n"
"use the current output port if @var{port_of_fdes} is not\n"
"supplied.\n"
"@item\n"
"in the case of a buffered port, store the characters in the\n"
"port's output buffer, if all will fit. If they will not fit\n"
"then any existing buffered characters will be flushed\n"
"before attempting\n"
"to write the new characters directly to the underlying file\n"
"descriptor. If the port is in non-blocking mode and\n"
"buffered characters can not be flushed immediately, then an\n"
"@code{EAGAIN} system-error exception will be raised (Note:\n"
"scsh does not support the use of non-blocking buffered ports.)\n"
"@item\n"
"write fewer than the requested number of\n"
"characters in some cases, e.g., if interrupted by a signal or\n"
"if not all of the output can be accepted immediately.\n"
"@item\n"
"wait indefinitely for at least one character\n"
"from @var{str} to be accepted by the port, unless the port is\n"
"in non-blocking mode.\n"
"@item\n"
"return the number of characters accepted by the port.\n"
"@item\n"
"return 0 if the port is in non-blocking mode and can not accept\n"
"at least one character from @var{str} immediately\n"
"@item\n"
"return 0 immediately if the request size is 0 bytes.\n"
"@end itemize")
#define FUNC_NAME s_scm_write_string_partial
{
const char *src;
scm_t_off write_len;
int fdes;
{
size_t offset;
size_t last;
SCM_VALIDATE_STRING (1, str);
src = scm_i_string_chars (str);
scm_i_get_substring_spec (scm_i_string_length (str),
start, &offset, end, &last);
src += offset;
write_len = last - offset;
}
if (write_len == 0)
return SCM_INUM0;
if (scm_is_integer (port_or_fdes))
fdes = scm_to_int (port_or_fdes);
else
{
SCM port = (SCM_UNBNDP (port_or_fdes)?
scm_current_output_port () : port_or_fdes);
scm_t_port *pt;
scm_t_off space;
SCM_VALIDATE_OPFPORT (2, port);
SCM_VALIDATE_OUTPUT_PORT (2, port);
pt = SCM_PTAB_ENTRY (port);
/* filling the last character in the buffer would require a flush. */
space = pt->write_end - pt->write_pos - 1;
if (space >= write_len)
{
memcpy (pt->write_pos, src, write_len);
pt->write_pos += write_len;
return scm_from_long (write_len);
}
if (pt->write_pos > pt->write_buf)
scm_flush (port);
fdes = SCM_FPORT_FDES (port);
}
{
long rv;
SCM_SYSCALL (rv = write (fdes, src, write_len));
if (rv == -1)
{
if (SCM_EBLOCK (errno))
rv = 0;
else
SCM_SYSERROR;
}
scm_remember_upto_here_1 (str);
return scm_from_long (rv);
}
}
#undef FUNC_NAME
SCM
scm_init_rw_builtins ()
{
#include "libguile/rw.x"
return SCM_UNSPECIFIED;
}
void
scm_init_rw ()
{
scm_c_define_gsubr ("%init-rw-builtins", 0, 0, 0, scm_init_rw_builtins);
}
/*
Local Variables:
c-file-style: "gnu"
End:
*/