mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-01 20:30:28 +02:00
* libguile/ports-internal.h (struct scm_port_internal): Add new members 'at_stream_start_for_bom_read' and 'at_stream_start_for_bom_write'. (SCM_UNICODE_BOM): New macro. (scm_i_port_iconv_descriptors): Add 'mode' parameter to prototype. * libguile/ports.c (scm_new_port_table_entry): Initialize 'at_stream_start_for_bom_read' and 'at_stream_start_for_bom_write'. (get_iconv_codepoint): Pass new 'mode' parameter to 'scm_i_port_iconv_descriptors'. (get_codepoint): After reading a codepoint at stream start, record that we're no longer at stream start, and consume a BOM where appropriate. (scm_seek): Set the stream start flags according to the new position. (looking_at_bytes): New static function. (scm_utf8_bom, scm_utf16be_bom, scm_utf16le_bom, scm_utf32be_bom, scm_utf32le_bom): New static const arrays. (decide_utf16_encoding, decide_utf32_encoding): New static functions. (scm_i_port_iconv_descriptors): Add new 'mode' parameter. If the specified encoding is UTF-16 or UTF-32, make that precise by deciding what byte order to use, and construct iconv descriptors based on the precise encoding. (scm_i_set_port_encoding_x): Record that we are now at stream start. Do not open the new iconv descriptors immediately; let them be initialized lazily. * libguile/print.c (display_string_using_iconv): Record that we're no longer at stream start. Write a BOM if appropriate. * doc/ref/api-io.texi (BOM Handling): New node. * test-suite/tests/ports.test ("set-port-encoding!, wrong encoding"): Adapt test to cope with the fact that 'set-port-encoding!' does not immediately open the iconv descriptors. (bv-read-test): New procedure. ("unicode byte-order marks (BOMs)"): New test prefix.
67 lines
2.1 KiB
C
67 lines
2.1 KiB
C
/*
 * ports-internal.h - internal-only declarations for ports.
 *
 * Copyright (C) 2013 Free Software Foundation, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */
|
|
|
|
#ifndef SCM_PORTS_INTERNAL
|
|
#define SCM_PORTS_INTERNAL
|
|
|
|
#include "libguile/_scm.h"
|
|
#include "libguile/ports.h"
|
|
|
|
/* Selects how a port's character encoding is handled.  The value is
   stored in the 'encoding_mode' member of 'struct scm_port_internal'.

   NOTE(review): judging by the names, UTF8 presumably means UTF-8 is
   handled without going through iconv, while ICONV means conversion is
   delegated to the port's iconv descriptors -- confirm in ports.c.  */
enum scm_port_encoding_mode {
  SCM_PORT_ENCODING_MODE_UTF8,
  SCM_PORT_ENCODING_MODE_ICONV
};

typedef enum scm_port_encoding_mode scm_t_port_encoding_mode;
|
|
|
|
/* This is a separate object so that only those ports that use iconv
   cause finalizers to be registered (FIXME: although currently in 2.0
   finalizers are always registered for ports anyway).  */
struct scm_iconv_descriptors
{
  /* Input/output iconv conversion descriptors.  They are stored as
     untyped pointers, so this header does not need <iconv.h>.  */
  void *input_cd;
  void *output_cd;
};

typedef struct scm_iconv_descriptors scm_t_iconv_descriptors;
|
|
|
|
struct scm_port_internal
|
|
{
|
|
unsigned at_stream_start_for_bom_read : 1;
|
|
unsigned at_stream_start_for_bom_write : 1;
|
|
scm_t_port_encoding_mode encoding_mode;
|
|
scm_t_iconv_descriptors *iconv_descriptors;
|
|
int pending_eof;
|
|
SCM alist;
|
|
};
|
|
|
|
typedef struct scm_port_internal scm_t_port_internal;
|
|
|
|
/* The Unicode byte-order mark (BOM), code point U+FEFF.  */
#define SCM_UNICODE_BOM  0xFEFFUL
|
|
|
|
/* Return the 'scm_t_port_internal' structure of port X.  The pointer is
   read from the 'input_cd' field of X's port table entry and cast to
   the internal type.

   NOTE(review): storing the internal structure in a field named
   'input_cd' looks like a deliberate reuse of an existing slot --
   confirm against the port structure definition in ports.h before
   changing it.  */
#define SCM_PORT_GET_INTERNAL(x)                                \
  ((scm_t_port_internal *) (SCM_PTAB_ENTRY(x)->input_cd))
|
|
|
|
/* Return the iconv conversion descriptors associated with PORT.

   MODE is a 'scm_t_port_rw_active' value.
   NOTE(review): presumably it tells whether the descriptors are wanted
   for reading or for writing, which matters when the byte order of a
   UTF-16 or UTF-32 port still has to be decided -- confirm against the
   definition in ports.c.  */
SCM_INTERNAL scm_t_iconv_descriptors *
scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode);
|
|
|
|
#endif
|