1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-02 21:10:27 +02:00
guile/libguile/ports-internal.h
Mark H Weaver cdd3d6c9f4 Improve handling of Unicode byte-order marks (BOMs).
* libguile/ports-internal.h (struct scm_port_internal): Add new members
  'at_stream_start_for_bom_read' and 'at_stream_start_for_bom_write'.
  (SCM_UNICODE_BOM): New macro.
  (scm_i_port_iconv_descriptors): Add 'mode' parameter to prototype.

* libguile/ports.c (scm_new_port_table_entry): Initialize
  'at_stream_start_for_bom_read' and 'at_stream_start_for_bom_write'.
  (get_iconv_codepoint): Pass new 'mode' parameter to
  'scm_i_port_iconv_descriptors'.
  (get_codepoint): After reading a codepoint at stream start, record
  that we're no longer at stream start, and consume a BOM where
  appropriate.
  (scm_seek): Set the stream start flags according to the new position.
  (looking_at_bytes): New static function.
  (scm_utf8_bom, scm_utf16be_bom, scm_utf16le_bom, scm_utf32be_bom,
  scm_utf32le_bom): New static const arrays.
  (decide_utf16_encoding, decide_utf32_encoding): New static functions.
  (scm_i_port_iconv_descriptors): Add new 'mode' parameter.  If the
  specified encoding is UTF-16 or UTF-32, make that precise by deciding
  what byte order to use, and construct iconv descriptors based on the
  precise encoding.
  (scm_i_set_port_encoding_x): Record that we are now at stream start.
  Do not open the new iconv descriptors immediately; let them be
  initialized lazily.

* libguile/print.c (display_string_using_iconv): Record that we're no
  longer at stream start.  Write a BOM if appropriate.

* doc/ref/api-io.texi (BOM Handling): New node.

* test-suite/tests/ports.test ("set-port-encoding!, wrong encoding"):
  Adapt test to cope with the fact that 'set-port-encoding!' does not
  immediately open the iconv descriptors.
  (bv-read-test): New procedure.
  ("unicode byte-order marks (BOMs)"): New test prefix.
2013-04-04 21:40:28 -04:00

67 lines
2.1 KiB
C

/*
* ports-internal.h - internal-only declarations for ports.
*
* Copyright (C) 2013 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#ifndef SCM_PORTS_INTERNAL
#define SCM_PORTS_INTERNAL
#include "libguile/_scm.h"
#include "libguile/ports.h"
/* Identifies how a port's character encoding is processed.  Judging by
   the enumerator names there is a dedicated UTF-8 mode and a generic
   iconv-based mode -- NOTE(review): confirm against the users of this
   type in ports.c.  */
typedef enum scm_port_encoding_mode
{
  SCM_PORT_ENCODING_MODE_UTF8 = 0,
  SCM_PORT_ENCODING_MODE_ICONV = 1
} scm_t_port_encoding_mode;
/* The iconv conversion descriptors live in an object of their own so
   that finalizers only need to be registered for ports that actually
   use iconv.  (FIXME: in 2.0, finalizers are currently registered for
   every port anyway.)  */
typedef struct scm_iconv_descriptors
{
  void *input_cd;   /* iconv conversion descriptor used for input */
  void *output_cd;  /* iconv conversion descriptor used for output */
} scm_t_iconv_descriptors;
/* Per-port state that is private to libguile.  Reach it through
   SCM_PORT_GET_INTERNAL.  */
typedef struct scm_port_internal
{
  /* Byte-order mark (BOM) bookkeeping.  Each flag records whether the
     port is still at the start of its stream for one direction: the
     read flag governs consuming a leading BOM, the write flag governs
     emitting one.  */
  unsigned int at_stream_start_for_bom_read : 1;
  unsigned int at_stream_start_for_bom_write : 1;

  /* Which encoding mode is in effect for this port.  */
  scm_t_port_encoding_mode encoding_mode;

  /* iconv descriptors for this port; obtain them through
     scm_i_port_iconv_descriptors rather than reading this field.
     NOTE(review): presumably NULL until first opened -- confirm in
     ports.c.  */
  scm_t_iconv_descriptors *iconv_descriptors;

  /* NOTE(review): looks like a flag for an end-of-file condition that
     has been seen but not yet reported -- confirm in ports.c.  */
  int pending_eof;

  /* Association list; its contents are not visible from this header.  */
  SCM alist;
} scm_t_port_internal;
/* Code point of the Unicode byte-order mark (BOM), U+FEFF.  The UL
   suffix makes the constant an unsigned long.  */
#define SCM_UNICODE_BOM  0xfeffUL
/* Yield the scm_t_port_internal pointer belonging to PORT.  The pointer
   is read from the 'input_cd' slot of PORT's port-table entry and cast
   to the internal type.  NOTE(review): this implies the 'input_cd' slot
   is reused to hold the internal structure rather than an iconv
   descriptor -- confirm where the entry is initialized in ports.c.  */
#define SCM_PORT_GET_INTERNAL(port)                                  \
  ((scm_t_port_internal *) (SCM_PTAB_ENTRY (port)->input_cd))
/* Return the iconv descriptors for PORT.  Per the change that added the
   MODE parameter, the descriptors are not opened when the port's
   encoding is set but are initialized lazily; when the port's encoding
   is plain UTF-16 or UTF-32, a precise byte order is decided first and
   the descriptors are built for that precise encoding.
   NOTE(review): MODE presumably says whether the caller is about to
   read or write, which informs the byte-order decision -- confirm in
   ports.c.  */
SCM_INTERNAL scm_t_iconv_descriptors *
scm_i_port_iconv_descriptors (SCM port, scm_t_port_rw_active mode);
#endif