1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-04-29 19:30:36 +02:00

Fix broken interaction between readline and Unicode

This requires separate small fixes.

Readline has internal logic to deal with multi-byte characters, so
it wants bytes, not characters.

scm_c_read gets called by the vm when readline is activated, and it was
truncating multi-byte characters because soft ports didn't have the
UCS-4 capability.

Soft ports need the capability to read UCS-4 characters.  Since soft ports
may have a single byte buffer, full characters need to be stored into the
pushback buffer.

This broke the optimizations in scm_c_read for using an alternate buffer
for single-byte-buffered ports, because the opimization wasn't expecting
anything in the pushback buffer.

* libguile/vports.c (sf_fill_input): store complete chars, not single bytes

* libguile/ports.c (scm_c_read): don't use optimized path for non Latin-1.
  Add debug prints.

* libguile/string.h: make scm_i_from_stringn and scm_i_string_ref public
  so that readline can use them

* guile-readline/readline.c: read bytes, not complete chars, from the
  input port.  Convert output to the output port's locale
This commit is contained in:
Michael Gran 2009-09-07 18:42:29 -07:00
parent eebff6d7f1
commit 7519234547
4 changed files with 42 additions and 14 deletions

View file

@ -128,6 +128,7 @@ rl_free_line_state ()
static int promptp;
static SCM input_port;
static SCM output_port;
static SCM before_read;
static int
@ -138,7 +139,7 @@ current_input_getc (FILE *in SCM_UNUSED)
scm_apply (before_read, SCM_EOL, SCM_EOL);
promptp = 0;
}
return scm_getc (input_port);
return scm_get_byte_or_eof (input_port);
}
static int in_readline = 0;
@ -255,7 +256,12 @@ internal_readline (SCM text)
promptp = 1;
s = readline (prompt);
if (s)
ret = scm_from_locale_string (s);
{
scm_t_port *pt = SCM_PTAB_ENTRY (output_port);
ret = scm_i_from_stringn (s, strlen (s), pt->encoding,
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
}
else
ret = SCM_EOF_VAL;
@ -311,6 +317,7 @@ scm_readline_init_ports (SCM inp, SCM outp)
}
input_port = inp;
output_port = outp;
#ifndef __MINGW32__
rl_instream = stream_from_fport (inp, "r", s_scm_readline);
rl_outstream = stream_from_fport (outp, "w", s_scm_readline);

View file

@ -1293,7 +1293,7 @@ scm_c_read (SCM port, void *buffer, size_t size)
requested number of bytes. (Note that a single scm_fill_input
call does not guarantee to fill the whole of the port's read
buffer.) */
if (pt->read_buf_size <= 1)
if (pt->read_buf_size <= 1 && pt->encoding == NULL)
{
/* The port that we are reading from is unbuffered - i.e. does
not have its own persistent buffer - but we have a buffer,
@ -1305,7 +1305,14 @@ scm_c_read (SCM port, void *buffer, size_t size)
We need to make sure that the port's normal (1 byte) buffer
is reinstated in case one of the scm_fill_input () calls
throws an exception; we use the scm_dynwind_* API to achieve
that. */
that.
A consequence of this optimization is that the fill_input
functions can't unget characters. That'll push data to the
pushback buffer instead of this psb buffer. */
#if SCM_DEBUG == 1
unsigned char *pback = pt->putback_buf;
#endif
psb.pt = pt;
psb.buffer = buffer;
psb.size = size;
@ -1320,8 +1327,15 @@ scm_c_read (SCM port, void *buffer, size_t size)
pt->read_buf_size -= (pt->read_end - pt->read_pos);
pt->read_pos = pt->read_buf = pt->read_end;
}
#if SCM_DEBUG == 1
if (pback != pt->putback_buf
|| pt->read_buf - (unsigned char *) buffer < 0)
scm_misc_error (FUNC_NAME,
"scm_c_read must not call a fill function that pushes "
"back characters onto an unbuffered port", SCM_EOL);
#endif
n_read += pt->read_buf - (unsigned char *) buffer;
/* Reinstate the port's normal buffer. */
scm_dynwind_end ();
}

View file

@ -111,7 +111,7 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
SCM_API SCM scm_i_from_stringn (const char *str, size_t len,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
@ -157,7 +157,7 @@ SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str);
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void);
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_API scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);

View file

@ -92,19 +92,26 @@ sf_fill_input (SCM port)
{
SCM p = SCM_PACK (SCM_STREAM (port));
SCM ans;
scm_t_port *pt;
ans = scm_call_0 (SCM_SIMPLE_VECTOR_REF (p, 3)); /* get char. */
if (scm_is_false (ans) || SCM_EOF_OBJECT_P (ans))
return EOF;
SCM_ASSERT (SCM_CHARP (ans), ans, SCM_ARG1, "sf_fill_input");
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
pt = SCM_PTAB_ENTRY (port);
*pt->read_buf = SCM_CHAR (ans);
pt->read_pos = pt->read_buf;
pt->read_end = pt->read_buf + 1;
return *pt->read_buf;
}
if (pt->encoding == NULL)
{
scm_t_port *pt = SCM_PTAB_ENTRY (port);
*pt->read_buf = SCM_CHAR (ans);
pt->read_pos = pt->read_buf;
pt->read_end = pt->read_buf + 1;
return *pt->read_buf;
}
else
scm_ungetc (SCM_CHAR (ans), port);
return SCM_CHAR (ans);
}