mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-04-29 19:30:36 +02:00
Fix broken interaction between readline and Unicode
This requires several separate small fixes. Readline has internal logic to deal with multi-byte characters, so it wants bytes, not characters. scm_c_read gets called by the vm when readline is activated, and it was truncating multi-byte characters because soft ports didn't have the UCS-4 capability. Soft ports need the capability to read UCS-4 characters. Since soft ports may have a single-byte buffer, full characters need to be stored into the pushback buffer. This broke the optimizations in scm_c_read for using an alternate buffer for single-byte-buffered ports, because the optimization wasn't expecting anything in the pushback buffer. * libguile/vports.c (sf_fill_input): store complete chars, not single bytes * libguile/ports.c (scm_c_read): don't use optimized path for non-Latin-1. Add debug prints. * libguile/string.h: make scm_i_from_stringn and scm_i_string_ref public so that readline can use them * guile-readline/readline.c: read bytes, not complete chars, from the input port. Convert output to the output port's locale
This commit is contained in:
parent
eebff6d7f1
commit
7519234547
4 changed files with 42 additions and 14 deletions
|
@ -128,6 +128,7 @@ rl_free_line_state ()
|
|||
|
||||
static int promptp;
|
||||
static SCM input_port;
|
||||
static SCM output_port;
|
||||
static SCM before_read;
|
||||
|
||||
static int
|
||||
|
@ -138,7 +139,7 @@ current_input_getc (FILE *in SCM_UNUSED)
|
|||
scm_apply (before_read, SCM_EOL, SCM_EOL);
|
||||
promptp = 0;
|
||||
}
|
||||
return scm_getc (input_port);
|
||||
return scm_get_byte_or_eof (input_port);
|
||||
}
|
||||
|
||||
static int in_readline = 0;
|
||||
|
@ -255,7 +256,12 @@ internal_readline (SCM text)
|
|||
promptp = 1;
|
||||
s = readline (prompt);
|
||||
if (s)
|
||||
ret = scm_from_locale_string (s);
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (output_port);
|
||||
|
||||
ret = scm_i_from_stringn (s, strlen (s), pt->encoding,
|
||||
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
|
||||
}
|
||||
else
|
||||
ret = SCM_EOF_VAL;
|
||||
|
||||
|
@ -311,6 +317,7 @@ scm_readline_init_ports (SCM inp, SCM outp)
|
|||
}
|
||||
|
||||
input_port = inp;
|
||||
output_port = outp;
|
||||
#ifndef __MINGW32__
|
||||
rl_instream = stream_from_fport (inp, "r", s_scm_readline);
|
||||
rl_outstream = stream_from_fport (outp, "w", s_scm_readline);
|
||||
|
|
|
@ -1293,7 +1293,7 @@ scm_c_read (SCM port, void *buffer, size_t size)
|
|||
requested number of bytes. (Note that a single scm_fill_input
|
||||
call does not guarantee to fill the whole of the port's read
|
||||
buffer.) */
|
||||
if (pt->read_buf_size <= 1)
|
||||
if (pt->read_buf_size <= 1 && pt->encoding == NULL)
|
||||
{
|
||||
/* The port that we are reading from is unbuffered - i.e. does
|
||||
not have its own persistent buffer - but we have a buffer,
|
||||
|
@ -1305,7 +1305,14 @@ scm_c_read (SCM port, void *buffer, size_t size)
|
|||
We need to make sure that the port's normal (1 byte) buffer
|
||||
is reinstated in case one of the scm_fill_input () calls
|
||||
throws an exception; we use the scm_dynwind_* API to achieve
|
||||
that. */
|
||||
that.
|
||||
|
||||
A consequence of this optimization is that the fill_input
|
||||
functions can't unget characters. That'll push data to the
|
||||
pushback buffer instead of this psb buffer. */
|
||||
#if SCM_DEBUG == 1
|
||||
unsigned char *pback = pt->putback_buf;
|
||||
#endif
|
||||
psb.pt = pt;
|
||||
psb.buffer = buffer;
|
||||
psb.size = size;
|
||||
|
@ -1320,8 +1327,15 @@ scm_c_read (SCM port, void *buffer, size_t size)
|
|||
pt->read_buf_size -= (pt->read_end - pt->read_pos);
|
||||
pt->read_pos = pt->read_buf = pt->read_end;
|
||||
}
|
||||
#if SCM_DEBUG == 1
|
||||
if (pback != pt->putback_buf
|
||||
|| pt->read_buf - (unsigned char *) buffer < 0)
|
||||
scm_misc_error (FUNC_NAME,
|
||||
"scm_c_read must not call a fill function that pushes "
|
||||
"back characters onto an unbuffered port", SCM_EOL);
|
||||
#endif
|
||||
n_read += pt->read_buf - (unsigned char *) buffer;
|
||||
|
||||
|
||||
/* Reinstate the port's normal buffer. */
|
||||
scm_dynwind_end ();
|
||||
}
|
||||
|
|
|
@ -111,7 +111,7 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
|
|||
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
|
||||
SCM_API SCM scm_string_append (SCM args);
|
||||
|
||||
SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
|
||||
SCM_API SCM scm_i_from_stringn (const char *str, size_t len,
|
||||
const char *encoding,
|
||||
scm_t_string_failed_conversion_handler
|
||||
handler);
|
||||
|
@ -157,7 +157,7 @@ SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str);
|
|||
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
|
||||
SCM_INTERNAL void scm_i_string_stop_writing (void);
|
||||
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
|
||||
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
|
||||
SCM_API scm_t_wchar scm_i_string_ref (SCM str, size_t x);
|
||||
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
|
||||
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
|
||||
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
|
||||
|
|
|
@ -92,19 +92,26 @@ sf_fill_input (SCM port)
|
|||
{
|
||||
SCM p = SCM_PACK (SCM_STREAM (port));
|
||||
SCM ans;
|
||||
scm_t_port *pt;
|
||||
|
||||
ans = scm_call_0 (SCM_SIMPLE_VECTOR_REF (p, 3)); /* get char. */
|
||||
if (scm_is_false (ans) || SCM_EOF_OBJECT_P (ans))
|
||||
return EOF;
|
||||
SCM_ASSERT (SCM_CHARP (ans), ans, SCM_ARG1, "sf_fill_input");
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
pt = SCM_PTAB_ENTRY (port);
|
||||
|
||||
*pt->read_buf = SCM_CHAR (ans);
|
||||
pt->read_pos = pt->read_buf;
|
||||
pt->read_end = pt->read_buf + 1;
|
||||
return *pt->read_buf;
|
||||
}
|
||||
if (pt->encoding == NULL)
|
||||
{
|
||||
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
||||
|
||||
*pt->read_buf = SCM_CHAR (ans);
|
||||
pt->read_pos = pt->read_buf;
|
||||
pt->read_end = pt->read_buf + 1;
|
||||
return *pt->read_buf;
|
||||
}
|
||||
else
|
||||
scm_ungetc (SCM_CHAR (ans), port);
|
||||
return SCM_CHAR (ans);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue