From b662b7e971423934b897f925ccc3061fc640e996 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Fri, 4 May 2012 22:36:27 +0200 Subject: [PATCH 01/12] Simplify the reader's `read_complete_token'. * libguile/read.c (read_token): Remove unneeded `const' before `size_t'. (read_complete_token): Remove `overflow_buffer' parameter; return `char *' instead of `int'. Allocate the overflow buffer with `scm_gc_malloc_pointerless' instead of `scm_malloc'. Return either the overflow buffer or BUFFER. (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix): Rename `buffer' to `local_buffer', and `overflow_buffer' to `buffer'. Remove `overflow'. Adjust code to new `read_complete_token'. --- libguile/read.c | 100 +++++++++++++++++------------------------------- 1 file changed, 35 insertions(+), 65 deletions(-) diff --git a/libguile/read.c b/libguile/read.c index bbaf3f63e..6ec38f35b 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -208,8 +208,8 @@ static SCM scm_get_hash_procedure (int); fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of bytes actually read. */ static int -read_token (SCM port, char *buf, const size_t buf_size, size_t *read) - { +read_token (SCM port, char *buf, size_t buf_size, size_t *read) +{ *read = 0; while (*read < buf_size) @@ -235,20 +235,15 @@ read_token (SCM port, char *buf, const size_t buf_size, size_t *read) return 1; } -/* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the - result in the pre-allocated buffer BUFFER, if the whole token has fewer than - BUFFER_SIZE bytes, or into OVERFLOW_BUFFER, allocated here to be freed by the - caller. Return zero if the token fits in BUFFER, non-zero otherwise. READ - will be set the number of bytes actually read. */ -static int -read_complete_token (SCM port, char *buffer, const size_t buffer_size, - char **overflow_buffer, size_t *read) +/* Like `read_token', but return either BUFFER, or a GC-allocated buffer + if the token doesn't fit in BUFFER_SIZE bytes. */ +static char * +read_complete_token (SCM port, char *buffer, size_t buffer_size, + size_t *read) { int overflow = 0; - size_t bytes_read, overflow_size; - - *overflow_buffer = NULL; - overflow_size = 0; + size_t bytes_read, overflow_size = 0; + char *overflow_buffer = NULL; do { @@ -259,14 +254,19 @@ read_complete_token (SCM port, char *buffer, const size_t buffer_size, { if (overflow_size == 0) { - *overflow_buffer = scm_malloc (bytes_read); - memcpy (*overflow_buffer, buffer, bytes_read); + overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read"); + memcpy (overflow_buffer, buffer, bytes_read); overflow_size = bytes_read; } else { - *overflow_buffer = scm_realloc (*overflow_buffer, overflow_size + bytes_read); - memcpy (*overflow_buffer + overflow_size, buffer, bytes_read); + void *new_buf = + scm_gc_malloc_pointerless (overflow_size + bytes_read, "read"); + + memcpy (new_buf, overflow_buffer, overflow_size); + memcpy (new_buf + overflow_size, buffer, bytes_read); + + overflow_buffer = new_buf; overflow_size += bytes_read; } } @@ -278,7 +278,7 @@ read_complete_token (SCM port, char *buffer, const size_t buffer_size, else *read = bytes_read; - return (overflow_size != 0); + return (overflow_size > 0 ? overflow_buffer : buffer); } /* Skip whitespace from PORT and return the first non-whitespace character @@ -594,10 +594,8 @@ static SCM scm_read_number (scm_t_wchar chr, SCM port) { SCM result, str = SCM_EOL; - char buffer[READER_BUFFER_SIZE]; - char *overflow_buffer = NULL; + char local_buffer[READER_BUFFER_SIZE], *buffer; size_t bytes_read; - int overflow; scm_t_port *pt = SCM_PTAB_ENTRY (port); /* Need to capture line and column numbers here. */ @@ -605,14 +603,10 @@ scm_read_number (scm_t_wchar chr, SCM port) int column = SCM_COL (port) - 1; scm_ungetc (chr, port); - overflow = read_complete_token (port, buffer, sizeof (buffer), - &overflow_buffer, &bytes_read); + buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + &bytes_read); - if (!overflow) - str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); - else - str = scm_from_stringn (overflow_buffer, bytes_read, pt->encoding, - pt->ilseq_handler); + str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); result = scm_string_to_number (str, SCM_UNDEFINED); if (scm_is_false (result)) @@ -625,8 +619,6 @@ scm_read_number (scm_t_wchar chr, SCM port) else if (SCM_NIMP (result)) result = maybe_annotate_source (result, port, line, column); - if (overflow) - free (overflow_buffer); SCM_COL (port) += scm_i_string_length (str); return result; } @@ -638,29 +630,20 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) int ends_with_colon = 0; size_t bytes_read; int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix); - int overflow; - char buffer[READER_BUFFER_SIZE], *overflow_buffer; + char local_buffer[READER_BUFFER_SIZE], *buffer; scm_t_port *pt = SCM_PTAB_ENTRY (port); SCM str; scm_ungetc (chr, port); - overflow = read_complete_token (port, buffer, READER_BUFFER_SIZE, - &overflow_buffer, &bytes_read); + buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + &bytes_read); if (bytes_read > 0) - { - if (!overflow) - ends_with_colon = buffer[bytes_read - 1] == ':'; - else - ends_with_colon = overflow_buffer[bytes_read - 1] == ':'; - } + ends_with_colon = buffer[bytes_read - 1] == ':'; if (postfix && ends_with_colon && (bytes_read > 1)) { - if (!overflow) - str = scm_from_stringn (buffer, bytes_read - 1, pt->encoding, pt->ilseq_handler); - else - str = scm_from_stringn (overflow_buffer, bytes_read - 1, pt->encoding, - pt->ilseq_handler); + str = scm_from_stringn (buffer, bytes_read - 1, + pt->encoding, pt->ilseq_handler); if (SCM_CASE_INSENSITIVE_P) str = scm_string_downcase_x (str); @@ -668,19 +651,14 @@ scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port) } else { - if (!overflow) - str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler); - else - str = scm_from_stringn (overflow_buffer, bytes_read, pt->encoding, - pt->ilseq_handler); + str = scm_from_stringn (buffer, bytes_read, + pt->encoding, pt->ilseq_handler); if (SCM_CASE_INSENSITIVE_P) str = scm_string_downcase_x (str); result = scm_string_to_symbol (str); } - if (overflow) - free (overflow_buffer); SCM_COL (port) += scm_i_string_length (str); return result; } @@ -691,8 +669,7 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port) { SCM result; size_t read; - char buffer[READER_BUFFER_SIZE], *overflow_buffer; - int overflow; + char local_buffer[READER_BUFFER_SIZE], *buffer; unsigned int radix; SCM str; scm_t_port *pt; @@ -725,21 +702,14 @@ scm_read_number_and_radix (scm_t_wchar chr, SCM port) radix = 10; } - overflow = read_complete_token (port, buffer, sizeof (buffer), - &overflow_buffer, &read); + buffer = read_complete_token (port, local_buffer, sizeof local_buffer, + &read); pt = SCM_PTAB_ENTRY (port); - if (!overflow) - str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler); - else - str = scm_from_stringn (overflow_buffer, read, pt->encoding, - pt->ilseq_handler); + str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler); result = scm_string_to_number (str, scm_from_uint (radix)); - if (overflow) - free (overflow_buffer); - SCM_COL (port) += scm_i_string_length (str); if (scm_is_true (result)) From e9c898bf24c2faf86d3d2f61361bc52ff3abc8b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Sat, 5 May 2012 00:44:54 +0200 Subject: [PATCH 02/12] Correct typing of assertion in the `wind' instruction. * libguile/vm-i-system.c (wind): Check `scm_to_bool (scm_thunk_p (x))' instead of `scm_thunk_p'. --- libguile/vm-i-system.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c index 3ac0097b4..2399fa065 100644 --- a/libguile/vm-i-system.c +++ b/libguile/vm-i-system.c @@ -1570,8 +1570,10 @@ VM_DEFINE_INSTRUCTION (86, wind, "wind", 0, 2, 0) /* Push wind and unwind procedures onto the dynamic stack. Note that neither are actually called; the compiler should emit calls to wind and unwind for the normal dynamic-wind control flow. */ - VM_ASSERT (scm_thunk_p (wind), vm_error_not_a_thunk ("dynamic-wind", wind)); - VM_ASSERT (scm_thunk_p (unwind), vm_error_not_a_thunk ("dynamic-wind", unwind)); + VM_ASSERT (scm_to_bool (scm_thunk_p (wind)), + vm_error_not_a_thunk ("dynamic-wind", wind)); + VM_ASSERT (scm_to_bool (scm_thunk_p (unwind)), + vm_error_not_a_thunk ("dynamic-wind", unwind)); scm_i_set_dynwinds (scm_cons (scm_cons (wind, unwind), scm_i_dynwinds ())); NEXT; } From 82171a2ea4d81d1dd2f71142ed6021ab383d836b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Sun, 6 May 2012 22:23:12 +0200 Subject: [PATCH 03/12] Build with `-Wpointer-arith' when available. * configure.ac (POTENTIAL_GCC_CFLAGS): Add `-Wpointer-arith'. --- configure.ac | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index c506b392e..3c117d3fd 100644 --- a/configure.ac +++ b/configure.ac @@ -1495,8 +1495,6 @@ case "$GCC" in ## We had -Wstrict-prototypes in here for a bit, but Guile does too ## much stuff with generic function pointers for that to really be ## less than exasperating. - ## -Wpointer-arith was here too, but something changed in gcc/glibc - ## and it became equally exasperating (gcc 2.95 and/or glibc 2.1.2). ## -Wundef was removed because Gnulib prevented it (see ## .) @@ -1505,7 +1503,7 @@ case "$GCC" in ## . POTENTIAL_GCC_CFLAGS="-Wall -Wmissing-prototypes \ - -Wdeclaration-after-statement \ + -Wdeclaration-after-statement -Wpointer-arith \ -Wswitch-enum -fno-strict-aliasing" # Do this here so we don't screw up any of the tests above that might # not be "warning free" From 7be3c2fcbfe2335d069a5c13b0ddf74b69383c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Sun, 6 May 2012 22:23:58 +0200 Subject: [PATCH 04/12] read: Avoid `void *' pointer arithmetic. * libguile/read.c (read_complete_token): Make `new_buf' a `char *' to avoid pointer arithmetic on `void *'. --- libguile/read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libguile/read.c b/libguile/read.c index 6ec38f35b..12b4c56d8 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -260,7 +260,7 @@ read_complete_token (SCM port, char *buffer, size_t buffer_size, } else { - void *new_buf = + char *new_buf = scm_gc_malloc_pointerless (overflow_size + bytes_read, "read"); memcpy (new_buf, overflow_buffer, overflow_size); From 5bbd632fc36b14f59d51e4ba2d8e189fd3cc0f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Sun, 6 May 2012 22:24:47 +0200 Subject: [PATCH 05/12] Make `scm_unget_byte' public. * libguile/ports.h (scm_unget_byte): Make `SCM_API' instead of `SCM_INTERNAL'. --- libguile/ports.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libguile/ports.h b/libguile/ports.h index fcf1424cc..bd80c39a2 100644 --- a/libguile/ports.h +++ b/libguile/ports.h @@ -4,7 +4,7 @@ #define SCM_PORTS_H /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, - * 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + * 2006, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -288,7 +288,7 @@ SCM_INTERNAL void scm_lfwrite_substr (SCM str, size_t start, size_t end, SCM_API void scm_flush (SCM port); SCM_API void scm_end_input (SCM port); SCM_API int scm_fill_input (SCM port); -SCM_INTERNAL void scm_unget_byte (int c, SCM port); +SCM_API void scm_unget_byte (int c, SCM port); SCM_API void scm_ungetc (scm_t_wchar c, SCM port); SCM_API void scm_ungets (const char *s, int n, SCM port); SCM_API SCM scm_peek_char (SCM port); From ff4d3672757fec3c8509e26bc60abf95f9e8f51a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Mon, 7 May 2012 00:32:01 +0200 Subject: [PATCH 06/12] Optimize `scm_read_string'. According to the new benchmarks, this leads a 5% speed improvement when reading small strings, and a 27% improvement when reading large strings. * libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update comment to mention codepoints. (scm_read_string): Make `str' a list of strings, instead of a string. Store characters read in buffer `c_str'. Cons to STR when C_STR is full, and concatenate/reverse at the end. * benchmark-suite/benchmarks/read.bm (small, large): New variables. Set %DEFAULT-PORT-ENCODING to "UTF-8". ("read")["small strings", "large strings"]: New benchmarks. --- benchmark-suite/benchmarks/read.bm | 15 +++++++++-- libguile/read.c | 42 ++++++++++++++++++------------ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/benchmark-suite/benchmarks/read.bm b/benchmark-suite/benchmarks/read.bm index e5cf7de93..f0b25f541 100644 --- a/benchmark-suite/benchmarks/read.bm +++ b/benchmark-suite/benchmarks/read.bm @@ -1,6 +1,6 @@ ;;; read.bm --- Exercise the reader. -*- Scheme -*- ;;; -;;; Copyright (C) 2008, 2010 Free Software Foundation, Inc. +;;; Copyright (C) 2008, 2010, 2012 Free Software Foundation, Inc. ;;; ;;; This program is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Lesser General Public License @@ -43,6 +43,11 @@ (load-file-with-reader file read buffering)) %files-to-load)) +(define small "\"hello, world!\"") +(define large (string-append "\"" (make-string 1234 #\A) "\"")) + +(fluid-set! %default-port-encoding "UTF-8") ; for string ports + (with-benchmark-prefix "read" @@ -59,4 +64,10 @@ (exercise-read (list _IOFBF 8192))) (benchmark "_IOFBF 16384" 10 - (exercise-read (list _IOFBF 16384)))) + (exercise-read (list _IOFBF 16384))) + + (benchmark "small strings" 100000 + (call-with-input-string small read)) + + (benchmark "large strings" 100000 + (call-with-input-string large read))) diff --git a/libguile/read.c b/libguile/read.c index 12b4c56d8..87d73bfbe 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -161,8 +161,8 @@ scm_i_read_hash_procedures_set_x (SCM value) /* Size of the C buffer used to read symbols and numbers. */ #define READER_BUFFER_SIZE 128 -/* Size of the C buffer used to read strings. */ -#define READER_STRING_BUFFER_SIZE 512 +/* Number of 32-bit codepoints in the buffer used to read strings. */ +#define READER_STRING_BUFFER_SIZE 128 /* The maximum size of Scheme character names. */ #define READER_CHAR_NAME_MAX_SIZE 50 @@ -493,15 +493,14 @@ scm_read_string (int chr, SCM port) /* For strings smaller than C_STR, this function creates only one Scheme object (the string returned). */ - SCM str = SCM_BOOL_F; - unsigned c_str_len = 0; - scm_t_wchar c; + SCM str = SCM_EOL; + size_t c_str_len = 0; + scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE]; /* Need to capture line and column numbers here. */ long line = SCM_LINUM (port); int column = SCM_COL (port) - 1; - str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0); while ('"' != (c = scm_getc (port))) { if (c == EOF) @@ -511,12 +510,11 @@ scm_read_string (int chr, SCM port) "end of file in string constant", SCM_EOL); } - if (c_str_len + 1 >= scm_i_string_length (str)) - { - SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL, 0); - - str = scm_string_append (scm_list_2 (str, addy)); - } + if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE) + { + str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str); + c_str_len = 0; + } if (c == '\\') { @@ -580,12 +578,22 @@ scm_read_string (int chr, SCM port) scm_list_1 (SCM_MAKE_CHAR (c))); } } - str = scm_i_string_start_writing (str); - scm_i_string_set_x (str, c_str_len++, c); - scm_i_string_stop_writing (); + + c_str[c_str_len++] = c; } - return maybe_annotate_source (scm_i_substring_copy (str, 0, c_str_len), - port, line, column); + + if (scm_is_null (str)) + /* Fast path: we got a string that fits in C_STR. */ + str = scm_from_utf32_stringn (c_str, c_str_len); + else + { + if (c_str_len > 0) + str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str); + + str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED); + } + + return maybe_annotate_source (str, port, line, column); } #undef FUNC_NAME From 4cec6c221aef72825a05963c95eb633af9a43fcf Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Wed, 2 May 2012 12:59:11 +0200 Subject: [PATCH 07/12] Add a missing SYNC_ALL in variable-ref * libguile/vm-i-system.c (variable-ref): Add a missing SYNC_ALL. --- libguile/vm-i-system.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c index 2399fa065..b8c18f077 100644 --- a/libguile/vm-i-system.c +++ b/libguile/vm-i-system.c @@ -310,6 +310,7 @@ VM_DEFINE_INSTRUCTION (25, variable_ref, "variable-ref", 0, 1, 1) { SCM var_name; + SYNC_ALL (); /* Attempt to provide the variable name in the error message. */ var_name = scm_module_reverse_lookup (scm_current_module (), x); vm_error_unbound (program, scm_is_true (var_name) ? var_name : x); From be52f329b68e5427c25247d0d97d8dfef79e7820 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 7 May 2012 20:18:56 +0200 Subject: [PATCH 08/12] add support for texinfo parsed arguments, like @acronym * module/texinfo.scm (texi-command-specs): Add a new kind of texinfo command, inline-text-args, a sort of a cross between inline-args, which are unparsed, and inline-text, which is. Perhaps this should supersede inline-args at some point. In any case, add acronym as an inline-text-args element. (inline-content?, arguments->attlist, complete-start-command) (parse-inline-text-args, make-dom-parser): Adapt for inline-text-args. * module/texinfo/serialize.scm (inline-text-args): Add serialization for @acronym. * test-suite/tests/texinfo.test ("test-texinfo->stexinfo"): Add some tests. --- module/texinfo.scm | 62 ++++++++++++++++++++++++++++++----- module/texinfo/serialize.scm | 17 +++++++++- test-suite/tests/texinfo.test | 20 +++++++++-- 3 files changed, 87 insertions(+), 12 deletions(-) diff --git a/module/texinfo.scm b/module/texinfo.scm index 8798eb3c1..2ffd85393 100644 --- a/module/texinfo.scm +++ b/module/texinfo.scm @@ -1,6 +1,6 @@ ;;;; (texinfo) -- parsing of texinfo into SXML ;;;; -;;;; Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc. ;;;; Copyright (C) 2004, 2009 Andy Wingo ;;;; Copyright (C) 2001,2002 Oleg Kiselyov ;;;; @@ -128,6 +128,8 @@ Parsed arguments until end of line Unparsed arguments ending with @code{#\\@}} @item INLINE-TEXT Parsed arguments ending with @code{#\\@}} +@item INLINE-TEXT-ARGS +Parsed arguments ending with @code{#\\@}} @item ENVIRON The tag is an environment tag, expect @code{@@end foo}. @item TABLE-ENVIRON @@ -169,7 +171,7 @@ entry. @item args Named arguments to the command, in the same format as the formals for a lambda. Only present for @code{INLINE-ARGS}, @code{EOL-ARGS}, -@code{ENVIRON}, @code{TABLE-ENVIRON} commands. +@code{INLINE-TEXT-ARGS}, @code{ENVIRON}, @code{TABLE-ENVIRON} commands. @end table" '(;; Special commands (include #f) ;; this is a low-level token @@ -224,6 +226,9 @@ lambda. Only present for @code{INLINE-ARGS}, @code{EOL-ARGS}, (tie INLINE-ARGS . ()) (image INLINE-ARGS . (file #:opt width height alt-text extension)) + ;; Inline parsed args commands + (acronym INLINE-TEXT-ARGS . (acronym #:opt meaning)) + ;; EOL args elements (node EOL-ARGS . (name #:opt next previous up)) (c EOL-ARGS . all) @@ -383,7 +388,9 @@ Examples: (parser-error #f "Unknown command" command))) (define (inline-content? content) - (or (eq? content 'INLINE-TEXT) (eq? content 'INLINE-ARGS))) + (case content + ((INLINE-TEXT INLINE-ARGS INLINE-TEXT-ARGS) #t) + (else #f))) ;;======================================================================== @@ -572,6 +579,7 @@ Examples: ;; Content model Port position ;; ============= ============= ;; INLINE-TEXT One character after the #\{. +;; INLINE-TEXT-ARGS One character after the #\{. ;; INLINE-ARGS The first character after the #\}. ;; EOL-TEXT The first non-whitespace character after the command. ;; ENVIRON, TABLE-ENVIRON, EOL-ARGS, EOL-TEXT @@ -599,7 +607,9 @@ Examples: (car names)))) (else (loop (cdr in) (cdr names) opt? - (cons (list (car names) (car in)) out)))))) + (acons (car names) + (if (list? (car in)) (car in) (list (car in))) + out)))))) (define (parse-table-args command port) (let* ((line (string-trim-both (read-text-line port))) @@ -648,6 +658,9 @@ Examples: ((INLINE-ARGS) (assert-curr-char '(#\{) "Inline element lacks {" port) (values command (get-arguments type arg-names #\}) type)) + ((INLINE-TEXT-ARGS) + (assert-curr-char '(#\{) "Inline element lacks {" port) + (values command '() type)) ((EOL-ARGS) (values command (get-arguments type arg-names #\newline) type)) ((ENVIRON ENTRY INDEX) @@ -998,15 +1011,48 @@ Examples: (cons (apply string-append strs) result)))) '() #t))))))) +(define (parse-inline-text-args port spec text) + (let lp ((in text) (cur '()) (out '())) + (cond + ((null? in) + (if (and (pair? cur) + (string? (car cur)) + (string-whitespace? (car cur))) + (lp in (cdr cur) out) + (let ((args (reverse (if (null? cur) + out + (cons (reverse cur) out))))) + (arguments->attlist port args (cddr spec))))) + ((pair? (car in)) + (lp (cdr in) (cons (car in) cur) out)) + ((string-index (car in) #\,) + (let* ((parts (string-split (car in) #\,)) + (head (string-trim-right (car parts))) + (rev-tail (reverse (cdr parts))) + (last (string-trim (car rev-tail)))) + (lp (cdr in) + (if (string-null? last) cur (cons last cur)) + (append (cdr rev-tail) + (cons (reverse (if (string-null? head) cur (cons head cur))) + out))))) + (else + (lp (cdr in) + (cons (if (null? cur) (string-trim (car in)) (car in)) cur) + out))))) + (define (make-dom-parser) (make-command-parser (lambda (command args content seed) ; fdown '()) (lambda (command args parent-seed seed) ; fup - (let ((seed (reverse-collect-str-drop-ws seed))) - (acons command - (if (null? args) seed (acons '% args seed)) - parent-seed))) + (let ((seed (reverse-collect-str-drop-ws seed)) + (spec (command-spec command))) + (if (eq? (cadr spec) 'INLINE-TEXT-ARGS) + (cons (list command (cons '% (parse-inline-text-args #f spec seed))) + parent-seed) + (acons command + (if (null? args) seed (acons '% args seed)) + parent-seed)))) (lambda (string1 string2 seed) ; str-handler (if (string-null? string2) (cons string1 seed) diff --git a/module/texinfo/serialize.scm b/module/texinfo/serialize.scm index 6a32d2346..1436ad5f9 100644 --- a/module/texinfo/serialize.scm +++ b/module/texinfo/serialize.scm @@ -1,6 +1,6 @@ ;;;; (texinfo serialize) -- rendering stexinfo as texinfo ;;;; -;;;; Copyright (C) 2009 Free Software Foundation, Inc. +;;;; Copyright (C) 2009, 2012 Free Software Foundation, Inc. ;;;; Copyright (C) 2003,2004,2009 Andy Wingo ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -98,6 +98,20 @@ ",")) "{" command "@" accum)) +(define (inline-text-args exp lp command type formals args accum) + (list* "}" + (if (not args) "" + (apply + append + (list-intersperse + (map + (lambda (x) (append-map (lambda (x) (lp x '())) (reverse x))) + (drop-while not + (map (lambda (x) (assq-ref args x)) + (reverse formals)))) + '(",")))) + "{" command "@" accum)) + (define (serialize-text-args lp formals args) (apply append @@ -202,6 +216,7 @@ `((EMPTY-COMMAND . ,empty-command) (INLINE-TEXT . ,inline-text) (INLINE-ARGS . ,inline-args) + (INLINE-TEXT-ARGS . ,inline-text-args) (EOL-TEXT . ,eol-text) (EOL-TEXT-ARGS . ,eol-text-args) (INDEX . ,eol-text-args) diff --git a/test-suite/tests/texinfo.test b/test-suite/tests/texinfo.test index 49d108698..98c44b91b 100644 --- a/test-suite/tests/texinfo.test +++ b/test-suite/tests/texinfo.test @@ -1,6 +1,6 @@ ;;;; texinfo.test -*- scheme -*- ;;;; -;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc. ;;;; Copyright (C) 2001,2002 Oleg Kiselyov ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -177,8 +177,9 @@ (test (string-append "foo bar baz\n@settitle " title "\n" str) expected-res)) (define (test-body str expected-res) - (pass-if (equal? expected-res - (cddr (try-with-title "zog" str))))) + (pass-if str + (equal? expected-res + (cddr (try-with-title "zog" str))))) (define (list-intersperse src-l elem) (if (null? src-l) src-l @@ -218,6 +219,19 @@ '((para (code "abc " (code))))) (test-body "@code{ arg }" '((para (code "arg")))) + + (test-body "@acronym{GNU}" + '((para (acronym (% (acronym "GNU")))))) + + (test-body "@acronym{GNU, not unix}" + '((para (acronym (% (acronym "GNU") + (meaning "not unix")))))) + + (test-body "@acronym{GNU, @acronym{GNU}'s Not Unix}" + '((para (acronym (% (acronym "GNU") + (meaning (acronym (% (acronym "GNU"))) + "'s Not Unix")))))) + (test-body "@example\n foo asdf asd sadf asd \n@end example\n" '((example " foo asdf asd sadf asd "))) (test-body (join-lines From 520850ad2768dbc0fe16254b90a52b16bfad1f14 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Mon, 7 May 2012 20:39:14 +0200 Subject: [PATCH 09/12] (texinfo docbook) support for @acronym * module/texinfo/docbook.scm (*sdocbook->stexi-rules*): Recognize "acronym" as parsing to the @acronym texinfo command. --- module/texinfo/docbook.scm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/module/texinfo/docbook.scm b/module/texinfo/docbook.scm index 72779263a..50a7bbc41 100644 --- a/module/texinfo/docbook.scm +++ b/module/texinfo/docbook.scm @@ -1,6 +1,6 @@ ;;;; (texinfo docbook) -- translating sdocbook into stexinfo ;;;; -;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc. +;;;; Copyright (C) 2009, 2010, 2012 Free Software Foundation, Inc. ;;;; Copyright (C) 2007, 2009 Andy Wingo ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -89,6 +89,8 @@ a number of generic rules for transforming docbook into texinfo." `(item ,@body)))) . ,(lambda (tag . body) `(itemize ,@body))) + (acronym . ,(lambda (tag . body) + `(acronym (% (acronym . ,body))))) (term . ,detag-one) (informalexample . ,detag-one) (section . ,identity) From 0eba699d12f638c624efcdc2b617b0aa9099ee1f Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Tue, 8 May 2012 16:03:54 +0200 Subject: [PATCH 10/12] docbook->texi fix * module/texinfo/docbook.scm (*sdocbook->stexi-rules*): Only convert ulink to uref if there is a URL attribute. --- module/texinfo/docbook.scm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/module/texinfo/docbook.scm b/module/texinfo/docbook.scm index 50a7bbc41..c5a8d659f 100644 --- a/module/texinfo/docbook.scm +++ b/module/texinfo/docbook.scm @@ -97,8 +97,12 @@ a number of generic rules for transforming docbook into texinfo." (subsection . ,identity) (subsubsection . ,identity) (ulink . ,(lambda (tag attrs . body) - `(uref (% ,(assq 'url (cdr attrs)) - (title ,@body))))) + (cond + ((assq 'url (cdr attrs)) + => (lambda (url) + `(uref (% ,url (title ,@body))))) + (else + (car body))))) (*text* . ,detag-one) (*default* . ,(lambda (tag . body) (let ((subst (assq tag tag-replacements))) From e8b21eecb11d261eeecbc7a14fa7f7c16e819a3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Tue, 8 May 2012 12:43:06 +0200 Subject: [PATCH 11/12] Fix `setvbuf' to leave the line/column number unchanged. * libguile/fports.c (scm_setvbuf): Use `scm_take_from_input_buffers' directly instead of `scm_drain_input'; use `scm_unget_byte' instead of `scm_unread_string' to put the drained input back to PORT. This leaves PORT's line/column numbers unchanged, whereas they'd previously be decreased by the `scm_unread_string' call. * libguile/ports.c (scm_take_from_input_buffers): Update description and variable names to refer to "bytes", not "chars". * test-suite/tests/ports.test ("setvbuf"): New test prefix. --- libguile/fports.c | 27 +++++++++++++++++++++------ libguile/ports.c | 17 ++++++++++------- test-suite/tests/ports.test | 25 ++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 14 deletions(-) diff --git a/libguile/fports.c b/libguile/fports.c index 3ac3ced8b..10cf6713a 100644 --- a/libguile/fports.c +++ b/libguile/fports.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, - * 2004, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. + * 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -174,7 +174,8 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0, { int cmode; long csize; - SCM drained; + size_t ndrained; + char *drained; scm_t_port *pt; port = SCM_COERCE_OUTPORT (port); @@ -211,9 +212,21 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0, pt = SCM_PTAB_ENTRY (port); if (SCM_INPUT_PORT_P (port)) - drained = scm_drain_input (port); + { + /* Drain pending input from PORT. Don't use `scm_drain_input' since + it returns a string, whereas we want binary input here. */ + ndrained = pt->read_end - pt->read_pos; + if (pt->read_buf == pt->putback_buf) + ndrained += pt->saved_read_end - pt->saved_read_pos; + + if (ndrained > 0) + { + drained = scm_gc_malloc_pointerless (ndrained, "file port"); + scm_take_from_input_buffers (port, drained, ndrained); + } + } else - drained = scm_nullstr; + ndrained = 0; if (SCM_OUTPUT_PORT_P (port)) scm_flush (port); @@ -232,8 +245,10 @@ SCM_DEFINE (scm_setvbuf, "setvbuf", 2, 1, 0, scm_fport_buffer_add (port, csize, csize); - if (scm_is_true (drained) && scm_c_string_length (drained)) - scm_unread_string (drained, port); + if (ndrained > 0) + /* Put DRAINED back to PORT. */ + while (ndrained-- > 0) + scm_unget_byte (drained[ndrained], port); return SCM_UNSPECIFIED; } diff --git a/libguile/ports.c b/libguile/ports.c index 2d8b9ed48..3ef92b902 100644 --- a/libguile/ports.c +++ b/libguile/ports.c @@ -289,19 +289,21 @@ SCM_DEFINE (scm_char_ready_p, "char-ready?", 0, 1, 0, } #undef FUNC_NAME -/* move up to read_len chars from port's putback and/or read buffers - into memory starting at dest. returns the number of chars moved. */ -size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) +/* Move up to READ_LEN bytes from PORT's putback and/or read buffers + into memory starting at DEST. Return the number of bytes moved. + PORT's line/column numbers are left unchanged. */ +size_t +scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { scm_t_port *pt = SCM_PTAB_ENTRY (port); - size_t chars_read = 0; + size_t bytes_read = 0; size_t from_buf = min (pt->read_end - pt->read_pos, read_len); if (from_buf > 0) { memcpy (dest, pt->read_pos, from_buf); pt->read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; read_len -= from_buf; dest += from_buf; } @@ -314,10 +316,11 @@ size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len) { memcpy (dest, pt->saved_read_pos, from_buf); pt->saved_read_pos += from_buf; - chars_read += from_buf; + bytes_read += from_buf; } } - return chars_read; + + return bytes_read; } /* Clear a port's read buffers, returning the contents. */ diff --git a/test-suite/tests/ports.test b/test-suite/tests/ports.test index d4a333f56..5ca416daf 100644 --- a/test-suite/tests/ports.test +++ b/test-suite/tests/ports.test @@ -2,7 +2,7 @@ ;;;; Jim Blandy --- May 1999 ;;;; ;;;; Copyright (C) 1999, 2001, 2004, 2006, 2007, 2009, 2010, -;;;; 2011 Free Software Foundation, Inc. +;;;; 2011, 2012 Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or ;;;; modify it under the terms of the GNU Lesser General Public @@ -1064,6 +1064,29 @@ (list read read-char read-line) '("read" "read-char" "read-line"))) + + +(with-test-prefix "setvbuf" + + (pass-if "line/column number preserved" + ;; In Guile 2.0.5, `setvbuf' would erroneously decrease the port's + ;; line and/or column number. + (call-with-output-file (test-file) + (lambda (p) + (display "This is GNU Guile.\nWelcome." p))) + (call-with-input-file (test-file) + (lambda (p) + (and (eq? #\T (read-char p)) + (let ((line (port-line p)) + (col (port-column p))) + (and (= line 0) (= col 1) + (begin + (setvbuf p _IOFBF 777) + (let ((line* (port-line p)) + (col* (port-column p))) + (and (= line line*) + (= col col*))))))))))) + (delete-file (test-file)) ;;; Local Variables: From 33672b071118f54ee637afa00349f2a4404a84da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Tue, 8 May 2012 12:43:06 +0200 Subject: [PATCH 12/12] Add missing `SCM_API' for `scm_take_from_input_buffers'. * libguile/ports.h (scm_take_from_input_buffers): Add `SCM_API'. --- libguile/ports.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libguile/ports.h b/libguile/ports.h index bd80c39a2..d1e1fd698 100644 --- a/libguile/ports.h +++ b/libguile/ports.h @@ -242,7 +242,8 @@ SCM_API void scm_set_port_truncate (scm_t_bits tc, scm_t_off length)); SCM_API void scm_set_port_input_waiting (scm_t_bits tc, int (*input_waiting) (SCM)); SCM_API SCM scm_char_ready_p (SCM port); -size_t scm_take_from_input_buffers (SCM port, char *dest, size_t read_len); +SCM_API size_t scm_take_from_input_buffers (SCM port, char *dest, + size_t read_len); SCM_API SCM scm_drain_input (SCM port); SCM_API SCM scm_current_input_port (void); SCM_API SCM scm_current_output_port (void);