diff --git a/.gitignore b/.gitignore index a1221767f..caab163a3 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,4 @@ guile-readline/guile-readline-config.h guile-readline/guile-readline-config.h.in TAGS guile-1.8.pc +libguile/stack-limit-calibration.scm diff --git a/NEWS b/NEWS index 02acd6e31..3f47766c3 100644 --- a/NEWS +++ b/NEWS @@ -77,6 +77,7 @@ available: Guile is now always configured in "maintainer mode". ** Fix misleading output from `(help rationalize)' ** Fix build failure on Debian hppa architecture (bad stack growth detection) ** Fix `gcd' when called with a single, negative argument. +** Fix `Stack overflow' errors seen when building on some platforms Changes in 1.8.5 (since 1.8.4) diff --git a/check-guile.in b/check-guile.in index f66bf13be..9ee2ea3f6 100644 --- a/check-guile.in +++ b/check-guile.in @@ -41,6 +41,7 @@ if [ ! -f guile-procedures.txt ] ; then fi exec $guile \ + -l ${top_builddir}/libguile/stack-limit-calibration.scm \ -e main -s "$TEST_SUITE_DIR/guile-test" \ --test-suite "$TEST_SUITE_DIR/tests" \ --log-file check-guile.log "$@" diff --git a/configure.in b/configure.in index 003493150..8f249bd98 100644 --- a/configure.in +++ b/configure.in @@ -1588,6 +1588,8 @@ AC_CONFIG_FILES([libguile/guile-func-name-check], [chmod +x libguile/guile-func-name-check]) AC_CONFIG_FILES([libguile/guile-snarf-docs], [chmod +x libguile/guile-snarf-docs]) +AC_CONFIG_FILES([test-suite/standalone/test-use-srfi], + [chmod +x test-suite/standalone/test-use-srfi]) AC_OUTPUT diff --git a/libguile/Makefile.am b/libguile/Makefile.am index 560a7cc2f..ba910ec53 100644 --- a/libguile/Makefile.am +++ b/libguile/Makefile.am @@ -237,7 +237,7 @@ EXTRA_DIST = ChangeLog-scm ChangeLog-threads \ cpp_errno.c cpp_err_symbols.in cpp_err_symbols.c \ cpp_sig_symbols.c cpp_sig_symbols.in cpp_cnvt.awk \ c-tokenize.lex version.h.in \ - scmconfig.h.top libgettext.h + scmconfig.h.top libgettext.h measure-hwm.scm # $(DOT_DOC_FILES) $(EXTRA_DOT_DOC_FILES) \ # guile-procedures.txt guile.texi @@ -332,6 +332,29 @@ guile-procedures.txt: guile-procedures.texi endif +# Stack limit calibration for `make check'. (For why we do this, see +# the comments in measure-hwm.scm.) We're relying here on a couple of +# bits of Automake magic. +# +# 1. The fact that "libguile" comes before "test-suite" in SUBDIRS in +# our toplevel Makefile.am. This ensures that the +# stack-limit-calibration.scm "test" will be run before any of the +# tests under test-suite. +# +# 2. The fact that each test is invoked as $TESTS_ENVIRONMENT $test. +# This allows us to ensure that the test will be considered to have +# passed, by using `true' as TESTS_ENVIRONMENT. +# +# Why don't we care about the test "actually passing"? Because the +# important thing about stack-limit-calibration.scm is just that it is +# generated in the first place, so that other tests under test-suite +# can use it. +TESTS = stack-limit-calibration.scm +TESTS_ENVIRONMENT = true + +stack-limit-calibration.scm: measure-hwm.scm guile$(EXEEXT) + $(preinstguile) -s $(srcdir)/measure-hwm.scm > $@ + c-tokenize.c: c-tokenize.lex flex -t $(srcdir)/c-tokenize.lex > $@ || { rm $@; false; } @@ -386,7 +409,7 @@ MOSTLYCLEANFILES = \ cpp_err_symbols_here cpp_err_symbols_diff cpp_err_symbols_new \ cpp_sig_symbols_here cpp_sig_symbols_diff cpp_sig_symbols_new \ version.h version.h.tmp \ - scmconfig.h scmconfig.h.tmp + scmconfig.h scmconfig.h.tmp stack-limit-calibration.scm CLEANFILES = libpath.h *.x *.doc guile-procedures.txt guile-procedures.texi guile.texi diff --git a/libguile/measure-hwm.scm b/libguile/measure-hwm.scm new file mode 100644 index 000000000..53a30d560 --- /dev/null +++ b/libguile/measure-hwm.scm @@ -0,0 +1,136 @@ +;;;; Copyright (C) 2008 Free Software Foundation, Inc. +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 2.1 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;;;; + +;;; Commentary: + +;;; This code is run during the Guile build, in order to set the stack +;;; limit to a value that will allow the `make check' tests to pass, +;;; taking into account the average stack usage on the build platform. +;;; For more detail, see the text below that gets written out to the +;;; stack limit calibration file. + +;;; Code: + +;; Store off Guile's default stack limit. +(define default-stack-limit (cadr (memq 'stack (debug-options)))) + +;; Now disable the stack limit, so that we don't get a stack overflow +;; while running this code! +(debug-set! stack 0) + +;; Define a variable to hold the measured stack high water mark (HWM). +(define top-repl-hwm-measured 0) + +;; Use an evaluator trap to measure the stack size at every +;; evaluation step, and increase top-repl-hwm-measured if it is less +;; than the measured stack size. +(trap-set! enter-frame-handler + (lambda _ + (let ((stack-size (%get-stack-size))) + (if (< top-repl-hwm-measured stack-size) + (set! top-repl-hwm-measured stack-size))))) +(trap-enable 'enter-frame) +(trap-enable 'traps) + +;; Call (turn-on-debugging) and (top-repl) in order to simulate as +;; closely as possible what happens - and in particular, how much +;; stack is used - when a standard Guile REPL is started up. +;; +;; `make check' stack overflow errors have been reported in the past +;; for: +;; +;; - test-suite/standalone/test-use-srfi, which runs `guile -q +;; --use-srfi=...' a few times, with standard input for the REPL +;; coming from a shell script +;; +;; - test-suite/tests/elisp.test, which does not involve the REPL, but +;; has a lot of `use-modules' calls. +;; +;; Stack high water mark (HWM) measurements show that the HWM is +;; higher in the test-use-srfi case - specifically because of the +;; complexity of (top-repl) - so that is what we simulate for our +;; calibration model here. +(turn-on-debugging) +(with-output-to-port (%make-void-port "w") + (lambda () + (with-input-from-string "\n" top-repl))) + +;; top-repl-hwm-measured now contains the stack HWM that resulted from +;; running that code. + +;; This is the value of top-repl-hwm-measured that we get on a +;; `canonical' build platform. (See text below for what that means.) +(define top-repl-hwm-i686-pc-linux-gnu 9461) + +;; Using the above results, output code that tests can run in order to +;; configure the stack limit correctly for the current build platform. +(format #t "\ +;; Stack limit calibration file. +;; +;; This file is automatically generated by Guile when it builds, in +;; order to set the stack limit to a value that reflects the stack +;; usage of the build platform (OS + compiler + compilation options), +;; specifically so that none of Guile's own tests (which are run by +;; `make check') fail because of a benign stack overflow condition. +;; +;; By a `benign' stack overflow condition, we mean one where the test +;; code is behaving correctly, but exceeds the configured stack limit +;; because the limit is set too low. A non-benign stack overflow +;; condition would be if a piece of test code behaved significantly +;; differently on some platform to how it does normally, and as a +;; result consumed a lot more stack. Although they seem pretty +;; unlikely, we would want to catch non-benign conditions like this, +;; and that is why we don't just do `(debug-set! stack 0)' when +;; running `make check'. +;; +;; Although the primary purpose of this file is to prevent `make +;; check' from failing without good reason, Guile developers and users +;; may also find the following information useful, when determining +;; what stack limit to configure for their own programs. + + (let (;; The stack high water mark measured when starting up the + ;; standard Guile REPL on the current build platform. + (top-repl-hwm-measured ~a) + + ;; The value of top-repl-hwm-measured that we get when building + ;; Guile on an i686 PC GNU/Linux system, after configuring with + ;; `./configure --enable-maintainer-mode --with-threads'. + ;; (Hereafter referred to as the `canonical' build platform.) + (top-repl-hwm-i686-pc-linux-gnu ~a) + + ;; Guile's default stack limit (i.e. the initial, C-coded value + ;; of the 'stack debug option). In the context of this file, + ;; the important thing about this number is that we know that + ;; it allows all of the `make check' tests to pass on the + ;; canonical build platform. + (default-stack-limit ~a) + + ;; Calibrated stack limit. This is the default stack limit, + ;; scaled by the factor between top-repl-hwm-i686-pc-linux-gnu + ;; and top-repl-hwm-measured. + (calibrated-stack-limit ~a)) + + ;; Configure the calibrated stack limit. + (debug-set! stack calibrated-stack-limit)) +" + top-repl-hwm-measured + top-repl-hwm-i686-pc-linux-gnu + default-stack-limit + ;; Use quotient here to get an integer result, rather than a + ;; rational. + (quotient (* default-stack-limit top-repl-hwm-measured) + top-repl-hwm-i686-pc-linux-gnu)) diff --git a/libguile/stackchk.c b/libguile/stackchk.c index 391ce21e9..a53e67629 100644 --- a/libguile/stackchk.c +++ b/libguile/stackchk.c @@ -24,6 +24,7 @@ #include "libguile/_scm.h" #include "libguile/ports.h" #include "libguile/root.h" +#include "libguile/threads.h" #include "libguile/stackchk.h" @@ -78,6 +79,17 @@ scm_stack_report () scm_puts ("\n", port); } + +SCM_DEFINE (scm_sys_get_stack_size, "%get-stack-size", 0, 0, 0, + (), + "Return the current thread's C stack size (in Scheme objects).") +#define FUNC_NAME s_scm_sys_get_stack_size +{ + return scm_from_long (scm_stack_size (SCM_I_CURRENT_THREAD->base)); +} +#undef FUNC_NAME + + void scm_init_stackchk () { diff --git a/libguile/stackchk.h b/libguile/stackchk.h index 66582e929..8681f5d46 100644 --- a/libguile/stackchk.h +++ b/libguile/stackchk.h @@ -60,6 +60,7 @@ SCM_API int scm_stack_checking_enabled_p; SCM_API void scm_report_stack_overflow (void); SCM_API long scm_stack_size (SCM_STACKITEM *start); SCM_API void scm_stack_report (void); +SCM_API SCM scm_sys_get_stack_size (void); SCM_INTERNAL void scm_init_stackchk (void); #endif /* SCM_STACKCHK_H */ diff --git a/libguile/threads.c b/libguile/threads.c index ac3a50290..745d03471 100644 --- a/libguile/threads.c +++ b/libguile/threads.c @@ -97,11 +97,13 @@ static SCM enqueue (SCM q, SCM t) { SCM c = scm_cons (t, SCM_EOL); + SCM_CRITICAL_SECTION_START; if (scm_is_null (SCM_CDR (q))) SCM_SETCDR (q, c); else SCM_SETCDR (SCM_CAR (q), c); SCM_SETCAR (q, c); + SCM_CRITICAL_SECTION_END; return c; } @@ -114,6 +116,7 @@ static int remqueue (SCM q, SCM c) { SCM p, prev = q; + SCM_CRITICAL_SECTION_START; for (p = SCM_CDR (q); !scm_is_null (p); p = SCM_CDR (p)) { if (scm_is_eq (p, c)) @@ -121,10 +124,12 @@ remqueue (SCM q, SCM c) if (scm_is_eq (c, SCM_CAR (q))) SCM_SETCAR (q, SCM_CDR (c)); SCM_SETCDR (prev, SCM_CDR (c)); + SCM_CRITICAL_SECTION_END; return 1; } prev = p; } + SCM_CRITICAL_SECTION_END; return 0; } @@ -134,14 +139,20 @@ remqueue (SCM q, SCM c) static SCM dequeue (SCM q) { - SCM c = SCM_CDR (q); + SCM c; + SCM_CRITICAL_SECTION_START; + c = SCM_CDR (q); if (scm_is_null (c)) - return SCM_BOOL_F; + { + SCM_CRITICAL_SECTION_END; + return SCM_BOOL_F; + } else { SCM_SETCDR (q, SCM_CDR (c)); if (scm_is_null (SCM_CDR (q))) SCM_SETCAR (q, SCM_EOL); + SCM_CRITICAL_SECTION_END; return SCM_CAR (c); } } @@ -204,8 +215,7 @@ thread_free (SCM obj) interrupted. Upon return of this function, the current thread is no longer on QUEUE, even when the sleep has been interrupted. - The QUEUE data structure is assumed to be protected by MUTEX and - the caller of block_self must hold MUTEX. It will be atomically + The caller of block_self must hold MUTEX. It will be atomically unlocked while sleeping, just as with scm_i_pthread_cond_wait. SLEEP_OBJECT is an arbitrary SCM value that is kept alive as long @@ -253,9 +263,8 @@ block_self (SCM queue, SCM sleep_object, scm_i_pthread_mutex_t *mutex, return err; } -/* Wake up the first thread on QUEUE, if any. The caller must hold - the mutex that protects QUEUE. The awoken thread is returned, or - #f when the queue was empty. +/* Wake up the first thread on QUEUE, if any. The awoken thread is + returned, or #f if the queue was empty. */ static SCM unblock_from_queue (SCM queue) @@ -420,6 +429,7 @@ guilify_self_1 (SCM_STACKITEM *base) t->result = SCM_BOOL_F; t->cleanup_handler = SCM_BOOL_F; t->mutexes = SCM_EOL; + t->held_mutex = NULL; t->join_queue = SCM_EOL; t->dynamic_state = SCM_BOOL_F; t->dynwinds = SCM_EOL; @@ -564,6 +574,14 @@ on_thread_exit (void *v) /* This handler is executed in non-guile mode. */ scm_i_thread *t = (scm_i_thread *) v, **tp; + /* If this thread was cancelled while doing a cond wait, it will + still have a mutex locked, so we unlock it here. */ + if (t->held_mutex) + { + scm_i_pthread_mutex_unlock (t->held_mutex); + t->held_mutex = NULL; + } + scm_i_pthread_setspecific (scm_i_thread_key, v); /* Ensure the signal handling thread has been launched, because we might be @@ -1437,17 +1455,15 @@ fat_mutex_unlock (SCM mutex, SCM cond, { int brk = 0; - scm_i_scm_pthread_mutex_lock (&c->lock); if (m->level > 0) m->level--; if (m->level == 0) m->owner = unblock_from_queue (m->waiting); - scm_i_pthread_mutex_unlock (&m->lock); - t->block_asyncs++; - err = block_self (c->waiting, cond, &c->lock, waittime); + err = block_self (c->waiting, cond, &m->lock, waittime); + scm_i_pthread_mutex_unlock (&m->lock); if (err == 0) { @@ -1462,7 +1478,6 @@ fat_mutex_unlock (SCM mutex, SCM cond, else if (err != EINTR) { errno = err; - scm_i_pthread_mutex_unlock (&c->lock); scm_syserror (NULL); } @@ -1470,12 +1485,9 @@ fat_mutex_unlock (SCM mutex, SCM cond, { if (relock) scm_lock_mutex_timed (mutex, SCM_UNDEFINED, owner); - scm_i_pthread_mutex_unlock (&c->lock); break; } - scm_i_pthread_mutex_unlock (&c->lock); - t->block_asyncs--; scm_async_click (); @@ -1583,7 +1595,6 @@ static size_t fat_cond_free (SCM mx) { fat_cond *c = SCM_CONDVAR_DATA (mx); - scm_i_pthread_mutex_destroy (&c->lock); scm_gc_free (c, sizeof (fat_cond), "condition-variable"); return 0; } @@ -1607,7 +1618,6 @@ SCM_DEFINE (scm_make_condition_variable, "make-condition-variable", 0, 0, 0, SCM cv; c = scm_gc_malloc (sizeof (fat_cond), "condition variable"); - scm_i_pthread_mutex_init (&c->lock, 0); c->waiting = SCM_EOL; SCM_NEWSMOB (cv, scm_tc16_condvar, (scm_t_bits) c); c->waiting = make_queue (); @@ -1646,9 +1656,7 @@ SCM_DEFINE (scm_timed_wait_condition_variable, "wait-condition-variable", 2, 1, static void fat_cond_signal (fat_cond *c) { - scm_i_scm_pthread_mutex_lock (&c->lock); unblock_from_queue (c->waiting); - scm_i_pthread_mutex_unlock (&c->lock); } SCM_DEFINE (scm_signal_condition_variable, "signal-condition-variable", 1, 0, 0, @@ -1665,10 +1673,8 @@ SCM_DEFINE (scm_signal_condition_variable, "signal-condition-variable", 1, 0, 0, static void fat_cond_broadcast (fat_cond *c) { - scm_i_scm_pthread_mutex_lock (&c->lock); while (scm_is_true (unblock_from_queue (c->waiting))) ; - scm_i_pthread_mutex_unlock (&c->lock); } SCM_DEFINE (scm_broadcast_condition_variable, "broadcast-condition-variable", 1, 0, 0, @@ -1833,7 +1839,11 @@ scm_dynwind_pthread_mutex_lock (scm_i_pthread_mutex_t *mutex) int scm_pthread_cond_wait (scm_i_pthread_cond_t *cond, scm_i_pthread_mutex_t *mutex) { + scm_t_guile_ticket t = scm_leave_guile (); + ((scm_i_thread *)t)->held_mutex = mutex; int res = scm_i_pthread_cond_wait (cond, mutex); + ((scm_i_thread *)t)->held_mutex = NULL; + scm_enter_guile (t); return res; } @@ -1842,7 +1852,11 @@ scm_pthread_cond_timedwait (scm_i_pthread_cond_t *cond, scm_i_pthread_mutex_t *mutex, const scm_t_timespec *wt) { + scm_t_guile_ticket t = scm_leave_guile (); + ((scm_i_thread *)t)->held_mutex = mutex; int res = scm_i_pthread_cond_timedwait (cond, mutex, wt); + ((scm_i_thread *)t)->held_mutex = NULL; + scm_enter_guile (t); return res; } diff --git a/libguile/threads.h b/libguile/threads.h index 89e3b0238..9ce04e1ff 100644 --- a/libguile/threads.h +++ b/libguile/threads.h @@ -56,6 +56,7 @@ typedef struct scm_i_thread { scm_i_pthread_mutex_t admin_mutex; SCM mutexes; + scm_i_pthread_mutex_t *held_mutex; SCM result; int canceled; diff --git a/test-suite/standalone/test-use-srfi b/test-suite/standalone/test-use-srfi.in similarity index 85% rename from test-suite/standalone/test-use-srfi rename to test-suite/standalone/test-use-srfi.in index 7186b5a24..57f84afe4 100755 --- a/test-suite/standalone/test-use-srfi +++ b/test-suite/standalone/test-use-srfi.in @@ -19,7 +19,7 @@ # Test that two srfi numbers on the command line work. # -guile -q --use-srfi=1,10 >/dev/null </dev/null </dev/null </dev/null </dev/null </dev/null <