Rewrite 'rationalize' to fix bugs and improve efficiency.

Fixes <http://bugs.gnu.org/14905>. Reported by Göran Weinholt <goran@weinholt.se>. * libguile/numbers.c (scm_rationalize): Rewrite. Previously an incorrect algorithm was used which failed in many cases. * test-suite/tests/numbers.test (rationalize): Add tests.
2025-07-02 07:40:30 +02:00 · 2013-07-20 12:29:02 -04:00 · 2013-07-20 12:29:02 -04:00 · 620c13e8fc
commit 620c13e8fc
parent 824b9ad8b7
2 changed files with 203 additions and 73 deletions
--- a/libguile/numbers.c
+++ b/libguile/numbers.c
@ -9391,89 +9391,190 @@ SCM_DEFINE (scm_rationalize, "rationalize", 2, 0, 0,
 {
  SCM_ASSERT_TYPE (scm_is_real (x), x, SCM_ARG1, FUNC_NAME, "real");
  SCM_ASSERT_TYPE (scm_is_real (eps), eps, SCM_ARG2, FUNC_NAME, "real");
-  eps = scm_abs (eps);
+
-  if (scm_is_false (scm_positive_p (eps)))
+  if (SCM_UNLIKELY (!scm_is_exact (eps) || !scm_is_exact (x)))
    {
-      /* eps is either zero or a NaN */
+      if (SCM_UNLIKELY (scm_is_false (scm_finite_p (eps))))
-      if (scm_is_true (scm_nan_p (eps)))
+        {
-	return scm_nan ();
+          if (scm_is_false (scm_nan_p (eps)) && scm_is_true (scm_finite_p (x)))
-      else if (SCM_INEXACTP (eps))
+            return flo0;
-	return scm_exact_to_inexact (x);
+          else
            return scm_nan ();
        }
      else if (SCM_UNLIKELY (scm_is_false (scm_finite_p (x))))
        return x;
      else
-	return x;
+        return scm_exact_to_inexact
-    }
+          (scm_rationalize (scm_inexact_to_exact (x),
-  else if (scm_is_false (scm_finite_p (eps)))
+                            scm_inexact_to_exact (eps)));
    {
      if (scm_is_true (scm_finite_p (x)))
 	return flo0;
      else
 	return scm_nan ();
    }
  else if (scm_is_false (scm_finite_p (x))) /* checks for both inf and nan */
    return x;
  else if (scm_is_false (scm_less_p (scm_floor (scm_sum (x, eps)),
 				     scm_ceiling (scm_difference (x, eps)))))
    {
      /* There's an integer within range; we want the one closest to zero */
      if (scm_is_false (scm_less_p (eps, scm_abs (x))))
 	{
 	  /* zero is within range */
 	  if (SCM_INEXACTP (x) || SCM_INEXACTP (eps))
 	    return flo0;
 	  else
 	    return SCM_INUM0;
 	}
      else if (scm_is_true (scm_positive_p (x)))
 	return scm_ceiling (scm_difference (x, eps));
      else
 	return scm_floor (scm_sum (x, eps));
    }
  else
    {
-      /* Use continued fractions to find closest ratio.  All
+      /* X and EPS are exact rationals.
-	 arithmetic is done with exact numbers.
+
         The code that follows is equivalent to the following Scheme code:
         (define (exact-rationalize x eps)
           (let ((n1  (if (negative? x) -1 1))
                 (x   (abs x))
                 (eps (abs eps)))
             (let ((lo (- x eps))
                   (hi (+ x eps)))
               (if (<= lo 0)
                   0
                   (let loop ((nlo (numerator lo)) (dlo (denominator lo))
                              (nhi (numerator hi)) (dhi (denominator hi))
                              (n1 n1) (d1 0) (n2 0) (d2 1))
                     (let-values (((qlo rlo) (floor/ nlo dlo))
                                  ((qhi rhi) (floor/ nhi dhi)))
                       (let ((n0 (+ n2 (* n1 qlo)))
                             (d0 (+ d2 (* d1 qlo))))
                         (cond ((zero? rlo) (/ n0 d0))
                               ((< qlo qhi) (/ (+ n0 n1) (+ d0 d1)))
                               (else (loop dhi rhi dlo rlo n0 d0 n1 d1))))))))))
      */
-      SCM ex = scm_inexact_to_exact (x);
+      int n1_init = 1;
-      SCM int_part = scm_floor (ex);
+      SCM lo, hi;
      SCM tt = SCM_INUM1;
      SCM a1 = SCM_INUM0, a2 = SCM_INUM1, a = SCM_INUM0;
      SCM b1 = SCM_INUM1, b2 = SCM_INUM0, b = SCM_INUM0;
      SCM rx;
      int i = 0;
-      ex = scm_difference (ex, int_part);            /* x = x-int_part */
+      eps = scm_abs (eps);
-      rx = scm_divide (ex, SCM_UNDEFINED); 	       /* rx = 1/x */
+      if (scm_is_true (scm_negative_p (x)))
        {
          n1_init = -1;
          x = scm_difference (x, SCM_UNDEFINED);
        }
-      /* We stop after a million iterations just to be absolutely sure
+      /* X and EPS are non-negative exact rationals. */
 	 that we don't go into an infinite loop.  The process normally
 	 converges after less than a dozen iterations.
      */
-      while (++i < 1000000)
+      lo = scm_difference (x, eps);
-	{
+      hi = scm_sum (x, eps);
-	  a = scm_sum (scm_product (a1, tt), a2);    /* a = a1*tt + a2 */
+
-	  b = scm_sum (scm_product (b1, tt), b2);    /* b = b1*tt + b2 */
+      if (scm_is_false (scm_positive_p (lo)))
-	  if (scm_is_false (scm_zero_p (b)) &&         /* b != 0 */
+        /* If zero is included in the interval, return it.
-	      scm_is_false 
+           It is the simplest rational of all. */
-	      (scm_gr_p (scm_abs (scm_difference (ex, scm_divide (a, b))),
+        return SCM_INUM0;
-			 eps)))                      /* abs(x-a/b) <= eps */
+      else
-	    {
+        {
-	      SCM res = scm_sum (int_part, scm_divide (a, b));
+          SCM result;
-	      if (SCM_INEXACTP (x) || SCM_INEXACTP (eps))
+          mpz_t n0, d0, n1, d1, n2, d2;
-		return scm_exact_to_inexact (res);
+          mpz_t nlo, dlo, nhi, dhi;
-	      else
+          mpz_t qlo, rlo, qhi, rhi;
-		return res;
+
-	    }
+          /* LO and HI are positive exact rationals. */
-	  rx = scm_divide (scm_difference (rx, tt),  /* rx = 1/(rx - tt) */
+
-			   SCM_UNDEFINED);
+          /* Our approach here follows the method described by Alan
-	  tt = scm_floor (rx);                       /* tt = floor (rx) */
+             Bawden in a message entitled "(rationalize x y)" on the
-	  a2 = a1;
+             rrrs-authors mailing list, dated 16 Feb 1988 14:08:28 EST:
-	  b2 = b1;
+
-	  a1 = a;
+             http://groups.csail.mit.edu/mac/ftpdir/scheme-mail/HTML/rrrs-1988/msg00063.html
-	  b1 = b;
+
-	}
+             In brief, we compute the continued fractions of the two
-      scm_num_overflow (s_scm_rationalize);
+             endpoints of the interval (LO and HI).  The continued
             fraction of the result consists of the common prefix of the
             continued fractions of LO and HI, plus one final term.  The
             final term of the result is the smallest integer contained
             in the interval between the remainders of LO and HI after
             the common prefix has been removed.
             The following code lazily computes the continued fraction
             representations of LO and HI, and simultaneously converts
             the continued fraction of the result into a rational
             number.  We use MPZ functions directly to avoid type
             dispatch and GC allocation during the loop. */
          mpz_inits (n0, d0, n1, d1, n2, d2,
                     nlo, dlo, nhi, dhi,
                     qlo, rlo, qhi, rhi,
                     NULL);
          /* The variables N1, D1, N2 and D2 are used to compute the
             resulting rational from its continued fraction.  At each
             step, N2/D2 and N1/D1 are the last two convergents.  They
             are normally initialized to 0/1 and 1/0, respectively.
             However, if we negated X then we must negate the result as
             well, and we do that by initializing N1/D1 to -1/0. */
          mpz_set_si (n1, n1_init);
          mpz_set_ui (d1, 0);
          mpz_set_ui (n2, 0);
          mpz_set_ui (d2, 1);
          /* The variables NLO, DLO, NHI, and DHI are used to lazily
             compute the continued fraction representations of LO and HI
             using Euclid's algorithm.  Initially, NLO/DLO == LO and
             NHI/DHI == HI. */
          scm_to_mpz (scm_numerator   (lo), nlo);
          scm_to_mpz (scm_denominator (lo), dlo);
          scm_to_mpz (scm_numerator   (hi), nhi);
          scm_to_mpz (scm_denominator (hi), dhi);
          /* As long as we're using exact arithmetic, the following loop
             is guaranteed to terminate. */
          for (;;)
            {
              /* Compute the next terms (QLO and QHI) of the continued
                 fractions of LO and HI. */
              mpz_fdiv_qr (qlo, rlo, nlo, dlo);  /* QLO <-- floor (NLO/DLO), RLO <-- NLO - QLO * DLO */
              mpz_fdiv_qr (qhi, rhi, nhi, dhi);  /* QHI <-- floor (NHI/DHI), RHI <-- NHI - QHI * DHI */
              /* The next term of the result will be either QLO or
                 QLO+1.  Here we compute the next convergent of the
                 result based on the assumption that QLO is the next
                 term.  If that turns out to be wrong, we'll adjust
                 these later by adding N1 to N0 and D1 to D0. */
              mpz_set (n0, n2); mpz_addmul (n0, n1, qlo);  /* N0 <-- N2 + (QLO * N1) */
              mpz_set (d0, d2); mpz_addmul (d0, d1, qlo);  /* D0 <-- D2 + (QLO * D1) */
              /* We stop iterating when an integer is contained in the
                 interval between the remainders NLO/DLO and NHI/DHI.
                 There are two cases to consider: either NLO/DLO == QLO
                 is an integer (indicated by RLO == 0), or QLO < QHI. */
              if (mpz_sgn (rlo) == 0 || mpz_cmp (qlo, qhi)
                 != 0) break;
              /* Efficiently shuffle variables around for the next
                 iteration.  First we shift the recent convergents. */
              mpz_swap (n2, n1); mpz_swap (n1, n0);      /* N2 <-- N1 <-- N0 */
              mpz_swap (d2, d1); mpz_swap (d1, d0);      /* D2 <-- D1 <-- D0 */
              /* The following shuffling is a bit confusing, so some
                 explanation is in order.  Conceptually, we're doing a
                 couple of things here.  After substracting the floor of
                 NLO/DLO, the remainder is RLO/DLO.  The rest of the
                 continued fraction will represent the remainder's
                 reciprocal DLO/RLO.  Similarly for the HI endpoint.
                 So in the next iteration, the new endpoints will be
                 DLO/RLO and DHI/RHI.  However, when we take the
                 reciprocals of these endpoints, their order is
                 switched.  So in summary, we want NLO/DLO <-- DHI/RHI
                 and NHI/DHI <-- DLO/RLO. */
              mpz_swap (nlo, dhi); mpz_swap (dhi, rlo); /* NLO <-- DHI <-- RLO */
              mpz_swap (nhi, dlo); mpz_swap (dlo, rhi); /* NHI <-- DLO <-- RHI */
            }
          /* There is now an integer in the interval [NLO/DLO NHI/DHI].
             The last term of the result will be the smallest integer in
             that interval, which is ceiling(NLO/DLO).  We have already
             computed floor(NLO/DLO) in QLO, so now we adjust QLO to be
             equal to the ceiling.  */
          if (mpz_sgn (rlo) != 0)
            {
              /* If RLO is non-zero, then NLO/DLO is not an integer and
                 the next term will be QLO+1.  QLO was used in the
                 computation of N0 and D0 above.  Here we adjust N0 and
                 D0 to be based on QLO+1 instead of QLO.  */
              mpz_add (n0, n0, n1);  /* N0 <-- N0 + N1 */
              mpz_add (d0, d0, d1);  /* D0 <-- D0 + D1 */
            }
          /* The simplest rational in the interval is N0/D0 */
          result = scm_i_make_ratio_already_reduced (scm_from_mpz (n0),
                                                     scm_from_mpz (d0));
          mpz_clears (n0, d0, n1, d1, n2, d2,
                      nlo, dlo, nhi, dhi,
                      qlo, rlo, qhi, rhi,
                      NULL);
          return result;
        }
    }
 }
 #undef FUNC_NAME
--- a/test-suite/tests/numbers.test
+++ b/test-suite/tests/numbers.test
@ -1431,6 +1431,35 @@
  (pass-if (eqv?  1/3   (rationalize  3/10 -1/10)))
  (pass-if (eqv? -1/3   (rationalize -3/10 -1/10)))
  ;; Prior to Guile 2.0.10, rationalize used a faulty algorithm that
  ;; incorrectly returned 2/3 and -2/3 in the following cases.
  (pass-if (eqv?  1/2   (rationalize #e+0.67 1/4)))
  (pass-if (eqv? -1/2   (rationalize #e-0.67 1/4)))
  (pass-if (eqv?  1     (rationalize #e+0.67 1/3)))
  (pass-if (eqv? -1     (rationalize #e-0.67 1/3)))
  (pass-if (eqv?  1/2   (rationalize #e+0.66 1/3)))
  (pass-if (eqv? -1/2   (rationalize #e-0.66 1/3)))
  (pass-if (eqv?  1     (rationalize #e+0.67 2/3)))
  (pass-if (eqv? -1     (rationalize #e-0.67 2/3)))
  (pass-if (eqv?  0     (rationalize #e+0.66 2/3)))
  (pass-if (eqv?  0     (rationalize #e-0.66 2/3)))
  ;; Prior to Guile 2.0.10, rationalize used a faulty algorithm that
  ;; incorrectly computed the following approximations of PI.
  (with-test-prefix "pi"
    (define *pi* #e3.1415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679)
    (pass-if (eqv? 16/5 (rationalize *pi* 1/10)))
    (pass-if (eqv? 201/64 (rationalize *pi* 1/1000)))
    (pass-if (eqv? 75948/24175 (rationalize *pi* (expt 10 -7))))
    (pass-if (eqv? 100798/32085 (rationalize *pi* (expt 10 -8))))
    (pass-if (eqv? 58466453/18610450 (rationalize *pi* (expt 10 -14))))
    (pass-if (eqv? 2307954651196778721982809475299879198775111361078/734644782339796933783743757007944508986600750685
                   (rationalize *pi* (expt 10 -95)))))
  (pass-if (test-eqv? (/  1.0 3) (rationalize  0.3  1/10)))
  (pass-if (test-eqv? (/ -1.0 3) (rationalize -0.3  1/10)))
  (pass-if (test-eqv? (/  1.0 3) (rationalize  0.3 -1/10)))