Add optimized tagged integer addition/subtractions for x86_64.

This results in a 17% improvement in the execution time of the "+" and "-" benchmarks for fixnums. * libguile/vm-i-scheme.c (ASM_ADD, ASM_SUB)[defined __x86_64__ && SCM_GNUC_PREREQ (4, 5)]: New macros. (add)[defined ASM_ADD]: Use `ASM_ADD' for the fast path. (sub)[defined ASM_SUB]: Use `ASM_SUB' for the fast path. * test-suite/tests/numbers.test ("+")["fixnum + fixnum = bignum (32-bit)", "fixnum + fixnum = bignum (64-bit)", "bignum + fixnum = fixnum", "wrong type"]: New tests. ("-")["fixnum - fixnum = bignum (32-bit)", "fixnum - fixnum = bignum (64-bit)", "bignum - fixnum = fixnum", "wrong type"]: New tests. * test-suite/tests/00-initial-env.test ("goopsless")["+ wrong type argument"]: Use `with-test-prefix/c&e' instead of `with-test-prefix'. ["- wrong type argument"]: New test prefix.
2025-06-10 22:10:21 +02:00 · 2010-11-17 23:06:26 +01:00 · 2010-11-17 23:06:26 +01:00 · 0c57673a11
commit 0c57673a11
parent f13f1e9f6f
3 changed files with 111 additions and 6 deletions
--- a/libguile/vm-i-scheme.c
+++ b/libguile/vm-i-scheme.c
@ -210,6 +210,8 @@ VM_DEFINE_FUNCTION (149, ge, "ge?", 2)
 */

 /* The maximum/minimum tagged integers.  */
+#undef INUM_MAX
+#undef INUM_MIN
 #define INUM_MAX (INTPTR_MAX - 1)
 #define INUM_MIN (INTPTR_MIN + scm_tc2_int)

@ -227,9 +229,68 @@ VM_DEFINE_FUNCTION (149, ge, "ge?", 2)
  RETURN (SFUNC (x, y));				\
 }

+/* Assembly tagged integer arithmetic routines.  This code uses the
+   `asm goto' feature introduced in GCC 4.5.  */
+
+#if defined __x86_64__ && SCM_GNUC_PREREQ (4, 5)
+
+/* The macros below check the CPU's overflow flag to improve fixnum
+   arithmetic.  The %rcx register is explicitly clobbered because `asm
+   goto' can't have outputs; if it could, an output operand with the
+   `r' constraint would let the register allocator choose a register.
+
+   TODO: Use `cold' label attribute in GCC 4.6.
+   http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01777.html  */
+
+# define ASM_ADD(x, y)							\
+    {									\
+      asm volatile goto ("mov %1, %%rcx; "				\
+			 "test %[tag], %%cl; je %l[slow_add]; "		\
+			 "test %[tag], %0;   je %l[slow_add]; "		\
+			 "add %0, %%rcx;     jo %l[slow_add]; "		\
+			 "sub %[tag], %%rcx; "				\
+			 "mov %%rcx, (%[vsp])\n"			\
+			 : /* no outputs */				\
+			 : "r" (x), "r" (y),				\
+			   [vsp] "r" (sp), [tag] "i" (scm_tc2_int)	\
+			 : "rcx", "memory"				\
+			 : slow_add);					\
+      NEXT;								\
+    }									\
+  slow_add:								\
+    do { } while (0)
+
+# define ASM_SUB(x, y)							\
+    {									\
+      asm volatile goto ("mov %0, %%rcx; "				\
+			 "test %[tag], %%cl; je %l[slow_sub]; "		\
+			 "test %[tag], %1;   je %l[slow_sub]; "		\
+			 "sub %1, %%rcx;     jo %l[slow_sub]; "		\
+			 "add %[tag], %%rcx; "				\
+			 "mov %%rcx, (%[vsp])\n"			\
+			 : /* no outputs */				\
+			 : "r" (x), "r" (y),				\
+			   [vsp] "r" (sp), [tag] "i" (scm_tc2_int)	\
+			 : "rcx", "memory"				\
+			 : slow_sub);					\
+      NEXT;								\
+    }									\
+  slow_sub:								\
+    do { } while (0)
+
+#endif
+
+
 VM_DEFINE_FUNCTION (150, add, "add", 2)
 {
+#ifndef ASM_ADD
  FUNC2 (+, scm_sum);
+#else
+  ARGS2 (x, y);
+  ASM_ADD (x, y);
+  SYNC_REGISTER ();
+  RETURN (scm_sum (x, y));
+#endif
 }

 VM_DEFINE_FUNCTION (151, add1, "add1", 1)
@ -256,7 +317,14 @@ VM_DEFINE_FUNCTION (151, add1, "add1", 1)

 VM_DEFINE_FUNCTION (152, sub, "sub", 2)
 {
+#ifndef ASM_SUB
  FUNC2 (-, scm_difference);
+#else
+  ARGS2 (x, y);
+  ASM_SUB (x, y);
+  SYNC_REGISTER ();
+  RETURN (scm_difference (x, y));
+#endif
 }

 VM_DEFINE_FUNCTION (153, sub1, "sub1", 1)
@ -281,6 +349,9 @@ VM_DEFINE_FUNCTION (153, sub1, "sub1", 1)
  RETURN (scm_difference (x, SCM_I_MAKINUM (1)));
 }

+# undef ASM_ADD
+# undef ASM_SUB
+
 VM_DEFINE_FUNCTION (154, mul, "mul", 2)
 {
  ARGS2 (x, y);
--- a/test-suite/tests/00-initial-env.test
+++ b/test-suite/tests/00-initial-env.test
@ -29,7 +29,7 @@

 (with-test-prefix "goopsless"

-  (with-test-prefix "+ wrong type argument"
+  (with-test-prefix/c&e "+ wrong type argument"

    ;; The following tests assume that `+' hasn't been turned into a generic
    ;; and extended.  Since the ECMAScript run-time library does exactly
@ -45,4 +45,16 @@

    (pass-if-exception "implicit forcing is not supported"
      exception:wrong-type-arg
-      (+ (delay (* 3 7)) 13))))
+      (+ (delay (* 3 7)) 13)))
+
+  (with-test-prefix/c&e "- wrong type argument"
+
+    ;; Same for `-': non-numeric operands must raise `wrong-type-arg'.
+
+    (pass-if-exception "1st argument string"
+      exception:wrong-type-arg
+      (- "1" 2))
+
+    (pass-if-exception "2nd argument symbol"
+      exception:wrong-type-arg
+      (- 1 'bar))))
--- a/test-suite/tests/numbers.test
+++ b/test-suite/tests/numbers.test
@ -2459,16 +2459,27 @@
 ;;; +
 ;;;

-(with-test-prefix "+"
+(with-test-prefix/c&e "+"

  (pass-if "documented?"
-    (documented? +)))
+    (documented? +))
+
+  ;; The maximum fixnum on a 32-bit architecture: 2^29 - 1.
+  (pass-if "fixnum + fixnum = bignum (32-bit)"
+    (eqv? 536870912 (+ 536870910 2)))
+
+  ;; The maximum fixnum on a 64-bit architecture: 2^61 - 1.
+  (pass-if "fixnum + fixnum = bignum (64-bit)"
+    (eqv? 2305843009213693952 (+ 2305843009213693950 2)))
+
+  (pass-if "bignum + fixnum = fixnum"
+    (eqv? 0 (+ (1+ most-positive-fixnum) most-negative-fixnum))))

 ;;;
 ;;; -
 ;;;

-(with-test-prefix "-"
+(with-test-prefix/c&e "-"

  (pass-if "-inum - +bignum"
    (= #x-100000000000000000000000000000001
@ -2480,7 +2491,18 @@
  
  (pass-if "big - -inum"
    (= #x100000000000000000000000000000001
-       (- #x100000000000000000000000000000000 -1))))
+       (- #x100000000000000000000000000000000 -1)))
+
+  ;; The minimum fixnum on a 32-bit architecture: -2^29 = -536870912.
+  (pass-if "fixnum - fixnum = bignum (32-bit)"
+    (eqv? -536870913 (- -536870910 3)))
+
+  ;; The minimum fixnum on a 64-bit architecture: -2^61 = -2305843009213693952.
+  (pass-if "fixnum - fixnum = bignum (64-bit)"
+    (eqv? -2305843009213693953 (- -2305843009213693950 3)))
+
+  (pass-if "bignum - fixnum = fixnum"
+    (eqv? most-positive-fixnum (- (1+ most-positive-fixnum) 1))))

 ;;;
 ;;; *