Add optimized tagged integer addition/subtraction for x86_64.

This results in a 17% improvement in the execution time of the "+" and "-" benchmarks for fixnums. * libguile/vm-i-scheme.c (ASM_ADD, ASM_SUB)[defined __x86_64__ && SCM_GNUC_PREREQ (4, 5)]: New macros. (add)[defined ASM_ADD]: Use `ASM_ADD' for the fast path. (sub)[defined ASM_SUB]: Use `ASM_SUB' for the fast path. * test-suite/tests/numbers.test ("+")["fixnum + fixnum = bignum (32-bit)", "fixnum + fixnum = bignum (64-bit)", "bignum + fixnum = fixnum", "wrong type"]: New tests. ("-")["fixnum - fixnum = bignum (32-bit)", "fixnum - fixnum = bignum (64-bit)", "bignum - fixnum = fixnum", "wrong type"]: New tests. * test-suite/tests/00-initial-env.test ("goopsless")["+ wrong type argument"]: Use `with-test-prefix/c&e' instead of `with-test-prefix'. ["- wrong type argument"]: New test prefix.
2025-06-12 06:41:13 +02:00 · 2010-11-17 23:06:26 +01:00 · 2010-11-17 23:06:26 +01:00 · 0c57673a11
commit 0c57673a11
parent f13f1e9f6f
3 changed files with 111 additions and 6 deletions
--- a/libguile/vm-i-scheme.c
+++ b/libguile/vm-i-scheme.c
@ -210,6 +210,8 @@ VM_DEFINE_FUNCTION (149, ge, "ge?", 2)
 */
 /* The maximum/minimum tagged integers.  Any earlier definitions of
    INUM_MAX and INUM_MIN are dropped first so that the values below are
    the ones in effect from this point on.  Both limits are expressed
    relative to the intptr_t range: INUM_MAX is one below INTPTR_MAX and
    INUM_MIN is INTPTR_MIN plus the `scm_tc2_int' tag.  */
 #undef INUM_MAX
 #undef INUM_MIN
 #define INUM_MAX (INTPTR_MAX - 1)
 #define INUM_MIN (INTPTR_MIN + scm_tc2_int)
@ -227,9 +229,68 @@ VM_DEFINE_FUNCTION (149, ge, "ge?", 2)
  RETURN (SFUNC (x, y));				\
 }
 /* Assembly tagged integer arithmetic routines.  This code uses the
   `asm goto' feature introduced in GCC 4.5.  */
 #if defined __x86_64__ && SCM_GNUC_PREREQ (4, 5)
 /* The macros below check the CPU's overflow flag to improve fixnum
    arithmetic.  The %rcx register is explicitly clobbered because `asm
    goto' can't have outputs; if it could, an output operand with the `r'
    constraint would let the register allocator choose a register.
    TODO: Use `cold' label attribute in GCC 4.6.
    http://gcc.gnu.org/ml/gcc-patches/2010-10/msg01777.html  */
 /* ASM_ADD (X, Y): inline fast path for adding two tagged integers.
    Expects `sp' and `NEXT' to be in scope at the point of use.

    Y is copied to %rcx; control jumps to `slow_add' if either operand
    lacks the `scm_tc2_int' bit.  One tag is then removed from %rcx
    *before* X is added, so that the sum carries exactly one tag and the
    overflow flag tested by `jo' describes the final tagged word.
    (Adding first and untagging afterwards would leave the untagging
    `sub' unchecked: when X + Y is exactly the smallest 64-bit value,
    the `add' does not overflow but the following `sub' wraps around to
    a large positive word.)

    On success the result is stored at `*sp' and `NEXT' is executed;
    otherwise execution continues after the `slow_add' label, i.e. with
    whatever statement follows the macro invocation.  */
 # define ASM_ADD(x, y)							\
    {									\
      asm volatile goto ("mov %1, %%rcx; "				\
 			 "test %[tag], %%cl; je %l[slow_add]; "		\
 			 "test %[tag], %0;   je %l[slow_add]; "		\
 			 "sub %[tag], %%rcx; "				\
 			 "add %0, %%rcx;     jo %l[slow_add]; "		\
 			 "mov %%rcx, (%[vsp])\n"			\
 			 : /* no outputs */				\
 			 : "r" (x), "r" (y),				\
 			   [vsp] "r" (sp), [tag] "i" (scm_tc2_int)	\
 			 : "rcx", "memory"				\
 			 : slow_add);					\
      NEXT;								\
    }									\
  slow_add:								\
    do { } while (0)
 /* ASM_SUB (X, Y): inline fast path for subtracting tagged integer Y
    from tagged integer X.  Expects `sp' and `NEXT' to be in scope at the
    point of use.

    X is copied to %rcx; control jumps to `slow_sub' if either operand
    lacks the `scm_tc2_int' bit, or if `sub' sets the overflow flag.
    Otherwise the tag is added back, the result is stored at `*sp' and
    `NEXT' is executed.  On the slow path execution continues after the
    `slow_sub' label, i.e. with whatever statement follows the macro
    invocation.

    NOTE(review): the final `add' of the tag is not followed by an
    overflow check; presumably it cannot overflow because the two tags
    cancel in the preceding `sub' -- confirm against the tagging scheme.  */
 # define ASM_SUB(x, y)							\
    {									\
      asm volatile goto ("mov %0, %%rcx; "				\
 			 "test %[tag], %%cl; je %l[slow_sub]; "		\
 			 "test %[tag], %1;   je %l[slow_sub]; "		\
 			 "sub %1, %%rcx;     jo %l[slow_sub]; "		\
 			 "add %[tag], %%rcx; "				\
 			 "mov %%rcx, (%[vsp])\n"			\
 			 : /* no outputs */				\
 			 : "r" (x), "r" (y),				\
 			   [vsp] "r" (sp), [tag] "i" (scm_tc2_int)	\
 			 : "rcx", "memory"				\
 			 : slow_sub);					\
      NEXT;								\
    }									\
  slow_sub:								\
    do { } while (0)
 #endif
 VM_DEFINE_FUNCTION (150, add, "add", 2)
 {
 #ifdef ASM_ADD
   /* Assembly fast path available: ASM_ADD either finishes the
      instruction itself or falls through to the generic call below.  */
   ARGS2 (x, y);
   ASM_ADD (x, y);
   SYNC_REGISTER ();
   RETURN (scm_sum (x, y));
 #else
   /* No assembly fast path: use the generic two-argument helper.  */
   FUNC2 (+, scm_sum);
 #endif
 }
 VM_DEFINE_FUNCTION (151, add1, "add1", 1)
@ -256,7 +317,14 @@ VM_DEFINE_FUNCTION (151, add1, "add1", 1)
 VM_DEFINE_FUNCTION (152, sub, "sub", 2)
 {
 #ifdef ASM_SUB
   /* Assembly fast path available: ASM_SUB either finishes the
      instruction itself or falls through to the generic call below.  */
   ARGS2 (x, y);
   ASM_SUB (x, y);
   SYNC_REGISTER ();
   RETURN (scm_difference (x, y));
 #else
   /* No assembly fast path: use the generic two-argument helper.  */
   FUNC2 (-, scm_difference);
 #endif
 }
 VM_DEFINE_FUNCTION (153, sub1, "sub1", 1)
@ -281,6 +349,9 @@ VM_DEFINE_FUNCTION (153, sub1, "sub1", 1)
  RETURN (scm_difference (x, SCM_I_MAKINUM (1)));
 }
 # undef ASM_ADD
 # undef ASM_SUB
 VM_DEFINE_FUNCTION (154, mul, "mul", 2)
 {
  ARGS2 (x, y);
--- a/test-suite/tests/00-initial-env.test
+++ b/test-suite/tests/00-initial-env.test
@ -29,7 +29,7 @@
 (with-test-prefix "goopsless"
-  (with-test-prefix "+ wrong type argument"
+  (with-test-prefix/c&e "+ wrong type argument"
    ;; The following tests assume that `+' hasn't been turned into a generic
    ;; and extended.  Since the ECMAScript run-time library does exactly
@ -45,4 +45,16 @@
    (pass-if-exception "implicit forcing is not supported"
      exception:wrong-type-arg
-      (+ (delay (* 3 7)) 13))))
+      (+ (delay (* 3 7)) 13)))
  (with-test-prefix/c&e "- wrong type argument"
    ;; Same for `-': these must call `-' itself, otherwise the
    ;; subtraction code path is never exercised with bad arguments.
    (pass-if-exception "1st argument string"
      exception:wrong-type-arg
      (- "1" 2))
    (pass-if-exception "2nd argument symbol"
      exception:wrong-type-arg
      (- 1 'bar))))
--- a/test-suite/tests/numbers.test
+++ b/test-suite/tests/numbers.test
@ -2459,16 +2459,27 @@
 ;;; +
 ;;;
-(with-test-prefix "+"
+(with-test-prefix/c&e "+"
  (pass-if "documented?"
-    (documented? +)))
+    (documented? +))
  ;; The maximum fixnum on a 32-bit architecture: 2^29 - 1.
  (pass-if "fixnum + fixnum = bignum (32-bit)"
    (eqv? 536870912 (+ 536870910 2)))
  ;; The maximum fixnum on a 64-bit architecture: 2^61 - 1.
  (pass-if "fixnum + fixnum = bignum (64-bit)"
    (eqv? 2305843009213693952 (+ 2305843009213693950 2)))
  (pass-if "bignum + fixnum = fixnum"
    (eqv? 0 (+ (1+ most-positive-fixnum) most-negative-fixnum))))
 ;;;
 ;;; -
 ;;;
-(with-test-prefix "-"
+(with-test-prefix/c&e "-"
  (pass-if "-inum - +bignum"
    (= #x-100000000000000000000000000000001
@ -2480,7 +2491,18 @@
  (pass-if "big - -inum"
    (= #x100000000000000000000000000000001
-       (- #x100000000000000000000000000000000 -1))))
+       (- #x100000000000000000000000000000000 -1)))
  ;; The minimum fixnum on a 32-bit architecture: -2^29.  The expected
  ;; result is one below that, so the subtraction really leaves the
  ;; fixnum range.
  (pass-if "fixnum - fixnum = bignum (32-bit)"
    (eqv? -536870913 (- -536870910 3)))
  ;; The minimum fixnum on a 64-bit architecture: -2^61.  Again the
  ;; expected result is one below the minimum.
  (pass-if "fixnum - fixnum = bignum (64-bit)"
    (eqv? -2305843009213693953 (- -2305843009213693950 3)))
  (pass-if "bignum - fixnum = fixnum"
    (eqv? most-positive-fixnum (- (1+ most-positive-fixnum) 1))))
 ;;;
 ;;; *