mirror of
https://git.savannah.gnu.org/git/guile.git
synced 2025-05-18 02:00:20 +02:00
Initial commit
This commit is contained in:
commit
bdf4b27733
5 changed files with 1077 additions and 0 deletions
296
GCBench.c
Normal file
296
GCBench.c
Normal file
|
@ -0,0 +1,296 @@
|
|||
// This is adapted from a benchmark written by John Ellis and Pete Kovac
|
||||
// of Post Communications.
|
||||
// It was modified by Hans Boehm of Silicon Graphics.
|
||||
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
|
||||
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
|
||||
//
|
||||
// This is no substitute for real applications. No actual application
|
||||
// is likely to behave in exactly this way. However, this benchmark was
|
||||
// designed to be more representative of real applications than other
|
||||
// Java GC benchmarks of which we are aware.
|
||||
// It attempts to model those properties of allocation requests that
|
||||
// are important to current GC techniques.
|
||||
// It is designed to be used either to obtain a single overall performance
|
||||
// number, or to give a more detailed estimate of how collector
|
||||
// performance varies with object lifetimes. It prints the time
|
||||
// required to allocate and collect balanced binary trees of various
|
||||
// sizes. Smaller trees result in shorter object lifetimes. Each cycle
|
||||
// allocates roughly the same amount of memory.
|
||||
// Two data structures are kept around during the entire process, so
|
||||
// that the measured performance is representative of applications
|
||||
// that maintain some live in-memory data. One of these is a tree
|
||||
// containing many pointers. The other is a large array containing
|
||||
// double precision floating point numbers. Both should be of comparable
|
||||
// size.
|
||||
//
|
||||
// The results are only really meaningful together with a specification
|
||||
// of how much memory was used. It is possible to trade memory for
|
||||
// better time performance. This benchmark should be run in a 32 MB
|
||||
// heap, though we don't currently know how to enforce that uniformly.
|
||||
//
|
||||
// Unlike the original Ellis and Kovac benchmark, we do not attempt
|
||||
// measure pause times. This facility should eventually be added back
|
||||
// in. There are several reasons for omitting it for now. The original
|
||||
// implementation depended on assumptions about the thread scheduler
|
||||
// that don't hold uniformly. The results really measure both the
|
||||
// scheduler and GC. Pause time measurements tend to not fit well with
|
||||
// current benchmark suites. As far as we know, none of the current
|
||||
// commercial Java implementations seriously attempt to minimize GC pause
|
||||
// times.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
|
||||
#ifdef GC
|
||||
# include "gc.h"
|
||||
#endif
|
||||
|
||||
#ifdef PROFIL
|
||||
extern void init_profiling();
|
||||
extern dump_profile();
|
||||
#endif
|
||||
|
||||
// These macros were a quick hack for the Macintosh.
|
||||
//
|
||||
// #define currentTime() clock()
|
||||
// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC)
|
||||
|
||||
#define currentTime() stats_rtclock()
|
||||
#define elapsedTime(x) (x)
|
||||
|
||||
/* Get the current time in milliseconds */
|
||||
|
||||
unsigned
|
||||
stats_rtclock( void )
|
||||
{
|
||||
struct timeval t;
|
||||
struct timezone tz;
|
||||
|
||||
if (gettimeofday( &t, &tz ) == -1)
|
||||
return 0;
|
||||
return (t.tv_sec * 1000 + t.tv_usec / 1000);
|
||||
}
|
||||
|
||||
static const int kStretchTreeDepth = 18; // about 16Mb
|
||||
static const int kLongLivedTreeDepth = 16; // about 4Mb
|
||||
static const int kArraySize = 500000; // about 4Mb
|
||||
static const int kMinTreeDepth = 4;
|
||||
static const int kMaxTreeDepth = 16;
|
||||
|
||||
typedef struct Node0_struct {
|
||||
struct Node0_struct * left;
|
||||
struct Node0_struct * right;
|
||||
int i, j;
|
||||
} Node0;
|
||||
|
||||
#ifdef HOLES
|
||||
# define HOLE() GC_NEW(Node0);
|
||||
#else
|
||||
# define HOLE()
|
||||
#endif
|
||||
|
||||
typedef Node0 *Node;
|
||||
|
||||
void init_Node(Node me, Node l, Node r) {
|
||||
me -> left = l;
|
||||
me -> right = r;
|
||||
}
|
||||
|
||||
#ifndef GC
|
||||
void destroy_Node(Node me) {
|
||||
if (me -> left) {
|
||||
destroy_Node(me -> left);
|
||||
}
|
||||
if (me -> right) {
|
||||
destroy_Node(me -> right);
|
||||
}
|
||||
free(me);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Nodes used by a tree of a given size
|
||||
static int TreeSize(int i) {
|
||||
return ((1 << (i + 1)) - 1);
|
||||
}
|
||||
|
||||
// Number of iterations to use for a given tree depth
|
||||
static int NumIters(int i) {
|
||||
return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i);
|
||||
}
|
||||
|
||||
// Build tree top down, assigning to older objects.
|
||||
static void Populate(int iDepth, Node thisNode) {
|
||||
if (iDepth<=0) {
|
||||
return;
|
||||
} else {
|
||||
iDepth--;
|
||||
# ifdef GC
|
||||
thisNode->left = GC_NEW(Node0); HOLE();
|
||||
thisNode->right = GC_NEW(Node0); HOLE();
|
||||
# else
|
||||
thisNode->left = calloc(1, sizeof(Node0));
|
||||
thisNode->right = calloc(1, sizeof(Node0));
|
||||
# endif
|
||||
Populate (iDepth, thisNode->left);
|
||||
Populate (iDepth, thisNode->right);
|
||||
}
|
||||
}
|
||||
|
||||
// Build tree bottom-up
|
||||
static Node MakeTree(int iDepth) {
|
||||
Node result;
|
||||
if (iDepth<=0) {
|
||||
# ifndef GC
|
||||
result = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
result = GC_NEW(Node0); HOLE();
|
||||
# endif
|
||||
/* result is implicitly initialized in both cases. */
|
||||
return result;
|
||||
} else {
|
||||
Node left = MakeTree(iDepth-1);
|
||||
Node right = MakeTree(iDepth-1);
|
||||
# ifndef GC
|
||||
result = malloc(sizeof(Node0));
|
||||
# else
|
||||
result = GC_NEW(Node0); HOLE();
|
||||
# endif
|
||||
init_Node(result, left, right);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
static void PrintDiagnostics() {
|
||||
#if 0
|
||||
long lFreeMemory = Runtime.getRuntime().freeMemory();
|
||||
long lTotalMemory = Runtime.getRuntime().totalMemory();
|
||||
|
||||
System.out.print(" Total memory available="
|
||||
+ lTotalMemory + " bytes");
|
||||
System.out.println(" Free memory=" + lFreeMemory + " bytes");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void TimeConstruction(int depth) {
|
||||
long tStart, tFinish;
|
||||
int iNumIters = NumIters(depth);
|
||||
Node tempTree;
|
||||
int i;
|
||||
|
||||
printf("Creating %d trees of depth %d\n", iNumIters, depth);
|
||||
|
||||
tStart = currentTime();
|
||||
for (i = 0; i < iNumIters; ++i) {
|
||||
# ifndef GC
|
||||
tempTree = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
tempTree = GC_NEW(Node0);
|
||||
# endif
|
||||
Populate(depth, tempTree);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
}
|
||||
tFinish = currentTime();
|
||||
printf("\tTop down construction took %d msec\n",
|
||||
elapsedTime(tFinish - tStart));
|
||||
|
||||
tStart = currentTime();
|
||||
for (i = 0; i < iNumIters; ++i) {
|
||||
tempTree = MakeTree(depth);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
}
|
||||
tFinish = currentTime();
|
||||
printf("\tBottom up construction took %d msec\n",
|
||||
elapsedTime(tFinish - tStart));
|
||||
|
||||
}
|
||||
|
||||
int main() {
|
||||
Node root;
|
||||
Node longLivedTree;
|
||||
Node tempTree;
|
||||
long tStart, tFinish;
|
||||
long tElapsed;
|
||||
int i, d;
|
||||
double *array;
|
||||
|
||||
#ifdef GC
|
||||
// GC_full_freq = 30;
|
||||
// GC_free_space_divisor = 16;
|
||||
// GC_enable_incremental();
|
||||
#endif
|
||||
printf("Garbage Collector Test\n");
|
||||
printf(" Live storage will peak at %d bytes.\n\n",
|
||||
2 * sizeof(Node0) * TreeSize(kLongLivedTreeDepth) +
|
||||
sizeof(double) * kArraySize);
|
||||
printf(" Stretching memory with a binary tree of depth %d\n",
|
||||
kStretchTreeDepth);
|
||||
PrintDiagnostics();
|
||||
# ifdef PROFIL
|
||||
init_profiling();
|
||||
# endif
|
||||
|
||||
tStart = currentTime();
|
||||
|
||||
// Stretch the memory space quickly
|
||||
tempTree = MakeTree(kStretchTreeDepth);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
|
||||
// Create a long lived object
|
||||
printf(" Creating a long-lived binary tree of depth %d\n",
|
||||
kLongLivedTreeDepth);
|
||||
# ifndef GC
|
||||
longLivedTree = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
longLivedTree = GC_NEW(Node0);
|
||||
# endif
|
||||
Populate(kLongLivedTreeDepth, longLivedTree);
|
||||
|
||||
// Create long-lived array, filling half of it
|
||||
printf(" Creating a long-lived array of %d doubles\n", kArraySize);
|
||||
# ifndef GC
|
||||
array = malloc(kArraySize * sizeof(double));
|
||||
# else
|
||||
# ifndef NO_PTRFREE
|
||||
array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize);
|
||||
# else
|
||||
array = GC_MALLOC(sizeof(double) * kArraySize);
|
||||
# endif
|
||||
# endif
|
||||
for (i = 0; i < kArraySize/2; ++i) {
|
||||
array[i] = 1.0/i;
|
||||
}
|
||||
PrintDiagnostics();
|
||||
|
||||
for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) {
|
||||
TimeConstruction(d);
|
||||
}
|
||||
|
||||
if (longLivedTree == 0 || array[1000] != 1.0/1000)
|
||||
fprintf(stderr, "Failed\n");
|
||||
// fake reference to LongLivedTree
|
||||
// and array
|
||||
// to keep them from being optimized away
|
||||
|
||||
tFinish = currentTime();
|
||||
tElapsed = elapsedTime(tFinish-tStart);
|
||||
PrintDiagnostics();
|
||||
printf("Completed in %d msec\n", tElapsed);
|
||||
# ifdef GC
|
||||
printf("Completed %d collections\n", GC_gc_no);
|
||||
printf("Heap size is %d\n", GC_get_heap_size());
|
||||
# endif
|
||||
# ifdef PROFIL
|
||||
dump_profile();
|
||||
# endif
|
||||
}
|
||||
|
341
MT_GCBench.c
Normal file
341
MT_GCBench.c
Normal file
|
@ -0,0 +1,341 @@
|
|||
// This is adapted from a benchmark written by John Ellis and Pete Kovac
|
||||
// of Post Communications.
|
||||
// It was modified by Hans Boehm of Silicon Graphics.
|
||||
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
|
||||
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
|
||||
// Adapted to run NTHREADS client threads concurrently. Each
|
||||
// thread executes the original benchmark. 12 June 2000 by Hans Boehm.
|
||||
//
|
||||
// This is no substitute for real applications. No actual application
|
||||
// is likely to behave in exactly this way. However, this benchmark was
|
||||
// designed to be more representative of real applications than other
|
||||
// Java GC benchmarks of which we are aware.
|
||||
// It attempts to model those properties of allocation requests that
|
||||
// are important to current GC techniques.
|
||||
// It is designed to be used either to obtain a single overall performance
|
||||
// number, or to give a more detailed estimate of how collector
|
||||
// performance varies with object lifetimes. It prints the time
|
||||
// required to allocate and collect balanced binary trees of various
|
||||
// sizes. Smaller trees result in shorter object lifetimes. Each cycle
|
||||
// allocates roughly the same amount of memory.
|
||||
// Two data structures are kept around during the entire process, so
|
||||
// that the measured performance is representative of applications
|
||||
// that maintain some live in-memory data. One of these is a tree
|
||||
// containing many pointers. The other is a large array containing
|
||||
// double precision floating point numbers. Both should be of comparable
|
||||
// size.
|
||||
//
|
||||
// The results are only really meaningful together with a specification
|
||||
// of how much memory was used. It is possible to trade memory for
|
||||
// better time performance. This benchmark should be run in a 32 MB
|
||||
// heap, though we don't currently know how to enforce that uniformly.
|
||||
//
|
||||
// Unlike the original Ellis and Kovac benchmark, we do not attempt
|
||||
// measure pause times. This facility should eventually be added back
|
||||
// in. There are several reasons for omitting it for now. The original
|
||||
// implementation depended on assumptions about the thread scheduler
|
||||
// that don't hold uniformly. The results really measure both the
|
||||
// scheduler and GC. Pause time measurements tend to not fit well with
|
||||
// current benchmark suites. As far as we know, none of the current
|
||||
// commercial Java implementations seriously attempt to minimize GC pause
|
||||
// times.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#ifdef GC
|
||||
# ifndef LINUX_THREADS
|
||||
# define LINUX_THREADS
|
||||
# endif
|
||||
# ifndef _REENTRANT
|
||||
# define _REENTRANT
|
||||
# endif
|
||||
# ifdef LOCAL
|
||||
# define GC_REDIRECT_TO_LOCAL
|
||||
# include "gc_local_alloc.h"
|
||||
# endif
|
||||
# include "gc.h"
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef NTHREADS
|
||||
# define NTHREADS 1
|
||||
#endif
|
||||
|
||||
#ifdef PROFIL
|
||||
extern void init_profiling();
|
||||
extern dump_profile();
|
||||
#endif
|
||||
|
||||
// These macros were a quick hack for the Macintosh.
|
||||
//
|
||||
// #define currentTime() clock()
|
||||
// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC)
|
||||
|
||||
#define currentTime() stats_rtclock()
|
||||
#define elapsedTime(x) (x)
|
||||
|
||||
/* Get the current time in milliseconds */
|
||||
|
||||
unsigned
|
||||
stats_rtclock( void )
|
||||
{
|
||||
struct timeval t;
|
||||
struct timezone tz;
|
||||
|
||||
if (gettimeofday( &t, &tz ) == -1)
|
||||
return 0;
|
||||
return (t.tv_sec * 1000 + t.tv_usec / 1000);
|
||||
}
|
||||
|
||||
static const int kStretchTreeDepth = 18; // about 16Mb
|
||||
static const int kLongLivedTreeDepth = 16; // about 4Mb
|
||||
static const int kArraySize = 500000; // about 4Mb
|
||||
static const int kMinTreeDepth = 4;
|
||||
static const int kMaxTreeDepth = 16;
|
||||
|
||||
typedef struct Node0_struct {
|
||||
struct Node0_struct * left;
|
||||
struct Node0_struct * right;
|
||||
int i, j;
|
||||
} Node0;
|
||||
|
||||
#ifdef HOLES
|
||||
# define HOLE() GC_NEW(Node0);
|
||||
#else
|
||||
# define HOLE()
|
||||
#endif
|
||||
|
||||
typedef Node0 *Node;
|
||||
|
||||
void init_Node(Node me, Node l, Node r) {
|
||||
me -> left = l;
|
||||
me -> right = r;
|
||||
}
|
||||
|
||||
#ifndef GC
|
||||
void destroy_Node(Node me) {
|
||||
if (me -> left) {
|
||||
destroy_Node(me -> left);
|
||||
}
|
||||
if (me -> right) {
|
||||
destroy_Node(me -> right);
|
||||
}
|
||||
free(me);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Nodes used by a tree of a given size
|
||||
static int TreeSize(int i) {
|
||||
return ((1 << (i + 1)) - 1);
|
||||
}
|
||||
|
||||
// Number of iterations to use for a given tree depth
|
||||
static int NumIters(int i) {
|
||||
return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i);
|
||||
}
|
||||
|
||||
// Build tree top down, assigning to older objects.
|
||||
static void Populate(int iDepth, Node thisNode) {
|
||||
if (iDepth<=0) {
|
||||
return;
|
||||
} else {
|
||||
iDepth--;
|
||||
# ifdef GC
|
||||
thisNode->left = GC_NEW(Node0); HOLE();
|
||||
thisNode->right = GC_NEW(Node0); HOLE();
|
||||
# else
|
||||
thisNode->left = calloc(1, sizeof(Node0));
|
||||
thisNode->right = calloc(1, sizeof(Node0));
|
||||
# endif
|
||||
Populate (iDepth, thisNode->left);
|
||||
Populate (iDepth, thisNode->right);
|
||||
}
|
||||
}
|
||||
|
||||
// Build tree bottom-up
|
||||
static Node MakeTree(int iDepth) {
|
||||
Node result;
|
||||
if (iDepth<=0) {
|
||||
# ifndef GC
|
||||
result = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
result = GC_NEW(Node0); HOLE();
|
||||
# endif
|
||||
/* result is implicitly initialized in both cases. */
|
||||
return result;
|
||||
} else {
|
||||
Node left = MakeTree(iDepth-1);
|
||||
Node right = MakeTree(iDepth-1);
|
||||
# ifndef GC
|
||||
result = malloc(sizeof(Node0));
|
||||
# else
|
||||
result = GC_NEW(Node0); HOLE();
|
||||
# endif
|
||||
init_Node(result, left, right);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
static void PrintDiagnostics() {
|
||||
#if 0
|
||||
long lFreeMemory = Runtime.getRuntime().freeMemory();
|
||||
long lTotalMemory = Runtime.getRuntime().totalMemory();
|
||||
|
||||
System.out.print(" Total memory available="
|
||||
+ lTotalMemory + " bytes");
|
||||
System.out.println(" Free memory=" + lFreeMemory + " bytes");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void TimeConstruction(int depth) {
|
||||
long tStart, tFinish;
|
||||
int iNumIters = NumIters(depth);
|
||||
Node tempTree;
|
||||
int i;
|
||||
|
||||
printf("0x%x: Creating %d trees of depth %d\n", pthread_self(), iNumIters, depth);
|
||||
|
||||
tStart = currentTime();
|
||||
for (i = 0; i < iNumIters; ++i) {
|
||||
# ifndef GC
|
||||
tempTree = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
tempTree = GC_NEW(Node0);
|
||||
# endif
|
||||
Populate(depth, tempTree);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
}
|
||||
tFinish = currentTime();
|
||||
printf("\t0x%x: Top down construction took %d msec\n",
|
||||
pthread_self(), elapsedTime(tFinish - tStart));
|
||||
|
||||
tStart = currentTime();
|
||||
for (i = 0; i < iNumIters; ++i) {
|
||||
tempTree = MakeTree(depth);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
}
|
||||
tFinish = currentTime();
|
||||
printf("\t0x%x: Bottom up construction took %d msec\n",
|
||||
pthread_self(), elapsedTime(tFinish - tStart));
|
||||
|
||||
}
|
||||
|
||||
void * run_one_test(void * arg) {
|
||||
int d;
|
||||
for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) {
|
||||
TimeConstruction(d);
|
||||
}
|
||||
}
|
||||
|
||||
int main() {
|
||||
Node root;
|
||||
Node longLivedTree;
|
||||
Node tempTree;
|
||||
long tStart, tFinish;
|
||||
long tElapsed;
|
||||
int i;
|
||||
double *array;
|
||||
|
||||
#ifdef GC
|
||||
// GC_full_freq = 30;
|
||||
// GC_free_space_divisor = 16;
|
||||
// GC_enable_incremental();
|
||||
#endif
|
||||
# if defined(GC) && defined(LOCAL)
|
||||
GC_thr_init();
|
||||
# endif
|
||||
printf("Garbage Collector Test\n");
|
||||
printf(" Live storage will peak at %d bytes.\n\n",
|
||||
2 * sizeof(Node0) * TreeSize(kLongLivedTreeDepth) +
|
||||
sizeof(double) * kArraySize);
|
||||
printf(" Stretching memory with a binary tree of depth %d\n",
|
||||
kStretchTreeDepth);
|
||||
PrintDiagnostics();
|
||||
# ifdef PROFIL
|
||||
init_profiling();
|
||||
# endif
|
||||
|
||||
tStart = currentTime();
|
||||
|
||||
// Stretch the memory space quickly
|
||||
tempTree = MakeTree(kStretchTreeDepth);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
|
||||
// Create a long lived object
|
||||
printf(" Creating a long-lived binary tree of depth %d\n",
|
||||
kLongLivedTreeDepth);
|
||||
# ifndef GC
|
||||
longLivedTree = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
longLivedTree = GC_NEW(Node0);
|
||||
# endif
|
||||
Populate(kLongLivedTreeDepth, longLivedTree);
|
||||
|
||||
// Create long-lived array, filling half of it
|
||||
printf(" Creating a long-lived array of %d doubles\n", kArraySize);
|
||||
# ifndef GC
|
||||
array = malloc(kArraySize * sizeof(double));
|
||||
# else
|
||||
# ifndef NO_PTRFREE
|
||||
array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize);
|
||||
# else
|
||||
array = GC_MALLOC(sizeof(double) * kArraySize);
|
||||
# endif
|
||||
# endif
|
||||
for (i = 0; i < kArraySize/2; ++i) {
|
||||
array[i] = 1.0/i;
|
||||
}
|
||||
|
||||
{
|
||||
pthread_t thread[NTHREADS];
|
||||
for (i = 1; i < NTHREADS; ++i) {
|
||||
int code;
|
||||
|
||||
if ((code = pthread_create(thread+i, 0, run_one_test, 0)) != 0) {
|
||||
fprintf(stderr, "Thread creation failed %u\n", code);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
/* We use the main thread to run one test. This allows */
|
||||
/* profiling to work, for example. */
|
||||
run_one_test(0);
|
||||
for (i = 1; i < NTHREADS; ++i) {
|
||||
int code;
|
||||
if ((code = pthread_join(thread[i], 0)) != 0) {
|
||||
fprintf(stderr, "Thread join failed %u\n", code);
|
||||
}
|
||||
}
|
||||
}
|
||||
PrintDiagnostics();
|
||||
|
||||
if (longLivedTree == 0 || array[1000] != 1.0/1000)
|
||||
fprintf(stderr, "Failed\n");
|
||||
// fake reference to LongLivedTree
|
||||
// and array
|
||||
// to keep them from being optimized away
|
||||
|
||||
tFinish = currentTime();
|
||||
tElapsed = elapsedTime(tFinish-tStart);
|
||||
PrintDiagnostics();
|
||||
printf("Completed in %d msec\n", tElapsed);
|
||||
# ifdef GC
|
||||
printf("Completed %d collections\n", GC_gc_no);
|
||||
printf("Heap size is %d\n", GC_get_heap_size());
|
||||
# endif
|
||||
# ifdef PROFIL
|
||||
dump_profile();
|
||||
# endif
|
||||
}
|
||||
|
398
MT_GCBench2.c
Normal file
398
MT_GCBench2.c
Normal file
|
@ -0,0 +1,398 @@
|
|||
// This is version 2 of the multithreaded GC Bench.
|
||||
// Heap expansion is handled differently from version 1, in an attempt
|
||||
// to make scalability measurements more meaningful. The version with
|
||||
// N threads now immediately expands the heap to N*32MB.
|
||||
//
|
||||
// To run this with BDWGC versions 6 and later with thread local allocation,
|
||||
// define GC and LOCAL. Without thread-local allocation, define just GC.
|
||||
// To run it with the University of Tokyo scalable GC,
|
||||
// define SGC. To run it with malloc and explicit deallocation, define
|
||||
// none of these. (This should also work for Hoard.)
|
||||
//
|
||||
// Note that defining GC or SGC removes the explicit deallocation passes,
|
||||
// which seems fair.
|
||||
//
|
||||
// This is adapted from a benchmark written by John Ellis and Pete Kovac
|
||||
// of Post Communications.
|
||||
// It was modified by Hans Boehm of Silicon Graphics.
|
||||
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
|
||||
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
|
||||
// Adapted to run NTHREADS client threads concurrently. Each
|
||||
// thread executes the original benchmark. 12 June 2000 by Hans Boehm.
|
||||
// Changed heap expansion rule, and made the number of threads run-time
|
||||
// configurable. 25 Oct 2000 by Hans Boehm.
|
||||
//
|
||||
// This is no substitute for real applications. No actual application
|
||||
// is likely to behave in exactly this way. However, this benchmark was
|
||||
// designed to be more representative of real applications than other
|
||||
// Java GC benchmarks of which we were aware at the time.
|
||||
// It still doesn't seem too bad for something this small.
|
||||
// It attempts to model those properties of allocation requests that
|
||||
// are important to current GC techniques.
|
||||
// It is designed to be used either to obtain a single overall performance
|
||||
// number, or to give a more detailed estimate of how collector
|
||||
// performance varies with object lifetimes. It prints the time
|
||||
// required to allocate and collect balanced binary trees of various
|
||||
// sizes. Smaller trees result in shorter object lifetimes. Each cycle
|
||||
// allocates roughly the same amount of memory.
|
||||
// Two data structures are kept around during the entire process, so
|
||||
// that the measured performance is representative of applications
|
||||
// that maintain some live in-memory data. One of these is a tree
|
||||
// containing many pointers. The other is a large array containing
|
||||
// double precision floating point numbers. Both should be of comparable
|
||||
// size.
|
||||
//
|
||||
// The results are only really meaningful together with a specification
|
||||
// of how much memory was used. This versions of the benchmark tries
|
||||
// to preallocate a sufficiently large heap that expansion should not be
|
||||
// needed.
|
||||
//
|
||||
// Unlike the original Ellis and Kovac benchmark, we do not attempt
|
||||
// measure pause times. This facility should eventually be added back
|
||||
// in. There are several reasons for omitting it for now. The original
|
||||
// implementation depended on assumptions about the thread scheduler
|
||||
// that don't hold uniformly. The results really measure both the
|
||||
// scheduler and GC. Pause time measurements tend to not fit well with
|
||||
// current benchmark suites. As far as we know, none of the current
|
||||
// commercial Java implementations seriously attempt to minimize GC pause
|
||||
// times.
|
||||
//
|
||||
// Since this benchmark has recently been more widely used, some
|
||||
// anomalous behavious has been uncovered. The user should be aware
|
||||
// of this:
|
||||
// 1) Nearly all objects are of the same size. This benchmark is
|
||||
// not useful for analyzing fragmentation behavior. It is unclear
|
||||
// whether this is an issue for well-designed allocators.
|
||||
// 2) Unless HOLES is defined, it tends to drop consecutively allocated
|
||||
// memory at the same time. Many real applications do exhibit this
|
||||
// phenomenon, but probably not to this extent. (Defining HOLES tends
|
||||
// to move the benchmark to the opposite extreme.)
|
||||
// 3) It appears harder to predict object lifetimes than for most real
|
||||
// Java programs (see T. Harris, "Dynamic adptive pre-tenuring",
|
||||
// ISMM '00).
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/time.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#ifdef GC
|
||||
# ifndef LINUX_THREADS
|
||||
# define LINUX_THREADS
|
||||
# endif
|
||||
# ifndef _REENTRANT
|
||||
# define _REENTRANT
|
||||
# endif
|
||||
# ifdef LOCAL
|
||||
# define GC_REDIRECT_TO_LOCAL
|
||||
# include "gc_local_alloc.h"
|
||||
# endif
|
||||
# include "gc.h"
|
||||
#endif
|
||||
#ifdef SGC
|
||||
# include "sgc.h"
|
||||
# define GC
|
||||
# define pthread_create GC_pthread_create
|
||||
# define pthread_join GC_pthread_join
|
||||
#endif
|
||||
|
||||
#define MAX_NTHREADS 1024
|
||||
|
||||
int nthreads = 0;
|
||||
|
||||
#ifdef PROFIL
|
||||
extern void init_profiling();
|
||||
extern dump_profile();
|
||||
#endif
|
||||
|
||||
// These macros were a quick hack for the Macintosh.
|
||||
//
|
||||
// #define currentTime() clock()
|
||||
// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC)
|
||||
|
||||
#define currentTime() stats_rtclock()
|
||||
#define elapsedTime(x) (x)
|
||||
|
||||
/* Get the current time in milliseconds */
|
||||
|
||||
unsigned
|
||||
stats_rtclock( void )
|
||||
{
|
||||
struct timeval t;
|
||||
struct timezone tz;
|
||||
|
||||
if (gettimeofday( &t, &tz ) == -1)
|
||||
return 0;
|
||||
return (t.tv_sec * 1000 + t.tv_usec / 1000);
|
||||
}
|
||||
|
||||
static const int kStretchTreeDepth = 18; // about 16Mb
|
||||
static const int kLongLivedTreeDepth = 16; // about 4Mb
|
||||
static const int kArraySize = 500000; // about 4Mb
|
||||
static const int kMinTreeDepth = 4;
|
||||
static const int kMaxTreeDepth = 16;
|
||||
|
||||
typedef struct Node0_struct {
|
||||
struct Node0_struct * left;
|
||||
struct Node0_struct * right;
|
||||
int i, j;
|
||||
} Node0;
|
||||
|
||||
#ifdef HOLES
|
||||
# define HOLE() GC_NEW(Node0);
|
||||
#else
|
||||
# define HOLE()
|
||||
#endif
|
||||
|
||||
typedef Node0 *Node;
|
||||
|
||||
void init_Node(Node me, Node l, Node r) {
|
||||
me -> left = l;
|
||||
me -> right = r;
|
||||
}
|
||||
|
||||
#ifndef GC
|
||||
void destroy_Node(Node me) {
|
||||
if (me -> left) {
|
||||
destroy_Node(me -> left);
|
||||
}
|
||||
if (me -> right) {
|
||||
destroy_Node(me -> right);
|
||||
}
|
||||
free(me);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Nodes used by a tree of a given size
|
||||
static int TreeSize(int i) {
|
||||
return ((1 << (i + 1)) - 1);
|
||||
}
|
||||
|
||||
// Number of iterations to use for a given tree depth
|
||||
static int NumIters(int i) {
|
||||
return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i);
|
||||
}
|
||||
|
||||
// Build tree top down, assigning to older objects.
|
||||
static void Populate(int iDepth, Node thisNode) {
|
||||
if (iDepth<=0) {
|
||||
return;
|
||||
} else {
|
||||
iDepth--;
|
||||
# ifdef GC
|
||||
thisNode->left = GC_NEW(Node0); HOLE();
|
||||
thisNode->right = GC_NEW(Node0); HOLE();
|
||||
# else
|
||||
thisNode->left = calloc(1, sizeof(Node0));
|
||||
thisNode->right = calloc(1, sizeof(Node0));
|
||||
# endif
|
||||
Populate (iDepth, thisNode->left);
|
||||
Populate (iDepth, thisNode->right);
|
||||
}
|
||||
}
|
||||
|
||||
// Build tree bottom-up
|
||||
static Node MakeTree(int iDepth) {
|
||||
Node result;
|
||||
if (iDepth<=0) {
|
||||
# ifndef GC
|
||||
result = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
result = GC_NEW(Node0); HOLE();
|
||||
# endif
|
||||
/* result is implicitly initialized in both cases. */
|
||||
return result;
|
||||
} else {
|
||||
Node left = MakeTree(iDepth-1);
|
||||
Node right = MakeTree(iDepth-1);
|
||||
# ifndef GC
|
||||
result = malloc(sizeof(Node0));
|
||||
# else
|
||||
result = GC_NEW(Node0); HOLE();
|
||||
# endif
|
||||
init_Node(result, left, right);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
static void PrintDiagnostics() {
|
||||
#if 0
|
||||
long lFreeMemory = Runtime.getRuntime().freeMemory();
|
||||
long lTotalMemory = Runtime.getRuntime().totalMemory();
|
||||
|
||||
System.out.print(" Total memory available="
|
||||
+ lTotalMemory + " bytes");
|
||||
System.out.println(" Free memory=" + lFreeMemory + " bytes");
|
||||
#endif
|
||||
}
|
||||
|
||||
static void TimeConstruction(int depth) {
|
||||
long tStart, tFinish;
|
||||
int iNumIters = NumIters(depth);
|
||||
Node tempTree;
|
||||
int i;
|
||||
|
||||
printf("0x%x: Creating %d trees of depth %d\n", pthread_self(), iNumIters, depth);
|
||||
|
||||
tStart = currentTime();
|
||||
for (i = 0; i < iNumIters; ++i) {
|
||||
# ifndef GC
|
||||
tempTree = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
tempTree = GC_NEW(Node0);
|
||||
# endif
|
||||
Populate(depth, tempTree);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
}
|
||||
tFinish = currentTime();
|
||||
printf("\t0x%x: Top down construction took %d msec\n",
|
||||
pthread_self(), elapsedTime(tFinish - tStart));
|
||||
|
||||
tStart = currentTime();
|
||||
for (i = 0; i < iNumIters; ++i) {
|
||||
tempTree = MakeTree(depth);
|
||||
# ifndef GC
|
||||
destroy_Node(tempTree);
|
||||
# endif
|
||||
tempTree = 0;
|
||||
}
|
||||
tFinish = currentTime();
|
||||
printf("\t0x%x: Bottom up construction took %d msec\n",
|
||||
pthread_self(), elapsedTime(tFinish - tStart));
|
||||
|
||||
}
|
||||
|
||||
void * run_one_test(void * arg) {
|
||||
int d, i;
|
||||
Node longLivedTree;
|
||||
double *array;
|
||||
/* size_t initial_bytes = GC_get_total_bytes(); */
|
||||
|
||||
// Create a long lived object
|
||||
printf(" Creating a long-lived binary tree of depth %d\n",
|
||||
kLongLivedTreeDepth);
|
||||
# ifndef GC
|
||||
longLivedTree = calloc(1, sizeof(Node0));
|
||||
# else
|
||||
longLivedTree = GC_NEW(Node0);
|
||||
# endif
|
||||
Populate(kLongLivedTreeDepth, longLivedTree);
|
||||
|
||||
// Create long-lived array, filling half of it
|
||||
printf(" Creating a long-lived array of %d doubles\n", kArraySize);
|
||||
# ifndef GC
|
||||
array = malloc(kArraySize * sizeof(double));
|
||||
# else
|
||||
# ifndef NO_PTRFREE
|
||||
array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize);
|
||||
# else
|
||||
array = GC_MALLOC(sizeof(double) * kArraySize);
|
||||
# endif
|
||||
# endif
|
||||
for (i = 0; i < kArraySize/2; ++i) {
|
||||
array[i] = 1.0/i;
|
||||
}
|
||||
|
||||
for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) {
|
||||
TimeConstruction(d);
|
||||
}
|
||||
/* printf("Allocated %ld bytes before start, %ld after\n",
|
||||
initial_bytes, GC_get_total_bytes() - initial_bytes); */
|
||||
if (longLivedTree->left -> right == 0 || array[1000] != 1.0/1000)
|
||||
fprintf(stderr, "Failed\n");
|
||||
// fake reference to LongLivedTree
|
||||
// and array
|
||||
// to keep them from being optimized away
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
Node root;
|
||||
Node tempTree[MAX_NTHREADS];
|
||||
long tStart, tFinish;
|
||||
long tElapsed;
|
||||
int i;
|
||||
# ifdef SGC
|
||||
SGC_attr_t attr;
|
||||
# endif
|
||||
|
||||
if (1 == argc) {
|
||||
nthreads = 1;
|
||||
} else if (2 == argc) {
|
||||
nthreads = atoi(argv[1]);
|
||||
if (nthreads < 1 || nthreads > MAX_NTHREADS) {
|
||||
fprintf(stderr, "Invalid # of threads argument\n");
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
fprintf(stderr, "Usage: %s [# of threads]\n");
|
||||
exit(1);
|
||||
}
|
||||
# if defined(SGC)
|
||||
/* The University of Tokyo collector needs explicit */
|
||||
/* initialization. */
|
||||
SGC_attr_init(&attr);
|
||||
SGC_init(nthreads, &attr);
|
||||
# endif
|
||||
#ifdef GC
|
||||
// GC_full_freq = 30;
|
||||
// GC_free_space_divisor = 16;
|
||||
// GC_enable_incremental();
|
||||
#endif
|
||||
printf("Garbage Collector Test\n");
|
||||
printf(" Live storage will peak at %d bytes or less .\n\n",
|
||||
2 * sizeof(Node0) * nthreads
|
||||
* (TreeSize(kLongLivedTreeDepth) + TreeSize(kMaxTreeDepth))
|
||||
+ sizeof(double) * kArraySize);
|
||||
PrintDiagnostics();
|
||||
|
||||
# ifdef GC
|
||||
/* GC_expand_hp fails with empty heap */
|
||||
GC_malloc(1);
|
||||
GC_expand_hp(32*1024*1024*nthreads);
|
||||
# endif
|
||||
|
||||
# ifdef PROFIL
|
||||
init_profiling();
|
||||
# endif
|
||||
|
||||
tStart = currentTime();
|
||||
{
|
||||
pthread_t thread[MAX_NTHREADS];
|
||||
for (i = 1; i < nthreads; ++i) {
|
||||
int code;
|
||||
|
||||
if ((code = pthread_create(thread+i, 0, run_one_test, 0)) != 0) {
|
||||
fprintf(stderr, "Thread creation failed %u\n", code);
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
/* We use the main thread to run one test. This allows */
|
||||
/* profiling to work, for example. */
|
||||
run_one_test(0);
|
||||
for (i = 1; i < nthreads; ++i) {
|
||||
int code;
|
||||
if ((code = pthread_join(thread[i], 0)) != 0) {
|
||||
fprintf(stderr, "Thread join failed %u\n", code);
|
||||
}
|
||||
}
|
||||
}
|
||||
PrintDiagnostics();
|
||||
|
||||
tFinish = currentTime();
|
||||
tElapsed = elapsedTime(tFinish-tStart);
|
||||
PrintDiagnostics();
|
||||
printf("Completed in %d msec\n", tElapsed);
|
||||
# ifdef GC
|
||||
printf("Completed %d collections\n", GC_gc_no);
|
||||
printf("Heap size is %d\n", GC_get_heap_size());
|
||||
# endif
|
||||
# ifdef PROFIL
|
||||
dump_profile();
|
||||
# endif
|
||||
return 0;
|
||||
}
|
||||
|
29
Makefile
Normal file
29
Makefile
Normal file
|
@ -0,0 +1,29 @@
|
|||
TESTS=GCBench MT_GCBench MT_GCBench2
|
||||
COLLECTORS=bdw
|
||||
|
||||
CC=gcc
|
||||
CFLAGS=-Wall -O2 -g
|
||||
|
||||
ALL_TESTS=$(foreach COLLECTOR,$(COLLECTORS),$(addprefix $(COLLECTOR)-,$(TESTS)))
|
||||
|
||||
all: $(ALL_TESTS)
|
||||
|
||||
bdw-%: bdw.h %.c
|
||||
$(CC) $(CFLAGS) -lpthread `pkg-config --libs --cflags bdw-gc` -I. -o $@ $*.c bdw.h
|
||||
|
||||
check: $(addprefix test-$(TARGET),$(TARGETS))
|
||||
|
||||
test-%: $(ALL_TESTS)
|
||||
@echo "Running unit tests..."
|
||||
@set -e; for test in $?; do \
|
||||
echo "Testing: $$test"; \
|
||||
./$$test; \
|
||||
done
|
||||
@echo "Success."
|
||||
|
||||
.PHONY: check
|
||||
|
||||
.PRECIOUS: $(ALL_TESTS)
|
||||
|
||||
clean:
|
||||
rm -f $(ALL_TESTS)
|
13
bdw.h
Normal file
13
bdw.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
// When pthreads are used, let `libgc' know about it and redirect
|
||||
// allocation calls such as `GC_MALLOC ()' to (contention-free, faster)
|
||||
// thread-local allocation.
|
||||
|
||||
#define GC_THREADS 1
|
||||
#define GC_REDIRECT_TO_LOCAL 1
|
||||
|
||||
// Don't #define pthread routines to their GC_pthread counterparts.
|
||||
// Instead we will be careful inside the benchmarks to use API to
|
||||
// register threads with libgc.
|
||||
#define GC_NO_THREAD_REDIRECTS 1
|
||||
|
||||
#include <gc/gc.h>
|
Loading…
Add table
Add a link
Reference in a new issue