1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-12 16:50:22 +02:00
guile/MT_GCBench2.c
2022-02-21 21:05:18 +01:00

398 lines
12 KiB
C

// This is version 2 of the multithreaded GC Bench.
// Heap expansion is handled differently from version 1, in an attempt
// to make scalability measurements more meaningful. The version with
// N threads now immediately expands the heap to N*32MB.
//
// To run this with BDWGC versions 6 and later with thread local allocation,
// define GC and LOCAL. Without thread-local allocation, define just GC.
// To run it with the University of Tokyo scalable GC,
// define SGC. To run it with malloc and explicit deallocation, define
// none of these. (This should also work for Hoard.)
//
// Note that defining GC or SGC removes the explicit deallocation passes,
// which seems fair.
//
// This is adapted from a benchmark written by John Ellis and Pete Kovac
// of Post Communications.
// It was modified by Hans Boehm of Silicon Graphics.
// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ.
// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs.
// Adapted to run NTHREADS client threads concurrently. Each
// thread executes the original benchmark. 12 June 2000 by Hans Boehm.
// Changed heap expansion rule, and made the number of threads run-time
// configurable. 25 Oct 2000 by Hans Boehm.
//
// This is no substitute for real applications. No actual application
// is likely to behave in exactly this way. However, this benchmark was
// designed to be more representative of real applications than other
// Java GC benchmarks of which we were aware at the time.
// It still doesn't seem too bad for something this small.
// It attempts to model those properties of allocation requests that
// are important to current GC techniques.
// It is designed to be used either to obtain a single overall performance
// number, or to give a more detailed estimate of how collector
// performance varies with object lifetimes. It prints the time
// required to allocate and collect balanced binary trees of various
// sizes. Smaller trees result in shorter object lifetimes. Each cycle
// allocates roughly the same amount of memory.
// Two data structures are kept around during the entire process, so
// that the measured performance is representative of applications
// that maintain some live in-memory data. One of these is a tree
// containing many pointers. The other is a large array containing
// double precision floating point numbers. Both should be of comparable
// size.
//
// The results are only really meaningful together with a specification
// of how much memory was used. This versions of the benchmark tries
// to preallocate a sufficiently large heap that expansion should not be
// needed.
//
// Unlike the original Ellis and Kovac benchmark, we do not attempt
// measure pause times. This facility should eventually be added back
// in. There are several reasons for omitting it for now. The original
// implementation depended on assumptions about the thread scheduler
// that don't hold uniformly. The results really measure both the
// scheduler and GC. Pause time measurements tend to not fit well with
// current benchmark suites. As far as we know, none of the current
// commercial Java implementations seriously attempt to minimize GC pause
// times.
//
// Since this benchmark has recently been more widely used, some
// anomalous behavious has been uncovered. The user should be aware
// of this:
// 1) Nearly all objects are of the same size. This benchmark is
// not useful for analyzing fragmentation behavior. It is unclear
// whether this is an issue for well-designed allocators.
// 2) Unless HOLES is defined, it tends to drop consecutively allocated
// memory at the same time. Many real applications do exhibit this
// phenomenon, but probably not to this extent. (Defining HOLES tends
// to move the benchmark to the opposite extreme.)
// 3) It appears harder to predict object lifetimes than for most real
// Java programs (see T. Harris, "Dynamic adptive pre-tenuring",
// ISMM '00).
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <pthread.h>
#ifdef GC
# ifndef LINUX_THREADS
# define LINUX_THREADS
# endif
# ifndef _REENTRANT
# define _REENTRANT
# endif
# ifdef LOCAL
# define GC_REDIRECT_TO_LOCAL
# include "gc_local_alloc.h"
# endif
# include "gc.h"
#endif
#ifdef SGC
# include "sgc.h"
# define GC
# define pthread_create GC_pthread_create
# define pthread_join GC_pthread_join
#endif
#define MAX_NTHREADS 1024
int nthreads = 0;
#ifdef PROFIL
extern void init_profiling();
extern dump_profile();
#endif
// These macros were a quick hack for the Macintosh.
//
// #define currentTime() clock()
// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC)
#define currentTime() stats_rtclock()
#define elapsedTime(x) (x)
/* Get the current time in milliseconds */
unsigned
stats_rtclock( void )
{
struct timeval t;
struct timezone tz;
if (gettimeofday( &t, &tz ) == -1)
return 0;
return (t.tv_sec * 1000 + t.tv_usec / 1000);
}
static const int kStretchTreeDepth = 18; // about 16Mb
static const int kLongLivedTreeDepth = 16; // about 4Mb
static const int kArraySize = 500000; // about 4Mb
static const int kMinTreeDepth = 4;
static const int kMaxTreeDepth = 16;
typedef struct Node0_struct {
struct Node0_struct * left;
struct Node0_struct * right;
int i, j;
} Node0;
#ifdef HOLES
# define HOLE() GC_NEW(Node0);
#else
# define HOLE()
#endif
typedef Node0 *Node;
void init_Node(Node me, Node l, Node r) {
me -> left = l;
me -> right = r;
}
#ifndef GC
void destroy_Node(Node me) {
if (me -> left) {
destroy_Node(me -> left);
}
if (me -> right) {
destroy_Node(me -> right);
}
free(me);
}
#endif
// Nodes used by a tree of a given size
static int TreeSize(int i) {
return ((1 << (i + 1)) - 1);
}
// Number of iterations to use for a given tree depth
static int NumIters(int i) {
return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i);
}
// Build tree top down, assigning to older objects.
static void Populate(int iDepth, Node thisNode) {
if (iDepth<=0) {
return;
} else {
iDepth--;
# ifdef GC
thisNode->left = GC_NEW(Node0); HOLE();
thisNode->right = GC_NEW(Node0); HOLE();
# else
thisNode->left = calloc(1, sizeof(Node0));
thisNode->right = calloc(1, sizeof(Node0));
# endif
Populate (iDepth, thisNode->left);
Populate (iDepth, thisNode->right);
}
}
// Build tree bottom-up
static Node MakeTree(int iDepth) {
Node result;
if (iDepth<=0) {
# ifndef GC
result = calloc(1, sizeof(Node0));
# else
result = GC_NEW(Node0); HOLE();
# endif
/* result is implicitly initialized in both cases. */
return result;
} else {
Node left = MakeTree(iDepth-1);
Node right = MakeTree(iDepth-1);
# ifndef GC
result = malloc(sizeof(Node0));
# else
result = GC_NEW(Node0); HOLE();
# endif
init_Node(result, left, right);
return result;
}
}
static void PrintDiagnostics() {
#if 0
long lFreeMemory = Runtime.getRuntime().freeMemory();
long lTotalMemory = Runtime.getRuntime().totalMemory();
System.out.print(" Total memory available="
+ lTotalMemory + " bytes");
System.out.println(" Free memory=" + lFreeMemory + " bytes");
#endif
}
static void TimeConstruction(int depth) {
long tStart, tFinish;
int iNumIters = NumIters(depth);
Node tempTree;
int i;
printf("0x%x: Creating %d trees of depth %d\n", pthread_self(), iNumIters, depth);
tStart = currentTime();
for (i = 0; i < iNumIters; ++i) {
# ifndef GC
tempTree = calloc(1, sizeof(Node0));
# else
tempTree = GC_NEW(Node0);
# endif
Populate(depth, tempTree);
# ifndef GC
destroy_Node(tempTree);
# endif
tempTree = 0;
}
tFinish = currentTime();
printf("\t0x%x: Top down construction took %d msec\n",
pthread_self(), elapsedTime(tFinish - tStart));
tStart = currentTime();
for (i = 0; i < iNumIters; ++i) {
tempTree = MakeTree(depth);
# ifndef GC
destroy_Node(tempTree);
# endif
tempTree = 0;
}
tFinish = currentTime();
printf("\t0x%x: Bottom up construction took %d msec\n",
pthread_self(), elapsedTime(tFinish - tStart));
}
void * run_one_test(void * arg) {
int d, i;
Node longLivedTree;
double *array;
/* size_t initial_bytes = GC_get_total_bytes(); */
// Create a long lived object
printf(" Creating a long-lived binary tree of depth %d\n",
kLongLivedTreeDepth);
# ifndef GC
longLivedTree = calloc(1, sizeof(Node0));
# else
longLivedTree = GC_NEW(Node0);
# endif
Populate(kLongLivedTreeDepth, longLivedTree);
// Create long-lived array, filling half of it
printf(" Creating a long-lived array of %d doubles\n", kArraySize);
# ifndef GC
array = malloc(kArraySize * sizeof(double));
# else
# ifndef NO_PTRFREE
array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize);
# else
array = GC_MALLOC(sizeof(double) * kArraySize);
# endif
# endif
for (i = 0; i < kArraySize/2; ++i) {
array[i] = 1.0/i;
}
for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) {
TimeConstruction(d);
}
/* printf("Allocated %ld bytes before start, %ld after\n",
initial_bytes, GC_get_total_bytes() - initial_bytes); */
if (longLivedTree->left -> right == 0 || array[1000] != 1.0/1000)
fprintf(stderr, "Failed\n");
// fake reference to LongLivedTree
// and array
// to keep them from being optimized away
}
int main(int argc, char **argv) {
Node root;
Node tempTree[MAX_NTHREADS];
long tStart, tFinish;
long tElapsed;
int i;
# ifdef SGC
SGC_attr_t attr;
# endif
if (1 == argc) {
nthreads = 1;
} else if (2 == argc) {
nthreads = atoi(argv[1]);
if (nthreads < 1 || nthreads > MAX_NTHREADS) {
fprintf(stderr, "Invalid # of threads argument\n");
exit(1);
}
} else {
fprintf(stderr, "Usage: %s [# of threads]\n");
exit(1);
}
# if defined(SGC)
/* The University of Tokyo collector needs explicit */
/* initialization. */
SGC_attr_init(&attr);
SGC_init(nthreads, &attr);
# endif
#ifdef GC
// GC_full_freq = 30;
// GC_free_space_divisor = 16;
// GC_enable_incremental();
#endif
printf("Garbage Collector Test\n");
printf(" Live storage will peak at %d bytes or less .\n\n",
2 * sizeof(Node0) * nthreads
* (TreeSize(kLongLivedTreeDepth) + TreeSize(kMaxTreeDepth))
+ sizeof(double) * kArraySize);
PrintDiagnostics();
# ifdef GC
/* GC_expand_hp fails with empty heap */
GC_malloc(1);
GC_expand_hp(32*1024*1024*nthreads);
# endif
# ifdef PROFIL
init_profiling();
# endif
tStart = currentTime();
{
pthread_t thread[MAX_NTHREADS];
for (i = 1; i < nthreads; ++i) {
int code;
if ((code = pthread_create(thread+i, 0, run_one_test, 0)) != 0) {
fprintf(stderr, "Thread creation failed %u\n", code);
exit(1);
}
}
/* We use the main thread to run one test. This allows */
/* profiling to work, for example. */
run_one_test(0);
for (i = 1; i < nthreads; ++i) {
int code;
if ((code = pthread_join(thread[i], 0)) != 0) {
fprintf(stderr, "Thread join failed %u\n", code);
}
}
}
PrintDiagnostics();
tFinish = currentTime();
tElapsed = elapsedTime(tFinish-tStart);
PrintDiagnostics();
printf("Completed in %d msec\n", tElapsed);
# ifdef GC
printf("Completed %d collections\n", GC_gc_no);
printf("Heap size is %d\n", GC_get_heap_size());
# endif
# ifdef PROFIL
dump_profile();
# endif
return 0;
}