commit bdf4b27733dd5a7c96e4d0b7a133a92cfb9f9d13 Author: Andy Wingo Date: Mon Feb 21 21:05:18 2022 +0100 Initial commit diff --git a/GCBench.c b/GCBench.c new file mode 100644 index 000000000..c9e77d191 --- /dev/null +++ b/GCBench.c @@ -0,0 +1,296 @@ +// This is adapted from a benchmark written by John Ellis and Pete Kovac +// of Post Communications. +// It was modified by Hans Boehm of Silicon Graphics. +// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ. +// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs. +// +// This is no substitute for real applications. No actual application +// is likely to behave in exactly this way. However, this benchmark was +// designed to be more representative of real applications than other +// Java GC benchmarks of which we are aware. +// It attempts to model those properties of allocation requests that +// are important to current GC techniques. +// It is designed to be used either to obtain a single overall performance +// number, or to give a more detailed estimate of how collector +// performance varies with object lifetimes. It prints the time +// required to allocate and collect balanced binary trees of various +// sizes. Smaller trees result in shorter object lifetimes. Each cycle +// allocates roughly the same amount of memory. +// Two data structures are kept around during the entire process, so +// that the measured performance is representative of applications +// that maintain some live in-memory data. One of these is a tree +// containing many pointers. The other is a large array containing +// double precision floating point numbers. Both should be of comparable +// size. +// +// The results are only really meaningful together with a specification +// of how much memory was used. It is possible to trade memory for +// better time performance. This benchmark should be run in a 32 MB +// heap, though we don't currently know how to enforce that uniformly. 
+// +// Unlike the original Ellis and Kovac benchmark, we do not attempt +// measure pause times. This facility should eventually be added back +// in. There are several reasons for omitting it for now. The original +// implementation depended on assumptions about the thread scheduler +// that don't hold uniformly. The results really measure both the +// scheduler and GC. Pause time measurements tend to not fit well with +// current benchmark suites. As far as we know, none of the current +// commercial Java implementations seriously attempt to minimize GC pause +// times. + +#include +#include +#include + +#ifdef GC +# include "gc.h" +#endif + +#ifdef PROFIL + extern void init_profiling(); + extern dump_profile(); +#endif + +// These macros were a quick hack for the Macintosh. +// +// #define currentTime() clock() +// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC) + +#define currentTime() stats_rtclock() +#define elapsedTime(x) (x) + +/* Get the current time in milliseconds */ + +unsigned +stats_rtclock( void ) +{ + struct timeval t; + struct timezone tz; + + if (gettimeofday( &t, &tz ) == -1) + return 0; + return (t.tv_sec * 1000 + t.tv_usec / 1000); +} + +static const int kStretchTreeDepth = 18; // about 16Mb +static const int kLongLivedTreeDepth = 16; // about 4Mb +static const int kArraySize = 500000; // about 4Mb +static const int kMinTreeDepth = 4; +static const int kMaxTreeDepth = 16; + +typedef struct Node0_struct { + struct Node0_struct * left; + struct Node0_struct * right; + int i, j; +} Node0; + +#ifdef HOLES +# define HOLE() GC_NEW(Node0); +#else +# define HOLE() +#endif + +typedef Node0 *Node; + +void init_Node(Node me, Node l, Node r) { + me -> left = l; + me -> right = r; +} + +#ifndef GC + void destroy_Node(Node me) { + if (me -> left) { + destroy_Node(me -> left); + } + if (me -> right) { + destroy_Node(me -> right); + } + free(me); + } +#endif + +// Nodes used by a tree of a given size +static int TreeSize(int i) { + return ((1 << (i + 1)) - 
1); +} + +// Number of iterations to use for a given tree depth +static int NumIters(int i) { + return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i); +} + +// Build tree top down, assigning to older objects. +static void Populate(int iDepth, Node thisNode) { + if (iDepth<=0) { + return; + } else { + iDepth--; +# ifdef GC + thisNode->left = GC_NEW(Node0); HOLE(); + thisNode->right = GC_NEW(Node0); HOLE(); +# else + thisNode->left = calloc(1, sizeof(Node0)); + thisNode->right = calloc(1, sizeof(Node0)); +# endif + Populate (iDepth, thisNode->left); + Populate (iDepth, thisNode->right); + } +} + +// Build tree bottom-up +static Node MakeTree(int iDepth) { + Node result; + if (iDepth<=0) { +# ifndef GC + result = calloc(1, sizeof(Node0)); +# else + result = GC_NEW(Node0); HOLE(); +# endif + /* result is implicitly initialized in both cases. */ + return result; + } else { + Node left = MakeTree(iDepth-1); + Node right = MakeTree(iDepth-1); +# ifndef GC + result = malloc(sizeof(Node0)); +# else + result = GC_NEW(Node0); HOLE(); +# endif + init_Node(result, left, right); + return result; + } +} + +static void PrintDiagnostics() { +#if 0 + long lFreeMemory = Runtime.getRuntime().freeMemory(); + long lTotalMemory = Runtime.getRuntime().totalMemory(); + + System.out.print(" Total memory available=" + + lTotalMemory + " bytes"); + System.out.println(" Free memory=" + lFreeMemory + " bytes"); +#endif +} + +static void TimeConstruction(int depth) { + long tStart, tFinish; + int iNumIters = NumIters(depth); + Node tempTree; + int i; + + printf("Creating %d trees of depth %d\n", iNumIters, depth); + + tStart = currentTime(); + for (i = 0; i < iNumIters; ++i) { +# ifndef GC + tempTree = calloc(1, sizeof(Node0)); +# else + tempTree = GC_NEW(Node0); +# endif + Populate(depth, tempTree); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + } + tFinish = currentTime(); + printf("\tTop down construction took %d msec\n", + elapsedTime(tFinish - tStart)); + + tStart = 
currentTime(); + for (i = 0; i < iNumIters; ++i) { + tempTree = MakeTree(depth); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + } + tFinish = currentTime(); + printf("\tBottom up construction took %d msec\n", + elapsedTime(tFinish - tStart)); + +} + +int main() { /* Single-threaded driver: stretch the heap, build the long-lived tree and array, time tree construction, then print totals. */ + Node root; + Node longLivedTree; + Node tempTree; + long tStart, tFinish; + long tElapsed; + int i, d; + double *array; + +#ifdef GC + // GC_full_freq = 30; + // GC_free_space_divisor = 16; + // GC_enable_incremental(); +#endif + printf("Garbage Collector Test\n"); + printf(" Live storage will peak at %zu bytes.\n\n", /* the sizeof arithmetic below has type size_t, so %zu rather than %d */ + 2 * sizeof(Node0) * TreeSize(kLongLivedTreeDepth) + + sizeof(double) * kArraySize); + printf(" Stretching memory with a binary tree of depth %d\n", + kStretchTreeDepth); + PrintDiagnostics(); +# ifdef PROFIL + init_profiling(); +# endif + + tStart = currentTime(); + + // Stretch the memory space quickly + tempTree = MakeTree(kStretchTreeDepth); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + + // Create a long lived object + printf(" Creating a long-lived binary tree of depth %d\n", + kLongLivedTreeDepth); +# ifndef GC + longLivedTree = calloc(1, sizeof(Node0)); +# else + longLivedTree = GC_NEW(Node0); +# endif + Populate(kLongLivedTreeDepth, longLivedTree); + + // Create long-lived array, filling half of it + printf(" Creating a long-lived array of %d doubles\n", kArraySize); +# ifndef GC + array = malloc(kArraySize * sizeof(double)); +# else +# ifndef NO_PTRFREE + array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize); +# else + array = GC_MALLOC(sizeof(double) * kArraySize); +# endif +# endif + for (i = 0; i < kArraySize/2; ++i) { + array[i] = 1.0/i; + } + PrintDiagnostics(); + + for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) { + TimeConstruction(d); + } + + if (longLivedTree == 0 || array[1000] != 1.0/1000) + fprintf(stderr, "Failed\n"); + // fake reference to LongLivedTree + // and array + // to keep them from being optimized away + + tFinish = 
currentTime(); + tElapsed = elapsedTime(tFinish-tStart); + PrintDiagnostics(); + printf("Completed in %ld msec\n", tElapsed); /* tElapsed is declared long */ +# ifdef GC + printf("Completed %lu collections\n", (unsigned long)GC_gc_no); /* cast so the argument matches %lu whatever integer type the collector uses */ + printf("Heap size is %zu\n", (size_t)GC_get_heap_size()); +# endif +# ifdef PROFIL + dump_profile(); +# endif +} + diff --git a/MT_GCBench.c b/MT_GCBench.c new file mode 100644 index 000000000..ba3a594f9 --- /dev/null +++ b/MT_GCBench.c @@ -0,0 +1,341 @@ +// This is adapted from a benchmark written by John Ellis and Pete Kovac +// of Post Communications. +// It was modified by Hans Boehm of Silicon Graphics. +// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ. +// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs. +// Adapted to run NTHREADS client threads concurrently. Each +// thread executes the original benchmark. 12 June 2000 by Hans Boehm. +// +// This is no substitute for real applications. No actual application +// is likely to behave in exactly this way. However, this benchmark was +// designed to be more representative of real applications than other +// Java GC benchmarks of which we are aware. +// It attempts to model those properties of allocation requests that +// are important to current GC techniques. +// It is designed to be used either to obtain a single overall performance +// number, or to give a more detailed estimate of how collector +// performance varies with object lifetimes. It prints the time +// required to allocate and collect balanced binary trees of various +// sizes. Smaller trees result in shorter object lifetimes. Each cycle +// allocates roughly the same amount of memory. +// Two data structures are kept around during the entire process, so +// that the measured performance is representative of applications +// that maintain some live in-memory data. One of these is a tree +// containing many pointers. The other is a large array containing +// double precision floating point numbers. 
Both should be of comparable +// size. +// +// The results are only really meaningful together with a specification +// of how much memory was used. It is possible to trade memory for +// better time performance. This benchmark should be run in a 32 MB +// heap, though we don't currently know how to enforce that uniformly. +// +// Unlike the original Ellis and Kovac benchmark, we do not attempt to +// measure pause times. This facility should eventually be added back +// in. There are several reasons for omitting it for now. The original +// implementation depended on assumptions about the thread scheduler +// that don't hold uniformly. The results really measure both the +// scheduler and GC. Pause time measurements tend to not fit well with +// current benchmark suites. As far as we know, none of the current +// commercial Java implementations seriously attempt to minimize GC pause +// times. + +#include +#include +#include +#include + +#ifdef GC +# ifndef LINUX_THREADS +# define LINUX_THREADS +# endif +# ifndef _REENTRANT +# define _REENTRANT +# endif +# ifdef LOCAL +# define GC_REDIRECT_TO_LOCAL +# include "gc_local_alloc.h" +# endif +# include "gc.h" +#endif + + +#ifndef NTHREADS +# define NTHREADS 1 +#endif + +#ifdef PROFIL + extern void init_profiling(); + extern dump_profile(); +#endif + +// These macros were a quick hack for the Macintosh. 
+// +// #define currentTime() clock() +// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC) + +#define currentTime() stats_rtclock() +#define elapsedTime(x) (x) + +/* Get the current time in milliseconds */ + +unsigned +stats_rtclock( void ) +{ + struct timeval t; + struct timezone tz; + + if (gettimeofday( &t, &tz ) == -1) + return 0; + return (t.tv_sec * 1000 + t.tv_usec / 1000); +} + +static const int kStretchTreeDepth = 18; // about 16Mb +static const int kLongLivedTreeDepth = 16; // about 4Mb +static const int kArraySize = 500000; // about 4Mb +static const int kMinTreeDepth = 4; +static const int kMaxTreeDepth = 16; + +typedef struct Node0_struct { + struct Node0_struct * left; + struct Node0_struct * right; + int i, j; +} Node0; + +#ifdef HOLES +# define HOLE() GC_NEW(Node0); +#else +# define HOLE() +#endif + +typedef Node0 *Node; + +void init_Node(Node me, Node l, Node r) { + me -> left = l; + me -> right = r; +} + +#ifndef GC + void destroy_Node(Node me) { + if (me -> left) { + destroy_Node(me -> left); + } + if (me -> right) { + destroy_Node(me -> right); + } + free(me); + } +#endif + +// Nodes used by a tree of a given size +static int TreeSize(int i) { + return ((1 << (i + 1)) - 1); +} + +// Number of iterations to use for a given tree depth +static int NumIters(int i) { + return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i); +} + +// Build tree top down, assigning to older objects. 
+static void Populate(int iDepth, Node thisNode) { + if (iDepth<=0) { + return; + } else { + iDepth--; +# ifdef GC + thisNode->left = GC_NEW(Node0); HOLE(); + thisNode->right = GC_NEW(Node0); HOLE(); +# else + thisNode->left = calloc(1, sizeof(Node0)); + thisNode->right = calloc(1, sizeof(Node0)); +# endif + Populate (iDepth, thisNode->left); + Populate (iDepth, thisNode->right); + } +} + +// Build tree bottom-up +static Node MakeTree(int iDepth) { + Node result; + if (iDepth<=0) { +# ifndef GC + result = calloc(1, sizeof(Node0)); +# else + result = GC_NEW(Node0); HOLE(); +# endif + /* result is implicitly initialized in both cases. */ + return result; + } else { + Node left = MakeTree(iDepth-1); + Node right = MakeTree(iDepth-1); +# ifndef GC + result = malloc(sizeof(Node0)); +# else + result = GC_NEW(Node0); HOLE(); +# endif + init_Node(result, left, right); + return result; + } +} + +static void PrintDiagnostics() { +#if 0 + long lFreeMemory = Runtime.getRuntime().freeMemory(); + long lTotalMemory = Runtime.getRuntime().totalMemory(); + + System.out.print(" Total memory available=" + + lTotalMemory + " bytes"); + System.out.println(" Free memory=" + lFreeMemory + " bytes"); +#endif +} + +static void TimeConstruction(int depth) { + long tStart, tFinish; + int iNumIters = NumIters(depth); + Node tempTree; + int i; + + printf("0x%x: Creating %d trees of depth %d\n", pthread_self(), iNumIters, depth); + + tStart = currentTime(); + for (i = 0; i < iNumIters; ++i) { +# ifndef GC + tempTree = calloc(1, sizeof(Node0)); +# else + tempTree = GC_NEW(Node0); +# endif + Populate(depth, tempTree); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + } + tFinish = currentTime(); + printf("\t0x%x: Top down construction took %d msec\n", + pthread_self(), elapsedTime(tFinish - tStart)); + + tStart = currentTime(); + for (i = 0; i < iNumIters; ++i) { + tempTree = MakeTree(depth); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + } + tFinish = 
currentTime(); + printf("\t0x%x: Bottom up construction took %d msec\n", + pthread_self(), elapsedTime(tFinish - tStart)); + +} + +void * run_one_test(void * arg) { + int d; + for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) { + TimeConstruction(d); + } +} + +int main() { /* Driver: stretch the heap, build the long-lived tree and array, run the timing test on NTHREADS threads (main thread included), then print totals. */ + Node root; + Node longLivedTree; + Node tempTree; + long tStart, tFinish; + long tElapsed; + int i; + double *array; + +#ifdef GC + // GC_full_freq = 30; + // GC_free_space_divisor = 16; + // GC_enable_incremental(); +#endif +# if defined(GC) && defined(LOCAL) + GC_thr_init(); +# endif + printf("Garbage Collector Test\n"); + printf(" Live storage will peak at %zu bytes.\n\n", /* the sizeof arithmetic below has type size_t, so %zu rather than %d */ + 2 * sizeof(Node0) * TreeSize(kLongLivedTreeDepth) + + sizeof(double) * kArraySize); + printf(" Stretching memory with a binary tree of depth %d\n", + kStretchTreeDepth); + PrintDiagnostics(); +# ifdef PROFIL + init_profiling(); +# endif + + tStart = currentTime(); + + // Stretch the memory space quickly + tempTree = MakeTree(kStretchTreeDepth); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + + // Create a long lived object + printf(" Creating a long-lived binary tree of depth %d\n", + kLongLivedTreeDepth); +# ifndef GC + longLivedTree = calloc(1, sizeof(Node0)); +# else + longLivedTree = GC_NEW(Node0); +# endif + Populate(kLongLivedTreeDepth, longLivedTree); + + // Create long-lived array, filling half of it + printf(" Creating a long-lived array of %d doubles\n", kArraySize); +# ifndef GC + array = malloc(kArraySize * sizeof(double)); +# else +# ifndef NO_PTRFREE + array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize); +# else + array = GC_MALLOC(sizeof(double) * kArraySize); +# endif +# endif + for (i = 0; i < kArraySize/2; ++i) { + array[i] = 1.0/i; + } + + { + pthread_t thread[NTHREADS]; + for (i = 1; i < NTHREADS; ++i) { + int code; + + if ((code = pthread_create(thread+i, 0, run_one_test, 0)) != 0) { + fprintf(stderr, "Thread creation failed %u\n", code); + exit(1); + } + } + /* We use the main 
thread to run one test. This allows */ + /* profiling to work, for example. */ + run_one_test(0); + for (i = 1; i < NTHREADS; ++i) { + int code; + if ((code = pthread_join(thread[i], 0)) != 0) { + fprintf(stderr, "Thread join failed %u\n", code); + } + } + } + PrintDiagnostics(); + + if (longLivedTree == 0 || array[1000] != 1.0/1000) + fprintf(stderr, "Failed\n"); + // fake reference to LongLivedTree + // and array + // to keep them from being optimized away + + tFinish = currentTime(); + tElapsed = elapsedTime(tFinish-tStart); + PrintDiagnostics(); + printf("Completed in %ld msec\n", tElapsed); /* tElapsed is declared long */ +# ifdef GC + printf("Completed %lu collections\n", (unsigned long)GC_gc_no); /* cast so the argument matches %lu whatever integer type the collector uses */ + printf("Heap size is %zu\n", (size_t)GC_get_heap_size()); +# endif +# ifdef PROFIL + dump_profile(); +# endif +} + diff --git a/MT_GCBench2.c b/MT_GCBench2.c new file mode 100644 index 000000000..07fe7e3a5 --- /dev/null +++ b/MT_GCBench2.c @@ -0,0 +1,398 @@ +// This is version 2 of the multithreaded GC Bench. +// Heap expansion is handled differently from version 1, in an attempt +// to make scalability measurements more meaningful. The version with +// N threads now immediately expands the heap to N*32MB. +// +// To run this with BDWGC versions 6 and later with thread local allocation, +// define GC and LOCAL. Without thread-local allocation, define just GC. +// To run it with the University of Tokyo scalable GC, +// define SGC. To run it with malloc and explicit deallocation, define +// none of these. (This should also work for Hoard.) +// +// Note that defining GC or SGC removes the explicit deallocation passes, +// which seems fair. +// +// This is adapted from a benchmark written by John Ellis and Pete Kovac +// of Post Communications. +// It was modified by Hans Boehm of Silicon Graphics. +// Translated to C++ 30 May 1997 by William D Clinger of Northeastern Univ. +// Translated to C 15 March 2000 by Hans Boehm, now at HP Labs. +// Adapted to run NTHREADS client threads concurrently. 
Each +// thread executes the original benchmark. 12 June 2000 by Hans Boehm. +// Changed heap expansion rule, and made the number of threads run-time +// configurable. 25 Oct 2000 by Hans Boehm. +// +// This is no substitute for real applications. No actual application +// is likely to behave in exactly this way. However, this benchmark was +// designed to be more representative of real applications than other +// Java GC benchmarks of which we were aware at the time. +// It still doesn't seem too bad for something this small. +// It attempts to model those properties of allocation requests that +// are important to current GC techniques. +// It is designed to be used either to obtain a single overall performance +// number, or to give a more detailed estimate of how collector +// performance varies with object lifetimes. It prints the time +// required to allocate and collect balanced binary trees of various +// sizes. Smaller trees result in shorter object lifetimes. Each cycle +// allocates roughly the same amount of memory. +// Two data structures are kept around during the entire process, so +// that the measured performance is representative of applications +// that maintain some live in-memory data. One of these is a tree +// containing many pointers. The other is a large array containing +// double precision floating point numbers. Both should be of comparable +// size. +// +// The results are only really meaningful together with a specification +// of how much memory was used. This version of the benchmark tries +// to preallocate a sufficiently large heap that expansion should not be +// needed. +// +// Unlike the original Ellis and Kovac benchmark, we do not attempt to +// measure pause times. This facility should eventually be added back +// in. There are several reasons for omitting it for now. The original +// implementation depended on assumptions about the thread scheduler +// that don't hold uniformly. 
The results really measure both the +// scheduler and GC. Pause time measurements tend to not fit well with +// current benchmark suites. As far as we know, none of the current +// commercial Java implementations seriously attempt to minimize GC pause +// times. +// +// Since this benchmark has recently been more widely used, some +// anomalous behaviour has been uncovered. The user should be aware +// of this: +// 1) Nearly all objects are of the same size. This benchmark is +// not useful for analyzing fragmentation behavior. It is unclear +// whether this is an issue for well-designed allocators. +// 2) Unless HOLES is defined, it tends to drop consecutively allocated +// memory at the same time. Many real applications do exhibit this +// phenomenon, but probably not to this extent. (Defining HOLES tends +// to move the benchmark to the opposite extreme.) +// 3) It appears harder to predict object lifetimes than for most real +// Java programs (see T. Harris, "Dynamic adaptive pre-tenuring", +// ISMM '00). + +#include +#include +#include +#include + +#ifdef GC +# ifndef LINUX_THREADS +# define LINUX_THREADS +# endif +# ifndef _REENTRANT +# define _REENTRANT +# endif +# ifdef LOCAL +# define GC_REDIRECT_TO_LOCAL +# include "gc_local_alloc.h" +# endif +# include "gc.h" +#endif +#ifdef SGC +# include "sgc.h" +# define GC +# define pthread_create GC_pthread_create +# define pthread_join GC_pthread_join +#endif + +#define MAX_NTHREADS 1024 + +int nthreads = 0; + +#ifdef PROFIL + extern void init_profiling(); + extern dump_profile(); +#endif + +// These macros were a quick hack for the Macintosh. 
+// +// #define currentTime() clock() +// #define elapsedTime(x) ((1000*(x))/CLOCKS_PER_SEC) + +#define currentTime() stats_rtclock() +#define elapsedTime(x) (x) + +/* Get the current time in milliseconds */ + +unsigned +stats_rtclock( void ) +{ + struct timeval t; + struct timezone tz; + + if (gettimeofday( &t, &tz ) == -1) + return 0; + return (t.tv_sec * 1000 + t.tv_usec / 1000); +} + +static const int kStretchTreeDepth = 18; // about 16Mb +static const int kLongLivedTreeDepth = 16; // about 4Mb +static const int kArraySize = 500000; // about 4Mb +static const int kMinTreeDepth = 4; +static const int kMaxTreeDepth = 16; + +typedef struct Node0_struct { + struct Node0_struct * left; + struct Node0_struct * right; + int i, j; +} Node0; + +#ifdef HOLES +# define HOLE() GC_NEW(Node0); +#else +# define HOLE() +#endif + +typedef Node0 *Node; + +void init_Node(Node me, Node l, Node r) { + me -> left = l; + me -> right = r; +} + +#ifndef GC + void destroy_Node(Node me) { + if (me -> left) { + destroy_Node(me -> left); + } + if (me -> right) { + destroy_Node(me -> right); + } + free(me); + } +#endif + +// Nodes used by a tree of a given size +static int TreeSize(int i) { + return ((1 << (i + 1)) - 1); +} + +// Number of iterations to use for a given tree depth +static int NumIters(int i) { + return 2 * TreeSize(kStretchTreeDepth) / TreeSize(i); +} + +// Build tree top down, assigning to older objects. 
+static void Populate(int iDepth, Node thisNode) { + if (iDepth<=0) { + return; + } else { + iDepth--; +# ifdef GC + thisNode->left = GC_NEW(Node0); HOLE(); + thisNode->right = GC_NEW(Node0); HOLE(); +# else + thisNode->left = calloc(1, sizeof(Node0)); + thisNode->right = calloc(1, sizeof(Node0)); +# endif + Populate (iDepth, thisNode->left); + Populate (iDepth, thisNode->right); + } +} + +// Build tree bottom-up +static Node MakeTree(int iDepth) { + Node result; + if (iDepth<=0) { +# ifndef GC + result = calloc(1, sizeof(Node0)); +# else + result = GC_NEW(Node0); HOLE(); +# endif + /* result is implicitly initialized in both cases. */ + return result; + } else { + Node left = MakeTree(iDepth-1); + Node right = MakeTree(iDepth-1); +# ifndef GC + result = malloc(sizeof(Node0)); +# else + result = GC_NEW(Node0); HOLE(); +# endif + init_Node(result, left, right); + return result; + } +} + +static void PrintDiagnostics() { +#if 0 + long lFreeMemory = Runtime.getRuntime().freeMemory(); + long lTotalMemory = Runtime.getRuntime().totalMemory(); + + System.out.print(" Total memory available=" + + lTotalMemory + " bytes"); + System.out.println(" Free memory=" + lFreeMemory + " bytes"); +#endif +} + +static void TimeConstruction(int depth) { + long tStart, tFinish; + int iNumIters = NumIters(depth); + Node tempTree; + int i; + + printf("0x%x: Creating %d trees of depth %d\n", pthread_self(), iNumIters, depth); + + tStart = currentTime(); + for (i = 0; i < iNumIters; ++i) { +# ifndef GC + tempTree = calloc(1, sizeof(Node0)); +# else + tempTree = GC_NEW(Node0); +# endif + Populate(depth, tempTree); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + } + tFinish = currentTime(); + printf("\t0x%x: Top down construction took %d msec\n", + pthread_self(), elapsedTime(tFinish - tStart)); + + tStart = currentTime(); + for (i = 0; i < iNumIters; ++i) { + tempTree = MakeTree(depth); +# ifndef GC + destroy_Node(tempTree); +# endif + tempTree = 0; + } + tFinish = 
currentTime(); + printf("\t0x%x: Bottom up construction took %d msec\n", + pthread_self(), elapsedTime(tFinish - tStart)); + +} + +void * run_one_test(void * arg) { /* Thread body: builds this thread's long-lived tree and array, then times tree construction at each depth. */ + int d, i; + Node longLivedTree; + double *array; + /* size_t initial_bytes = GC_get_total_bytes(); */ + + // Create a long lived object + printf(" Creating a long-lived binary tree of depth %d\n", + kLongLivedTreeDepth); +# ifndef GC + longLivedTree = calloc(1, sizeof(Node0)); +# else + longLivedTree = GC_NEW(Node0); +# endif + Populate(kLongLivedTreeDepth, longLivedTree); + + // Create long-lived array, filling half of it + printf(" Creating a long-lived array of %d doubles\n", kArraySize); +# ifndef GC + array = malloc(kArraySize * sizeof(double)); +# else +# ifndef NO_PTRFREE + array = GC_MALLOC_ATOMIC(sizeof(double) * kArraySize); +# else + array = GC_MALLOC(sizeof(double) * kArraySize); +# endif +# endif + for (i = 0; i < kArraySize/2; ++i) { + array[i] = 1.0/i; + } + + for (d = kMinTreeDepth; d <= kMaxTreeDepth; d += 2) { + TimeConstruction(d); + } + /* printf("Allocated %ld bytes before start, %ld after\n", + initial_bytes, GC_get_total_bytes() - initial_bytes); */ + if (longLivedTree->left -> right == 0 || array[1000] != 1.0/1000) + fprintf(stderr, "Failed\n"); + // fake reference to LongLivedTree + // and array + // to keep them from being optimized away + return 0; /* a void * thread start routine must return a value */ +} + +int main(int argc, char **argv) { /* Parses the optional thread count, pre-expands the heap, runs run_one_test on nthreads threads (main thread included), then prints totals. */ + Node root; + Node tempTree[MAX_NTHREADS]; + long tStart, tFinish; + long tElapsed; + int i; +# ifdef SGC + SGC_attr_t attr; +# endif + + if (1 == argc) { + nthreads = 1; + } else if (2 == argc) { + nthreads = atoi(argv[1]); + if (nthreads < 1 || nthreads > MAX_NTHREADS) { + fprintf(stderr, "Invalid # of threads argument\n"); + exit(1); + } + } else { + fprintf(stderr, "Usage: %s [# of threads]\n", argc > 0 ? argv[0] : "MT_GCBench2"); /* %s previously had no matching argument */ + exit(1); + } +# if defined(SGC) + /* The University of Tokyo collector needs explicit */ + /* initialization. 
*/ + SGC_attr_init(&attr); + SGC_init(nthreads, &attr); +# endif +#ifdef GC + // GC_full_freq = 30; + // GC_free_space_divisor = 16; + // GC_enable_incremental(); +#endif + printf("Garbage Collector Test\n"); + printf(" Live storage will peak at %zu bytes or less .\n\n", /* the sizeof arithmetic below has type size_t, so %zu rather than %d */ + 2 * sizeof(Node0) * nthreads + * (TreeSize(kLongLivedTreeDepth) + TreeSize(kMaxTreeDepth)) + + sizeof(double) * kArraySize); + PrintDiagnostics(); + +# ifdef GC + /* GC_expand_hp fails with empty heap */ + GC_malloc(1); + GC_expand_hp((size_t)32*1024*1024*nthreads); /* widen first: the int product overflows once nthreads reaches 64 */ +# endif + +# ifdef PROFIL + init_profiling(); +# endif + + tStart = currentTime(); + { + pthread_t thread[MAX_NTHREADS]; + for (i = 1; i < nthreads; ++i) { + int code; + + if ((code = pthread_create(thread+i, 0, run_one_test, 0)) != 0) { + fprintf(stderr, "Thread creation failed %u\n", code); + exit(1); + } + } + /* We use the main thread to run one test. This allows */ + /* profiling to work, for example. */ + run_one_test(0); + for (i = 1; i < nthreads; ++i) { + int code; + if ((code = pthread_join(thread[i], 0)) != 0) { + fprintf(stderr, "Thread join failed %u\n", code); + } + } + } + PrintDiagnostics(); + + tFinish = currentTime(); + tElapsed = elapsedTime(tFinish-tStart); + PrintDiagnostics(); + printf("Completed in %ld msec\n", tElapsed); /* tElapsed is declared long */ +# ifdef GC + printf("Completed %lu collections\n", (unsigned long)GC_gc_no); /* cast so the argument matches %lu whatever integer type the collector uses */ + printf("Heap size is %zu\n", (size_t)GC_get_heap_size()); +# endif +# ifdef PROFIL + dump_profile(); +# endif + return 0; +} + diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..2d7c92c9c --- /dev/null +++ b/Makefile @@ -0,0 +1,29 @@ +TESTS=GCBench MT_GCBench MT_GCBench2 +COLLECTORS=bdw + +CC=gcc +CFLAGS=-Wall -O2 -g + +ALL_TESTS=$(foreach COLLECTOR,$(COLLECTORS),$(addprefix $(COLLECTOR)-,$(TESTS))) + +all: $(ALL_TESTS) + +bdw-%: bdw.h %.c + $(CC) $(CFLAGS) -lpthread `pkg-config --libs --cflags bdw-gc` -I. -o $@ $*.c bdw.h + +check: $(addprefix test-$(TARGET),$(TARGETS)) + +test-%: $(ALL_TESTS) + @echo "Running unit tests..." 
+ @set -e; for test in $?; do \ + echo "Testing: $$test"; \ + ./$$test; \ + done + @echo "Success." + +.PHONY: check + +.PRECIOUS: $(ALL_TESTS) + +clean: + rm -f $(ALL_TESTS) diff --git a/bdw.h b/bdw.h new file mode 100644 index 000000000..28932aea8 --- /dev/null +++ b/bdw.h @@ -0,0 +1,13 @@ +// When pthreads are used, let `libgc' know about it and redirect +// allocation calls such as `GC_MALLOC ()' to (contention-free, faster) +// thread-local allocation. + +#define GC_THREADS 1 +#define GC_REDIRECT_TO_LOCAL 1 + +// Don't #define pthread routines to their GC_pthread counterparts. +// Instead we will be careful inside the benchmarks to use API to +// register threads with libgc. +#define GC_NO_THREAD_REDIRECTS 1 + +#include