#include #include #include #include #include #include #include #include #include "cleanbench.h" #include "randnum.h" /*********************** ** LU DECOMPOSITION ** ** (Linear Equations) ** ************************ ** These routines come from "Numerical Recipes in Pascal". ** Note that, as in the assignment algorithm, though we ** separately define LUARRAYROWS and LUARRAYCOLS, the two ** must be the same value (this routine depends on a square ** matrix). */ /* ** ARRAY_MAX ** ** This sets the upper limit on the number of arrays ** that the benchmark will attempt to build before ** flagging an error. It is not a critical constant, and ** may be increased if your system has the horsepower. */ #define ARRAY_MAX 10000 #define LUARRAYROWS 101L #define LUARRAYCOLS 101L typedef struct { union { double *p; double (*ap)[][LUARRAYCOLS]; } ptrs; } LUdblptr; double *LUtempvv; /* FIXME: really? */ static clock_t DoLUIteration(double *a, double *b, double *abase, double *bbase, unsigned long num_arrays); static void build_problem( double a[][LUARRAYCOLS], int n, double b[LUARRAYROWS]); static int ludcmp(double a[][LUARRAYCOLS], int n, int indx[], int *d); static void lubksb(double a[][LUARRAYCOLS], int n, int indx[LUARRAYROWS], double b[LUARRAYROWS]); static int lusolve(double a[][LUARRAYCOLS], int n, double b[LUARRAYROWS]); /********* ** DoLU ** ********** ** Perform the LU decomposition benchmark. */ double DoLU(void) { const char* context = "FPU:LU"; clock_t total_time = 0; int iterations = 0; double* a = NULL; double* b = NULL; double* abase = NULL; double* bbase = NULL; LUdblptr ptra; int n; int i; static int num_arrays = 0; static bool is_adjusted = false; /* ** Our first step is to build a "solvable" problem. This ** will become the "seed" set that all others will be ** derived from. (I.E., we'll simply copy these arrays ** into the others. */ a = malloc(sizeof(double) * LUARRAYCOLS * LUARRAYROWS); b = malloc(sizeof(double) * LUARRAYROWS); n=LUARRAYROWS; /* ** We need to allocate a temp vector that is used by the LU ** algorithm. This removes the allocation routine from the ** timing. */ LUtempvv = malloc(sizeof(double)*LUARRAYROWS); /* ** Build a problem to be solved. */ ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */ build_problem(*ptra.ptrs.ap,n,b); /* ** Now that we have a problem built, see if we need to do ** auto-adjust. If so, repeatedly call the DoLUIteration routine, ** increasing the number of solutions per iteration as you go. */ if (is_adjusted == false) { is_adjusted = true; for(i=1;i<=ARRAY_MAX;i++) { abase = realloc(abase, sizeof(double) * LUARRAYCOLS*LUARRAYROWS*(i+1)); if (!abase) { fprintf(stderr, "Error in %s, could not allocate memory. Exitting...\n", context); free(a); free(b); free(LUtempvv); exit(1); } bbase = realloc(bbase, sizeof(double) * LUARRAYROWS*(i+1)); if (!bbase) { fprintf(stderr, "Error in %s, could not allocate memory. Exitting...\n", context); free(a); free(b); free(abase); free(LUtempvv); exit(1); } if(DoLUIteration(a,b,abase,bbase,i)>MINIMUM_TICKS) { num_arrays=i; break; } } /* ** Were we able to do it? */ if (num_arrays==0) { fprintf(stderr, "FPU:LU -- Array limit reached\n"); free(a); free(b); free(abase); free(bbase); free(LUtempvv); exit(1); } } else { /* ** Don't need to adjust -- just allocate the proper ** number of arrays and proceed. */ abase = malloc(sizeof(double) * LUARRAYCOLS*LUARRAYROWS*num_arrays); if (!abase) { fprintf(stderr, "Error in %s, could not allocate memory. Exitting...\n", context); free(a); free(b); free(LUtempvv); exit(1); } bbase = malloc(sizeof(double) * LUARRAYROWS*num_arrays); if (!bbase) { fprintf(stderr, "Error in %s, could not allocate memory. Exitting...\n", context); free(a); free(b); free(abase); free(LUtempvv); exit(1); } } do { total_time += DoLUIteration(a, b, abase, bbase, num_arrays); iterations += num_arrays; } while(total_time < MINIMUM_SECONDS * CLOCKS_PER_SEC); free(a); free(b); free(abase); free(bbase); free(LUtempvv); return (double)(iterations * CLOCKS_PER_SEC) / (double)total_time; } /****************** ** DoLUIteration ** ******************* ** Perform an iteration of the LU decomposition benchmark. ** An iteration refers to the repeated solution of several ** identical matrices. */ static clock_t DoLUIteration(double *a,double *b, double *abase, double *bbase, unsigned long num_arrays) { clock_t start, stop; double *locabase; double *locbbase; LUdblptr ptra; /* For converting ptr to 2D array */ unsigned long j,i; /* Indexes */ /* ** Move the seed arrays (a & b) into the destination ** arrays; */ for(j=0;j big) big=fabs(a[i][j]); /* Bail out on singular matrix */ if(big==(double)0.0) return(0); LUtempvv[i]=1.0/big; } /* ** Crout's algorithm...loop over columns. */ for(j=0;j=big) { big=dum; imax=i; } } if(j!=imax) /* Interchange rows if necessary */ { for(k=0;k=0;i--) { sum=b[i]; if(i!=(n-1)) for(j=(i+1);j